"""Minimal Gradio chat UI streaming completions from Novita-hosted models on the HF Hub."""

import os

import gradio as gr
import requests
from huggingface_hub import InferenceClient

# Hub search: models >= 64B params that Novita serves, newest first.
MODELS_URL = (
    "https://huggingface.co/models-json"
    "?num_parameters=min%3A64B&inference_provider=novita&sort=modified&withCount=true"
)


def _fetch_model_ids():
    """Return the list of eligible model ids, or [] if the Hub is unreachable.

    The original code called ``requests.get`` with no timeout and no status
    check, so a network hiccup at import time hung or crashed the whole app.
    """
    try:
        res = requests.get(MODELS_URL, timeout=10)
        res.raise_for_status()
        data = res.json()
    except (requests.RequestException, ValueError):
        # Degrade gracefully: the dropdown will simply start empty.
        return []
    return [m["id"] for m in data.get("models", [])]


model_ids = _fetch_model_ids()

# Raises KeyError with a clear name if HF_TOKEN is not set — intentional fail-fast.
client = InferenceClient(provider="novita", api_key=os.environ["HF_TOKEN"])


def _history_to_messages(message, history):
    """Convert Gradio tuple-style history plus the new message into chat messages.

    Each history entry is a ``(user_text, assistant_text)`` pair; empty halves
    are skipped. The new user message goes last.
    """
    messages = []
    for user_turn, bot_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if bot_turn:
            messages.append({"role": "assistant", "content": bot_turn})
    messages.append({"role": "user", "content": message})
    return messages


def respond(message, history, model):
    """Stream a model reply into the chatbot.

    Yields ``("", history)`` pairs: the empty string clears the textbox, and
    the history's last entry is updated in place as tokens arrive.

    Args:
        message: the user's new message.
        history: list of ``(user, assistant)`` tuples shown in the Chatbot.
        model:   model id selected in the dropdown.
    """
    # Build the full conversation BEFORE appending the new turn, so the new
    # message is not duplicated. (The original sent only the last message,
    # discarding all prior context — broken for a multi-turn chat.)
    messages = _history_to_messages(message, history)
    history.append((message, ""))
    yield "", history

    full_reply = ""
    try:
        for chunk in client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
        ):
            if chunk.choices:
                delta = chunk.choices[0].delta
                if getattr(delta, "content", None):
                    full_reply += delta.content
                    history[-1] = (message, full_reply)
                    yield "", history
    except Exception as exc:  # surface provider/stream errors in the chat bubble
        history[-1] = (message, f"{full_reply}\n\n[error: {exc}]".strip())
        yield "", history


with gr.Blocks() as demo:
    gr.Markdown("## AI")
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Ask me smth")
    model_dd = gr.Dropdown(
        model_ids,
        label="Model",
        value=model_ids[0] if model_ids else "",
    )
    with gr.Row():
        submit_btn = gr.Button("Send")
        clear = gr.Button("Clear")

    # Enter key and Send button trigger the same streaming handler.
    msg.submit(respond, [msg, chatbot, model_dd], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot, model_dd], [msg, chatbot])
    # Returning None resets the Chatbot component.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue().launch()