"""Minimal Gradio chat UI streaming completions from Novita-hosted models on the HF Hub."""

import os

import gradio as gr
import requests
from huggingface_hub import InferenceClient

# Hub search: models >= 64B params that Novita serves, newest first.
MODELS_URL = (
    "https://huggingface.co/models-json"
    "?num_parameters=min%3A64B&inference_provider=novita&sort=modified&withCount=true"
)


def _fetch_model_ids():
    """Return the list of eligible model ids, or [] if the Hub is unreachable.

    The original code called ``requests.get`` with no timeout and no status
    check, so a network hiccup at import time hung or crashed the whole app.
    """
    try:
        res = requests.get(MODELS_URL, timeout=10)
        res.raise_for_status()
        data = res.json()
    except (requests.RequestException, ValueError):
        # Degrade gracefully: the dropdown will simply start empty.
        return []
    return [m["id"] for m in data.get("models", [])]


model_ids = _fetch_model_ids()

# Raises KeyError with a clear name if HF_TOKEN is not set — intentional fail-fast.
client = InferenceClient(provider="novita", api_key=os.environ["HF_TOKEN"])


def _history_to_messages(message, history):
    """Convert Gradio tuple-style history plus the new message into chat messages.

    Each history entry is a ``(user_text, assistant_text)`` pair; empty halves
    are skipped. The new user message goes last.
    """
    messages = []
    for user_turn, bot_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if bot_turn:
            messages.append({"role": "assistant", "content": bot_turn})
    messages.append({"role": "user", "content": message})
    return messages


def respond(message, history, model):
    """Stream a model reply into the chatbot.

    Yields ``("", history)`` pairs: the empty string clears the textbox, and
    the history's last entry is updated in place as tokens arrive.

    Args:
        message: the user's new message.
        history: list of ``(user, assistant)`` tuples shown in the Chatbot.
        model:   model id selected in the dropdown.
    """
    # Build the full conversation BEFORE appending the new turn, so the new
    # message is not duplicated. (The original sent only the last message,
    # discarding all prior context — broken for a multi-turn chat.)
    messages = _history_to_messages(message, history)
    history.append((message, ""))
    yield "", history

    full_reply = ""
    try:
        for chunk in client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
        ):
            if chunk.choices:
                delta = chunk.choices[0].delta
                if getattr(delta, "content", None):
                    full_reply += delta.content
                    history[-1] = (message, full_reply)
                    yield "", history
    except Exception as exc:  # surface provider/stream errors in the chat bubble
        history[-1] = (message, f"{full_reply}\n\n[error: {exc}]".strip())
        yield "", history


with gr.Blocks() as demo:
    gr.Markdown("## AI")
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Ask me smth")
    model_dd = gr.Dropdown(
        model_ids,
        label="Model",
        value=model_ids[0] if model_ids else "",
    )
    with gr.Row():
        submit_btn = gr.Button("Send")
        clear = gr.Button("Clear")

    # Enter key and Send button trigger the same streaming handler.
    msg.submit(respond, [msg, chatbot, model_dd], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot, model_dd], [msg, chatbot])
    # Returning None resets the Chatbot component.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue().launch()