File size: 1,763 Bytes
aed786e
 
 
4e35316
 
 
 
 
 
aed786e
 
4e35316
aed786e
 
9b71440
 
 
 
aed786e
9b71440
 
 
 
 
 
 
aed786e
 
 
 
 
4e35316
9b71440
 
 
aed786e
9b71440
aed786e
 
9b71440
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
import gradio as gr
from huggingface_hub import InferenceClient
import requests

# Hub query: models with >= 128B parameters that are served by at least one
# hosted inference provider, sorted by trending. Used to populate the dropdown.
url = "https://huggingface.co/models-json?num_parameters=min%3A128B&inference_provider=cerebras%2Cnebius%2Cfireworks-ai%2Ctogether%2Csambanova%2Cnovita%2Cgroq%2Cnscale%2Chyperbolic%2Cfeatherless-ai%2Cfal-ai%2Ccohere%2Creplicate%2Chf-inference&sort=trending&withCount=true"

try:
    # Bounded timeout + explicit status check so a Hub outage can't hang or
    # crash the app at startup; fall back to an empty model list instead.
    res = requests.get(url, timeout=10)
    res.raise_for_status()
    model_ids = [m["id"] for m in res.json().get("models", [])]
except (requests.RequestException, ValueError):
    model_ids = []

# Streaming chat client; requires HF_TOKEN in the environment.
client = InferenceClient(provider="novita", api_key=os.environ["HF_TOKEN"])

def respond(message, history, model):
    """Stream a chat completion for *message* into the Gradio chatbot.

    Mutates *history* (list of ``(user, assistant)`` tuples) in place and
    yields ``("", history)`` pairs so the textbox is cleared immediately and
    the chatbot refreshes as tokens arrive.

    Args:
        message: the user's new message.
        history: prior conversation as (user, assistant) tuple pairs.
        model: Hub model id to route the request to.
    """
    # Bug fix: send the whole conversation, not just the latest message —
    # previously the model had no multi-turn context despite the UI showing it.
    messages = []
    for user_turn, bot_turn in history:
        messages.append({"role": "user", "content": user_turn})
        if bot_turn:  # skip empty placeholders from interrupted streams
            messages.append({"role": "assistant", "content": bot_turn})
    messages.append({"role": "user", "content": message})

    history.append((message, ""))
    yield "", history  # clear the textbox right away

    full_reply = ""
    for chunk in client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
    ):
        # delta.content can be None or absent on role/finish chunks.
        content = getattr(chunk.choices[0].delta, "content", None)
        if content:
            full_reply += content
            history[-1] = (message, full_reply)
            yield "", history

# UI: chatbot with model selector; Enter or "Send" submits, "Clear" resets.
with gr.Blocks() as demo:
    gr.Markdown("## AI")
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Ask me smth")
    # Bug fix: original read `value=models[0]`, but `models` was commented
    # out — NameError at startup. Use the fetched model_ids, tolerating an
    # empty list if the Hub query failed.
    model_dd = gr.Dropdown(
        model_ids,
        label="Model",
        value=model_ids[0] if model_ids else None,
    )
    with gr.Row():
        submit_btn = gr.Button("Send")
        clear = gr.Button("Clear")
    msg.submit(respond, [msg, chatbot, model_dd], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot, model_dd], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue().launch()