|
|
import os |
|
|
import gradio as gr |
|
|
from huggingface_hub import InferenceClient |
|
|
import requests |
|
|
|
|
|
# Hugging Face model-listing endpoint, filtered to large (>=128B-parameter)
# models that are hosted by at least one inference provider, sorted by trending.
url = "https://huggingface.co/models-json?num_parameters=min%3A128B&inference_provider=cerebras%2Cnebius%2Cfireworks-ai%2Ctogether%2Csambanova%2Cnovita%2Cgroq%2Cnscale%2Chyperbolic%2Cfeatherless-ai%2Cfal-ai%2Ccohere%2Creplicate%2Chf-inference&sort=trending&withCount=true"


# Fetch the model list at import time. Bound the request with a timeout and
# fall back to an empty payload on failure so a network hiccup or bad JSON
# cannot hang or crash app startup (the original had neither a timeout nor
# any error handling).
try:
    res = requests.get(url, timeout=10).json()
except (requests.RequestException, ValueError):
    res = {}

# Each entry in the payload is a dict like {"id": "org/name", ...}; keep the ids.
model_ids = [m['id'] for m in res.get('models', [])]


# Streaming chat client routed through the Novita provider.
# NOTE(review): the dropdown lists models from many providers, but every call
# goes through "novita" — models not served by Novita may fail at request time.
# HF_TOKEN must be set in the environment (KeyError otherwise, by design).
client = InferenceClient(provider="novita", api_key=os.environ["HF_TOKEN"])
|
|
|
|
|
|
|
|
def respond(message, history, model):
    """Stream a chat completion for *message*, updating the chatbot live.

    Args:
        message: Latest user message from the textbox.
        history: Chatbot history as a list of (user, assistant) string pairs;
            mutated in place as the reply streams.
        model: Model id selected in the dropdown.

    Yields:
        ("", history) pairs — the empty string clears the textbox while the
        chatbot component re-renders the growing assistant reply.
    """
    # Build the full conversation so the model sees all prior turns.
    # BUG FIX: the original sent only the latest message, discarding the
    # conversation history entirely — the model had no context.
    messages = []
    for user_turn, bot_turn in history:
        messages.append({"role": "user", "content": user_turn})
        if bot_turn:
            messages.append({"role": "assistant", "content": bot_turn})
    messages.append({"role": "user", "content": message})

    # Show the user's message immediately with an empty reply slot.
    history.append((message, ""))
    yield "", history

    full_reply = ""
    for chunk in client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
    ):
        # Some stream chunks carry no content (e.g. role-only or final
        # chunks); a single getattr replaces the hasattr-then-access pair.
        delta = getattr(chunk.choices[0].delta, "content", None)
        if delta:
            full_reply += delta
            history[-1] = (message, full_reply)
            yield "", history
|
|
|
|
|
# ---- UI wiring -----------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## AI")
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Ask me smth")
    # BUG FIX: the original read `models[0]`, but no name `models` exists —
    # the fetched list is `model_ids` — so the script crashed with a
    # NameError before launching. Also guard against an empty fetch result.
    model_dd = gr.Dropdown(
        model_ids,
        label="Model",
        value=model_ids[0] if model_ids else None,
    )
    with gr.Row():
        submit_btn = gr.Button("Send")
        clear = gr.Button("Clear")

    # Pressing Enter in the textbox and clicking Send run the same handler.
    msg.submit(respond, [msg, chatbot, model_dd], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot, model_dd], [msg, chatbot])
    # Reset the chat pane; bypass the queue so clearing is instant.
    clear.click(lambda: None, None, chatbot, queue=False)


# Queueing is required for streaming (generator) handlers like `respond`.
demo.queue().launch()