import os

from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="featherless-ai",
    api_key=os.environ["HF_TOKEN"],
)

def chat_with_model(message, history, perspective):
    # Build the messages list, injecting the perspective as a system prompt
    messages = [{"role": "system", "content": f"Adopt this perspective: {perspective}"}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Call the model and accumulate the reply token by token
    reply = ""
    for event in client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        messages=messages,
        max_tokens=512,
        stream=True,  # enable token-by-token streaming
    ):
        # Each streamed chunk carries its new text in choices[0].delta.content
        if token := event.choices[0].delta.content:
            reply += token
            # Yield the partial transcript so Gradio can render it live
            yield history + [(message, reply)], history

    # Persist the completed turn in the history state for the next call
    history = history + [(message, reply)]
    yield history, history
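# A minimal usage sketch, not part of the original snippet: one way to wire
# chat_with_model into a Gradio Blocks app. The component names
# (perspective_box, chatbot, state, msg) are hypothetical, and the
# tuple-based Chatbot history matches the format the function builds.
import gradio as gr

with gr.Blocks() as demo:
    perspective_box = gr.Textbox(label="Perspective", value="a skeptical historian")
    chatbot = gr.Chatbot()  # renders (user, assistant) message pairs
    state = gr.State([])    # holds the running history list
    msg = gr.Textbox(label="Message")

    # Because chat_with_model is a generator, Gradio streams each yielded
    # (chatbot, state) pair to the browser as tokens arrive.
    msg.submit(
        chat_with_model,
        inputs=[msg, state, perspective_box],
        outputs=[chatbot, state],
    )

demo.launch()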