"""Minimal Gradio chat UI backed by the Hugging Face Inference API."""

import gradio as gr
from huggingface_hub import InferenceClient

MODEL_ID = "mistralai/Mistral-Small-3.2-24B-Instruct-2506"

# Single shared client; binding the model here means per-call `model=`
# arguments are unnecessary.
client = InferenceClient(model=MODEL_ID)


def _history_to_messages(history):
    """Convert Gradio chat history into OpenAI-style message dicts.

    Accepts both history formats that gr.ChatInterface may pass:
    - tuples format: list of (user_text, assistant_text) pairs
    - messages format: list of {"role": ..., "content": ...} dicts
      (used when the interface is created with type="messages")

    Returns:
        A new list of {"role", "content"} dicts in conversation order.
    """
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Already messages-format; copy only the fields the API needs.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_text, bot_text = turn
            messages.append({"role": "user", "content": user_text})
            # A pending turn may have no assistant reply yet.
            if bot_text is not None:
                messages.append({"role": "assistant", "content": bot_text})
    return messages


def chat_with_model(message, history):
    """Return the model's reply to *message* given the prior *history*.

    Args:
        message: The latest user message text.
        history: Prior turns supplied by gr.ChatInterface (tuples or
            messages format; see _history_to_messages).

    Returns:
        The assistant's reply as a plain string.
    """
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    # Call the Hugging Face Inference API (OpenAI-compatible chat style).
    # The client is already bound to MODEL_ID, so no model= is needed here.
    response = client.chat_completion(
        messages=messages,
        max_tokens=512,
    )
    # huggingface_hub returns dataclass-like objects; attribute access is
    # the documented way to read the generated content.
    return response.choices[0].message.content


# Build the Gradio chat interface around the completion function.
demo = gr.ChatInterface(
    fn=chat_with_model,
    title="Chat with HuggingFace Mistral",
    description="Gradio + HuggingFace InferenceClient",
    theme="soft",
)

if __name__ == "__main__":
    demo.launch()