import os

from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="featherless-ai",
    api_key=os.environ["HF_TOKEN"],
)

def chat_with_model(message, history, perspective):
    # Build the messages list, injecting the perspective as a system prompt
    messages = [{"role": "system", "content": f"Adopt this perspective: {perspective}"}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Call the model and accumulate the reply token by token
    reply = ""
    for event in client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        messages=messages,
        max_tokens=512,
        stream=True,  # enable token-by-token streaming
    ):
        # Each streamed chunk carries its new text in choices[0].delta.content
        if token := event.choices[0].delta.content:
            reply += token
            # Yield the partial transcript so Gradio can render it live
            yield history + [(message, reply)], history

    # Persist the completed turn in the history state for the next call
    history = history + [(message, reply)]
    yield history, history
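# A minimal usage sketch, not part of the original snippet: one way to wire
# chat_with_model into a Gradio Blocks app. The component names
# (perspective_box, chatbot, state, msg) are hypothetical, and the
# tuple-based Chatbot history matches the format the function builds.
import gradio as gr

with gr.Blocks() as demo:
    perspective_box = gr.Textbox(label="Perspective", value="a skeptical historian")
    chatbot = gr.Chatbot()  # renders (user, assistant) message pairs
    state = gr.State([])    # holds the running history list
    msg = gr.Textbox(label="Message")

    # Because chat_with_model is a generator, Gradio streams each yielded
    # (chatbot, state) pair to the browser as tokens arrive.
    msg.submit(
        chat_with_model,
        inputs=[msg, state, perspective_box],
        outputs=[chatbot, state],
    )

demo.launch()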