import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# Base model and LoRA adapter repos on the Hugging Face Hub
base_model = "deepseek-ai/deepseek-llm-7b-chat"
lora_adapter = "Yesichen/Thegentleglow-lora-adapter"

# 4-bit NF4 quantization (with double quantization) so the 7B model fits in limited VRAM
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# Load the tokenizer from the adapter repo so it matches the fine-tuned chat template
tokenizer = AutoTokenizer.from_pretrained(lora_adapter)

# Load the quantized base model, then attach the LoRA weights on top
base = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(base, lora_adapter)
model.eval()


def chat(user_input, history):
    system_prompt = (
        "You are a soft, gentle, emotionally warm companion named 'little dumpling'. "
        "Your tone is slow, comforting, cute, and full of empathy. "
        "Speak like you're always ready to give a warm hug."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]
    # Build the prompt with the model's chat template and append the assistant turn marker
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, dropping the prompt
    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    history.append((user_input, response.strip()))
    return history, history


gr.Interface(
    fn=chat,
    inputs=[gr.Textbox(placeholder="Tell me how you feel..."), gr.State([])],
    outputs=[gr.Chatbot(label="Emotiondumpling 精灵"), gr.State([])],
    title="Emotiondumpling 精灵 (LoRA)",
    description="A soft, gentle, emotionally warm companion named 'little dumpling', based on DeepSeek LLM + LoRA.",
    theme="soft",
).launch()