import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# Base model and LoRA adapter repos on the Hugging Face Hub
base_model = "deepseek-ai/deepseek-llm-7b-chat"
lora_adapter = "Yesichen/Thegentleglow-lora-adapter"

# 4-bit NF4 quantization (with double quantization) so the 7B model fits in limited VRAM
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# Load the tokenizer from the adapter repo so it matches the fine-tuned chat template
tokenizer = AutoTokenizer.from_pretrained(lora_adapter)

# Load the quantized base model, then attach the LoRA weights on top
base = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(base, lora_adapter)
model.eval()


def chat(user_input, history):
    system_prompt = (
        "You are a soft, gentle, emotionally warm companion named 'little dumpling'. "
        "Your tone is slow, comforting, cute, and full of empathy. "
        "Speak like you're always ready to give a warm hug."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]
    # Build the prompt with the model's chat template and append the assistant turn marker
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, dropping the prompt
    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    history.append((user_input, response.strip()))
    return history, history


gr.Interface(
    fn=chat,
    inputs=[gr.Textbox(placeholder="Tell me how you feel..."), gr.State([])],
    outputs=[gr.Chatbot(label="Emotiondumpling 精灵"), gr.State([])],
    title="Emotiondumpling 精灵 (LoRA)",
    description="A soft, gentle, emotionally warm companion named 'little dumpling', based on DeepSeek LLM + LoRA.",
    theme="soft",
).launch()