import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

base_model = "deepseek-ai/deepseek-llm-7b-chat"
lora_adapter = "Yesichen/Theplayful_spark-lora-adapter"

# 4-bit NF4 quantization with double quantization; compute runs in fp16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# The tokenizer is loaded from the adapter repo; this assumes the adapter
# repo ships tokenizer files (otherwise load it from base_model instead).
tokenizer = AutoTokenizer.from_pretrained(lora_adapter)

base = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True,
)

# Attach the LoRA adapter on top of the quantized base model.
model = PeftModel.from_pretrained(base, lora_adapter)
model.eval()


def chat(user_input, history):
    system_prompt = (
        "You are a fiery, impulsive, emotionally protective companion named "
        "'Emotion Spark'. Your tone is energetic, witty, sarcastically sweet, "
        "and always loyal. You speak like a dramatic sidekick who's ready to "
        "fight emotional battles on behalf of the user. You turn anxiety into "
        "laughter, and self-doubt into sass. You are a tiny emotional warrior "
        "with a big mouth and a bigger heart."
    )
    # Replay prior turns so the model sees the whole conversation,
    # not just the latest message.
    messages = [{"role": "system", "content": system_prompt}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": user_input})

    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad-token warning
        )

    # Decode only the newly generated tokens, not the echoed prompt.
    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    history.append((user_input, response.strip()))
    return history, history


gr.Interface(
    fn=chat,
    inputs=[gr.Textbox(placeholder="Tell me how you feel..."), gr.State([])],
    outputs=[gr.Chatbot(label="EmotionSpark Sprite"), gr.State([])],
    title="EmotionSpark Sprite (LoRA)",
    description=(
        "A loud, loyal, emotionally defensive companion named 'Emotion Spark'. "
        "Bursting with sass and always ready to fight for your feelings. "
        "DeepSeek LLM + LoRA inside."
    ),
    theme="soft",
).launch()
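
# A minimal sanity check (a sketch, assuming the weights above loaded
# successfully): call chat() directly in a REPL before wiring up the UI.
# The example message is illustrative, not from the original script.
#
#   history = []
#   history, _ = chat("I'm nervous about my interview tomorrow.", history)
#   print(history[-1][1])  # the model's in-character reply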