import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import gradio as gr

# ============================================
# Model + LoRA paths
# ============================================
BASE_MODEL = "beomi/Llama-3-Open-Ko-8B"
LORA_PATH = "./lora"  # upload the lora folder to the Space repo

# ============================================
# Load tokenizer and model
# ============================================
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
# FIX: PeftModel.from_pretrained has no `is_local` parameter — a local
# directory path is loaded directly, so the bogus kwarg is removed.
model = PeftModel.from_pretrained(model, LORA_PATH)
model.eval()  # inference only

model.config.eos_token_id = tokenizer.eos_token_id
model.config.pad_token_id = tokenizer.pad_token_id

# Llama-3 ends each chat turn with "<|eot_id|>"; stop generation there as
# well as at the tokenizer's EOS. Guarded in case the token is absent.
_EOT_ID = tokenizer.convert_tokens_to_ids("<|eot_id|>")
_STOP_IDS = (
    [tokenizer.eos_token_id]
    if _EOT_ID is None or _EOT_ID < 0
    else [tokenizer.eos_token_id, _EOT_ID]
)

# ============================================
# AI personality (system prompt)
# ============================================
AI_PERSONALITY = """
너는 사용자의 말을 진심으로 들어주는 친구야. 사용자가 대화를 걸면 자연스럽고 일상적인 톤으로 대답해. 장황하지 말고, 공감하면서 짧고 따뜻하게 말할 것. 
"""

# NOTE(review): module-level history is shared across ALL Gradio sessions;
# for per-user conversations move this into Gradio session state.
history = []


# ============================================
# Chat function
# ============================================
def chat(user_input):
    """Generate one assistant reply for *user_input*, keeping a rolling history.

    Appends the user turn, builds a Llama-3-format prompt from the system
    personality plus recent history, samples a completion, stores it back
    into the history (trimmed to the last 10 entries), and returns it.
    """
    history.append({"role": "user", "content": user_input})

    # Llama-3 chat format. FIX: the end-of-turn token is "<|eot_id|>",
    # not "<|eot|>", and each header line is followed by a blank line.
    # The personality is emitted under a proper "system" header.
    prompt = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
    prompt += AI_PERSONALITY.strip() + "<|eot_id|>"
    for turn in history:
        prompt += (
            f"<|start_header_id|>{turn['role']}<|end_header_id|>\n\n"
            f"{turn['content']}<|eot_id|>"
        )
    prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"

    # FIX: follow the model's actual placement instead of hard-coding "cuda"
    # (device_map="auto" may shard the model or fall back to CPU).
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,  # FIX: temperature/top_p are ignored without sampling
            temperature=0.6,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=_STOP_IDS,
        )

    # Decode only the newly generated tokens; the split is a safeguard in
    # case an end-of-turn marker survives skip_special_tokens.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    response_full = tokenizer.decode(new_tokens, skip_special_tokens=True)
    response = response_full.split("<|eot_id|>")[0].strip()

    history.append({"role": "assistant", "content": response})
    if len(history) > 10:
        history[:] = history[-10:]  # keep only the 10 most recent turns
    return response


# ============================================
# Launch the Gradio interface
# ============================================
iface = gr.Interface(fn=chat, inputs="text", outputs="text")

if __name__ == "__main__":
    iface.launch()