import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import gradio as gr
# ============================================
# Model + LoRA paths
# ============================================
BASE_MODEL = "beomi/Llama-3-Open-Ko-8B"
LORA_PATH = "./lora"  # upload the lora folder to the Space repo
# ============================================
# Load tokenizer and model
# ============================================
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token  # Llama-3 has no pad token by default

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)
model = PeftModel.from_pretrained(model, LORA_PATH)  # load the LoRA adapter from the local folder
model.config.eos_token_id = tokenizer.eos_token_id
model.config.pad_token_id = tokenizer.pad_token_id
# ============================================
# AI personality (system prompt)
# ============================================
AI_PERSONALITY = """
You are a friend who listens sincerely to whatever the user says.
When the user starts a conversation, answer in a natural, everyday tone.
Don't ramble; be empathetic and keep your replies short and warm.
"""

history = []
# ============================================
# Chat function
# ============================================
def chat(user_input):
    history.append({"role": "user", "content": user_input})

    # Build a Llama-3 style prompt: persona first, then the running history
    prompt = "<|begin_of_text|>\n" + AI_PERSONALITY.strip() + "\n\n"
    for turn in history:
        prompt += f"<|start_header_id|>{turn['role']}<|end_header_id|>\n{turn['content']}<|eot_id|>\n"
    prompt += "<|start_header_id|>assistant<|end_header_id|>\n"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,  # required for temperature / top_p to take effect
            temperature=0.6,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")],
        )

    # Decode only the newly generated tokens; special tokens such as <|eot_id|> are stripped
    response = tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True).strip()

    history.append({"role": "assistant", "content": response})
    if len(history) > 10:
        history[:] = history[-10:]  # keep only the 10 most recent messages
    return response
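
# For a single user turn, the prompt assembled in chat() looks roughly like this
# (illustrative sketch only; the user text "Hello!" is made up):
#
#   <|begin_of_text|>
#   You are a friend who listens sincerely to whatever the user says. ...
#
#   <|start_header_id|>user<|end_header_id|>
#   Hello!<|eot_id|>
#   <|start_header_id|>assistant<|end_header_id|>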
# ============================================
# Launch the Gradio interface
# ============================================
iface = gr.Interface(fn=chat, inputs="text", outputs="text")
iface.launch()
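
# ============================================
# Deployment note (not part of the app code)
# ============================================
# Alongside app.py, the Space also needs a requirements.txt. A minimal,
# unpinned dependency list (exact versions are assumptions) would be:
#   torch
#   transformers
#   peft
#   accelerate   # needed because device_map="auto" relies on Accelerate
#   gradio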