# Captured from a Hugging Face Space page (status badge: "Sleeping").
# The remainder of this file is the Space's app.py source.
# app.py (GGUF + llama-cpp-python version)
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import uvicorn
import json

# 1. Create the FastAPI application instance.
app = FastAPI()

# 2. Prepare GGUF model loading.
# TheBloke's SOLAR model is used as the example.
# 'repo_id' is the repository holding the model; 'filename' is the specific
# GGUF file inside that repository.
model_repo_id = "TheBloke/SOLAR-10.7B-Instruct-v1.0-GGUF"
model_filename = "solar-10.7b-instruct-v1.0.Q4_K_S.gguf"

# Alternative for testing: a 7B model (Qwen 2.5).
# model_repo_id = "Triangle104/Qwen2.5-7B-Instruct-Q4_K_S-GGUF"
# model_filename = "qwen2.5-7b-instruct-q4_k_s.gguf"

# Download the GGUF file from the Hugging Face Hub and get its local path.
# This runs only once, at server start-up (subsequent starts hit the cache).
model_path = hf_hub_download(repo_id=model_repo_id, filename=model_filename)

# Load the GGUF model into memory via llama-cpp-python.
# n_gpu_layers=-1 means "offload as many layers as possible to the GPU";
# use 0 in CPU-only environments.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,       # maximum number of tokens the model handles at once
    n_threads=8,      # number of CPU threads to use
    n_gpu_layers=0,   # layers to offload to GPU (-1 = offload all possible)
)
# Request-body schema (unchanged from the previous version).
class TranslationRequest(BaseModel):
    """Request payload: the Korean text to translate."""

    # Korean source text to be translated.
    text: str
# 3. API endpoint
# NOTE(review): the original scrape lost the route decorator; "/translate" is
# the presumed path — confirm against the Space's client code.
@app.post("/translate")
async def translate_all_in_one(request: TranslationRequest):
    """Translate Korean text into English and Japanese, plus a Hangul
    pronunciation of the Japanese translation, as one JSON object.

    Returns the model's parsed JSON on success, or an error payload
    containing the raw model output when the response is not valid JSON.
    """
    korean_text = request.text

    # Prompt format suited to this GGUF model (Llama-2 Chat style).
    prompt = f"""### User:
You are a helpful translation and pronunciation assistant.
Given the following Korean text, perform three tasks.
1. Translate the text into natural, everyday English.
2. Translate the text into natural, everyday Japanese.
3. Provide the Korean pronunciation (Hangul) for the generated Japanese translation.
Format your response as a single, valid JSON object with the keys "english", "japanese", and "pronunciation".
Korean Text: "{korean_text}"
### Assistant:
"""

    # Run text generation through the model.
    output = llm(
        prompt,
        max_tokens=512,
        stop=["### User:", "</s>"],  # sequences that stop generation
        temperature=0.7,
        top_k=50,
        echo=False,  # do not echo the prompt back in the output
    )

    generated_output = ""
    try:
        # Extraction lives inside the try: the except already listed
        # IndexError, but the original raised it one line too early to be
        # caught. GGUF models also don't always emit clean JSON.
        generated_output = output["choices"][0]["text"].strip()
        return json.loads(generated_output)
    except (json.JSONDecodeError, IndexError) as e:
        print(f"JSON 파싱 에러: {e}")
        print(f"모델 원본 출력: {generated_output}")
        return {"error": "Failed to parse model output as JSON", "raw_output": generated_output}
# NOTE(review): uvicorn is imported at the top of the file but never used in
# the visible code — an `if __name__ == "__main__": uvicorn.run(app, ...)`
# entry point is presumably further down or was lost in the scrape; confirm.
@app.get("/")
def read_root():
    """Health-check endpoint reporting that the API is running.

    The route decorator was missing in the scraped source, leaving the
    handler unregistered; "/" is the presumed path.
    """
    return {"message": "GGUF Translation API is running"}