Spaces:

MegaTronX
/

Hunyuan7B-fp8-Japanese-Translate

Sleeping

App Files Files Community

MegaTronX committed 24 days ago

Commit

1f87684

verified ·

1 Parent(s): 974cc9a

Create app.py

Browse files

Files changed (1) hide show

app.py +72 -0

app.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
class HunyuanTranslator:
    """Japanese-to-English translator backed by a pre-quantized Hunyuan-MT model.

    The tokenizer and model are loaded in ``__init__`` via ``_load_model``.

    Attributes:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.
        tokenizer: Tokenizer set by ``_load_model`` (``None`` before loading).
        model: Causal language model set by ``_load_model`` (``None`` before
            loading).
    """

    def __init__(self, model_name: str = "tencent/Hunyuan-MT-7B-fp8") -> None:
        """Store the model identifier and load the tokenizer and model.

        Args:
            model_name: Identifier handed to ``from_pretrained``.
        """
        self.model_name = model_name
        self.tokenizer = None
        self.model = None
        self._load_model()

    def _load_model(self) -> None:
        """Load the tokenizer and the pre-quantized FP8 model.

        Populates ``self.tokenizer`` and ``self.model`` and prints the
        resulting device and parameter dtype for diagnostics.
        """
        print("Loading pre-quantized Hunyuan-MT FP8 model...")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        # No torch_dtype is passed: the checkpoint is already quantized, so
        # the dtype is left to what the checkpoint's own config specifies.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            device_map="auto",
            # NOTE: this allows Python code shipped in the model repository to
            # run locally; only use it with repositories you trust.
            trust_remote_code=True,
        )
        print("FP8 model loaded successfully!")
        print(f"Model device: {self.model.device}")
        print(f"Model dtype: {next(self.model.parameters()).dtype}")

    def translate_ja_to_en(self, input_text: str) -> str:
        """Translate Japanese text to English.

        Args:
            input_text: Japanese text to translate. ``None``, empty, or
                whitespace-only input yields a hint message instead of a
                translation.

        Returns:
            The generated translation with surrounding whitespace removed, a
            hint message when the input is blank, or a string starting with
            ``"Translation error: "`` when tokenization or generation raises.
        """
        # UI callers may hand over None for an empty textbox; treat it like
        # blank input instead of raising on ``.strip()``.
        if not input_text or not input_text.strip():
            return "Please enter Japanese text to translate."

        try:
            prompt = f"Translate the following Japanese text to English:\n\n{input_text}"
            messages = [{"role": "user", "content": prompt}]

            # return_dict=True yields input_ids together with attention_mask,
            # so generate() does not have to guess the mask.
            encoded = self.tokenizer.apply_chat_template(
                messages,
                tokenize=True,
                add_generation_prompt=True,
                return_dict=True,
                return_tensors="pt",
            ).to(self.model.device)
            prompt_length = encoded["input_ids"].shape[-1]

            with torch.no_grad():
                outputs = self.model.generate(
                    **encoded,
                    max_new_tokens=512,
                    temperature=0.7,
                    do_sample=True,
                    top_p=0.9,
                    repetition_penalty=1.1,
                )

            # generate() returns prompt + completion for decoder-only models.
            # Slice off the prompt by token count rather than string-matching
            # the decoded text, which breaks on chat-template scaffolding and
            # on tokenizer round-trip differences.
            generated_tokens = outputs[0][prompt_length:]
            return self.tokenizer.decode(
                generated_tokens, skip_special_tokens=True
            ).strip()
        except Exception as e:
            # Boundary with the UI: report the failure as text rather than
            # letting the exception propagate to the caller.
            return f"Translation error: {str(e)}"
+# Rest of the Gradio code remains the same...