MegaTronX committed on
Commit
1f87684
·
verified ·
1 Parent(s): 974cc9a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+
5
class HunyuanTranslator:
    """Japanese-to-English translator backed by a Hunyuan-MT causal LM.

    The tokenizer and model are loaded eagerly in ``__init__`` so that the
    first translation request does not pay the loading cost.
    """

    def __init__(self, model_name: str = "tencent/Hunyuan-MT-7B-fp8"):
        """Store the model identifier and load tokenizer and model.

        Args:
            model_name: Hub identifier (or local path) passed to
                ``from_pretrained`` for both tokenizer and model.
        """
        self.model_name = model_name
        self.tokenizer = None
        self.model = None
        self._load_model()

    def _load_model(self):
        """Load the tokenizer and the pre-quantized FP8 model."""
        print("Loading pre-quantized Hunyuan-MT FP8 model...")

        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)

        # No explicit torch_dtype: the checkpoint is pre-quantized, so the
        # loader is left to pick the dtype from the model's own config.
        # NOTE(review): trust_remote_code=True runs Python code shipped in
        # the model repository -- only use with repositories you trust.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            device_map="auto",
            trust_remote_code=True,
        )

        print("FP8 model loaded successfully!")
        print(f"Model device: {self.model.device}")
        print(f"Model dtype: {next(self.model.parameters()).dtype}")

    def translate_ja_to_en(self, input_text: str) -> str:
        """Translate Japanese text to English.

        Args:
            input_text: The Japanese text to translate.

        Returns:
            The generated translation, a prompt asking for input when
            ``input_text`` is empty, whitespace-only or ``None``, or a
            ``"Translation error: ..."`` message if anything fails while
            tokenizing, generating or decoding. Errors are returned as text
            (not raised) so the UI callback always gets a string to display.
        """
        # Guard against None as well as blank strings; calling .strip() on
        # None would otherwise raise before the error handling below.
        if not input_text or not input_text.strip():
            return "Please enter Japanese text to translate."

        try:
            # Japanese to English specific prompt
            prompt = f"Translate the following Japanese text to English:\n\n{input_text}"
            messages = [{"role": "user", "content": prompt}]

            # Apply the model's chat template and tokenize in one step.
            input_ids = self.tokenizer.apply_chat_template(
                messages,
                tokenize=True,
                add_generation_prompt=True,
                return_tensors="pt",
            ).to(self.model.device)
            prompt_length = input_ids.shape[-1]

            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids,
                    max_new_tokens=512,
                    temperature=0.7,
                    do_sample=True,
                    top_p=0.9,
                    repetition_penalty=1.1,
                )

            # generate() returns prompt + completion for causal LMs. Drop
            # the prompt by token count instead of string replacement: the
            # decoded text also contains chat-template wrapping, so a
            # substring match on the raw prompt is unreliable.
            generated_ids = outputs[0][prompt_length:]
            return self.tokenizer.decode(
                generated_ids, skip_special_tokens=True
            ).strip()

        except Exception as e:
            return f"Translation error: {str(e)}"
71
+
72
+ # Rest of the Gradio code remains the same...