hwang233 committed on
Commit
5fec798
·
verified ·
1 Parent(s): a4e599b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -12
README.md CHANGED
@@ -17,7 +17,7 @@ This repository contains the system description paper for Algharb, the submissio
17
 
18
  ## Introduction
19
 
20
- The Algharb system is a large translation model built based on the Qwen3-14B foundation. It is designed for high-quality translation across 13 diverse language directions and demonstrates state-of-the-art performance. Our approach is centered on a multi-stage refinement pipeline that systematically enhances translation fluency and faithfulness. In the WMT 2025 evaluation, Algharb significantly outperformed strong proprietary models like GPT-4o and Claude 3.7 Sonnet, achieving the top score in every submitted language pair.
21
 
22
  ## Usage
23
 
@@ -45,7 +45,6 @@ Here is a complete Python example:
45
  from vllm import LLM, SamplingParams
46
 
47
  # --- 1. Load Model and Tokenizer ---
48
- # Replace with the actual path to your fine-tuned Algharb model
49
  model_path = "path/to/your/algharb_model"
50
  llm = LLM(model=model_path)
51
 
@@ -59,7 +58,7 @@ lang_name_map = {
59
  "zh_CN": "chinese",
60
  "ko_KR": "korean",
61
  "ja_JP": "japanese",
62
- "ar_EG": "arabic", # Note: paper uses 'arz', this might need adjustment
63
  "cs_CZ": "czech",
64
  "ru_RU": "russian",
65
  "uk_UA": "ukraine",
@@ -74,21 +73,18 @@ target_language_name = lang_name_map.get(target_lang_code, "the target language"
74
  # --- 3. Construct the Prompt ---
75
  prompt = (
76
  f"Human: Please translate the following text into {target_language_name}: \n"
77
- f"{source_text}<|im_end|>\n"
78
  f"Assistant:"
79
  )
80
 
81
  prompts_to_generate = [prompt]
82
  print("Formatted Prompt:\n", prompt)
83
 
84
- # --- 4. Configure Sampling Parameters for MBR ---
85
- # We generate n candidates for our hybrid MBR decoding.
86
- # The script uses temperature=1 for diverse sampling.
87
  sampling_params = SamplingParams(
88
- n=10, # Number of candidate translations to generate
89
  temperature=1.0,
90
  top_p=1.0,
91
- max_tokens=512 # Adjust as needed
92
  )
93
 
94
  # --- 5. Generate Translations ---
@@ -104,7 +100,4 @@ for output in outputs:
104
  for i, candidate in enumerate(output.outputs):
105
  generated_text = candidate.text.strip()
106
  print(f"Candidate {i+1}: {generated_text}")
107
-
108
- # The generated candidates can now be passed to the
109
- # hybrid MBR re-ranking process described in the paper.
110
  ```
 
17
 
18
  ## Introduction
19
 
20
+ The Algharb system is a large translation model built based on the Qwen3-14B foundation. It is designed for high-quality translation across 13 diverse language directions and demonstrates state-of-the-art performance. Our approach is centered on a multi-stage refinement pipeline that systematically enhances translation fluency and faithfulness.
21
 
22
  ## Usage
23
 
 
45
  from vllm import LLM, SamplingParams
46
 
47
  # --- 1. Load Model and Tokenizer ---
 
48
  model_path = "path/to/your/algharb_model"
49
  llm = LLM(model=model_path)
50
 
 
58
  "zh_CN": "chinese",
59
  "ko_KR": "korean",
60
  "ja_JP": "japanese",
61
+ "ar_EG": "arabic",
62
  "cs_CZ": "czech",
63
  "ru_RU": "russian",
64
  "uk_UA": "ukraine",
 
73
  # --- 3. Construct the Prompt ---
74
  prompt = (
75
  f"Human: Please translate the following text into {target_language_name}: \n"
76
+ f"{source_text}<|im_end|>\n"
77
  f"Assistant:"
78
  )
79
 
80
  prompts_to_generate = [prompt]
81
  print("Formatted Prompt:\n", prompt)
82
 
 
 
 
83
  sampling_params = SamplingParams(
84
+ n=1,
85
  temperature=1.0,
86
  top_p=1.0,
87
+ max_tokens=512
88
  )
89
 
90
  # --- 5. Generate Translations ---
 
100
  for i, candidate in enumerate(output.outputs):
101
  generated_text = candidate.text.strip()
102
  print(f"Candidate {i+1}: {generated_text}")
 
 
 
103
  ```