fuvty committed on
Commit 4d180b8 · 1 Parent(s): 821387d
Files changed (1)
  1. app.py +15 -34
app.py CHANGED
@@ -106,11 +106,7 @@ class ModelManager:
             self.single_model_name, self.device
         )
         set_default_chat_template(self.single_tokenizer, self.single_model_name)
-
-        # Move to CUDA if available (following HuggingFace ZeroGPU pattern)
-        if torch.cuda.is_available():
-            self.single_model = self.single_model.to('cuda')
-        print(f"[Single] ✓ Model loaded")
+        print("[Single] ✓ Model loaded")
 
     def _load_t2t_model(self):
         """Load two-stage model."""
@@ -126,11 +122,7 @@ class ModelManager:
             device=str(self.device),
             background_prompt=self.t2t_background_prompt
         )
-        # Move to CUDA if available (following HuggingFace ZeroGPU pattern)
-        if torch.cuda.is_available():
-            self.t2t_model.context_model = self.t2t_model.context_model.to('cuda')
-            self.t2t_model.answer_model = self.t2t_model.answer_model.to('cuda')
-        print("[T2T] ✓ Models loaded")
+        print("[T2T] ✓ Model loaded")
 
     def _load_c2c_model(self):
         """Load Rosetta (C2C) model."""
@@ -187,10 +179,6 @@ class ModelManager:
         self.c2c_model, self.c2c_tokenizer = load_rosetta_model(
             model_config, eval_config, self.device
         )
-
-        # Move to CUDA if available (following HuggingFace ZeroGPU pattern)
-        if torch.cuda.is_available():
-            self.c2c_model = self.c2c_model.to('cuda')
         print("[C2C] ✓ Model loaded")
 
     def _load_all_models(self):
@@ -261,15 +249,9 @@ class ModelManager:
 
         # Stream tokens
         generated_text = ""
-        try:
-            for token in streamer:
-                generated_text += token
-                yield generated_text
-        except Exception as e:
-            print(f"[Single] Streaming error: {e}")
-            yield f"Error generating response: {e}"
-        finally:
-            thread.join()
+        for token in streamer:
+            generated_text += token
+            yield generated_text
 
     @spaces.GPU(duration=90)
     def generate_t2t(self, user_input: str) -> Generator[tuple[str, str], None, None]:
@@ -399,15 +381,9 @@ class ModelManager:
 
         # Stream tokens
        generated_text = ""
-        try:
-            for token in streamer:
-                generated_text += token
-                yield generated_text
-        except Exception as e:
-            print(f"[C2C] Streaming error: {e}")
-            yield f"Error generating response: {e}"
-        finally:
-            thread.join()
+        for token in streamer:
+            generated_text += token
+            yield generated_text
 
 
 def create_demo(model_manager: ModelManager):
@@ -421,7 +397,12 @@ A. Why the act of destroying nature might be immoral.
 B. Why people who destroy the environment might be bad people.
 C. How the decision to preserve the environment benefits the environment.
 D. Whether plants have interests.""",
-        "example2": "Which company launched the Gemini 1.5 Pro model in early 2024?"
+        "example2": """Why is the Mars Exploration Rover Spirit currently tilted towards the north?
+
+A. Because it’s climbing up a big hill.
+B. Because it’s in the southern hemisphere where it is winter now.
+C. Because it’s in the northern hemisphere where it is winter now.
+D. Because one of its wheels broke."""
     }
 
     def respond(user_input: str):
@@ -488,7 +469,7 @@ D. Whether plants have interests.""",
         gr.Markdown("Example Questions:")
         with gr.Row():
             example1_btn = gr.Button("📝 Example 1: Philosophy", size="sm")
-            example2_btn = gr.Button("📝 Example 2: Knowledge Cutoff", size="sm")
+            example2_btn = gr.Button("📝 Example 2: Astronomy", size="sm")
 
         with gr.Row():
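Context for the change: the removed blocks were per-model `torch.cuda.is_available()` / `.to('cuda')` moves plus a try/except/finally wrapper around the streaming loop, and what remains follows the plain ZeroGPU shape where GPU work happens inside `@spaces.GPU`-decorated generators that drain a `TextIteratorStreamer`. The sketch below is a minimal, self-contained illustration of that shape only, not the app.py implementation; the model name, `max_new_tokens`, and the standalone `generate` function are placeholders.

```python
# Minimal sketch (not app.py) of a @spaces.GPU-decorated streaming generator:
# model.generate runs in a background thread while partial text is yielded
# from a TextIteratorStreamer. Model name and generation kwargs are placeholders.
from threading import Thread

import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"  # placeholder checkpoint

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)


@spaces.GPU(duration=90)  # ZeroGPU attaches a GPU only for the duration of this call
def generate(user_input: str):
    inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # Run generation in a background thread so tokens can be yielded as they arrive.
    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=256),
    )
    thread.start()

    generated_text = ""
    for token in streamer:  # the same plain streaming loop this commit keeps
        generated_text += token
        yield generated_text
    thread.join()
```

In app.py the equivalent loops live inside ModelManager's generate_* methods and feed the Gradio interface built in create_demo.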