fariedalfarizi committed on
Commit
8ae5bac
·
1 Parent(s): 7f85bd7

Fix ZeroGPU CUDA init error: force CPU loading, GPU only during inference

Browse files
Files changed (2) hide show
  1. app/interface.py +11 -8
  2. core/scoring_engine.py +18 -9
app/interface.py CHANGED
@@ -45,14 +45,15 @@ scorer = None
45
  # =======================================
46
 
47
  def initialize_model():
48
- """Initialize scoring system"""
49
  global scorer
50
 
51
  if scorer is None:
52
  whisper_model = os.getenv("WHISPER_MODEL", "openai/whisper-medium")
53
  print(f"Loading Whisper model: {whisper_model}...")
54
- scorer = AdvancedVocalScoringSystem(whisper_model=whisper_model)
55
- print("Model loaded!")
 
56
 
57
  return scorer
58
 
@@ -71,8 +72,9 @@ def get_status_icon(score: float) -> str:
71
 
72
  @spaces.GPU(duration=120)
73
  def _score_audio_gpu(audio_path: str, target_text: str, level: int):
74
- """GPU inference with ZeroGPU"""
75
  global scorer
 
76
  return scorer.score_audio(
77
  audio_path=audio_path,
78
  target_text=target_text,
@@ -102,10 +104,11 @@ def score_vocal(
102
  try:
103
  result = _score_audio_gpu(audio_file, target_text, level)
104
  except Exception as gpu_error:
105
- # Check if it's quota error
106
- if "quota" in str(gpu_error).lower() or "ZeroGPU" in str(gpu_error):
107
- print("⚠️ ZeroGPU quota exceeded, falling back to CPU...")
108
- # Fallback to CPU
 
109
  result = scorer.score_audio(
110
  audio_path=audio_file,
111
  target_text=target_text,
 
45
  # =======================================
46
 
47
  def initialize_model():
48
+ """Initialize scoring system - lazy loading without CUDA"""
49
  global scorer
50
 
51
  if scorer is None:
52
  whisper_model = os.getenv("WHISPER_MODEL", "openai/whisper-medium")
53
  print(f"Loading Whisper model: {whisper_model}...")
54
+ # Force CPU for initial loading to avoid CUDA init in main process
55
+ scorer = AdvancedVocalScoringSystem(whisper_model=whisper_model, device="cpu")
56
+ print("Model loaded on CPU (will use GPU when available)!")
57
 
58
  return scorer
59
 
 
72
 
73
  @spaces.GPU(duration=120)
74
  def _score_audio_gpu(audio_path: str, target_text: str, level: int):
75
+ """GPU inference with ZeroGPU - model already on CPU, inference uses GPU via pipeline"""
76
  global scorer
77
+ # Scorer is on CPU, but pipeline will use GPU via @spaces.GPU decorator
78
  return scorer.score_audio(
79
  audio_path=audio_path,
80
  target_text=target_text,
 
104
  try:
105
  result = _score_audio_gpu(audio_file, target_text, level)
106
  except Exception as gpu_error:
107
+ error_msg = str(gpu_error).lower()
108
+ # Check if it's quota error or CUDA init error
109
+ if "quota" in error_msg or "zerogpu" in error_msg or "cuda" in error_msg:
110
+ print(f"⚠️ GPU error ({type(gpu_error).__name__}), falling back to CPU...")
111
+ # Fallback to CPU - direct call without GPU decorator
112
  result = scorer.score_audio(
113
  audio_path=audio_file,
114
  target_text=target_text,
core/scoring_engine.py CHANGED
@@ -99,32 +99,41 @@ class AdvancedVocalScoringSystem:
99
  device: str = None
100
  ):
101
  """
102
- Initialize system dengan Whisper Large V3 (best for Indonesian)
103
 
104
  Args:
105
- whisper_model: Model Whisper (large-v3 recommended for Indonesian)
106
- device: 'cuda' atau 'cpu'
107
  """
108
- self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
109
 
110
- print(f"🔄 Loading Whisper Large V3 for Indonesian...")
111
 
112
- # Load Whisper Large V3 - best for all levels
113
  self.processor = WhisperProcessor.from_pretrained(whisper_model)
114
  self.model = WhisperForConditionalGeneration.from_pretrained(whisper_model)
115
- self.model.to(self.device)
 
 
 
 
116
  self.model.eval()
117
 
118
  # Whisper pipeline untuk transcription
 
119
  self.pipe = pipeline(
120
  "automatic-speech-recognition",
121
  model=whisper_model,
122
- device=0 if self.device == "cuda" else -1,
123
  chunk_length_s=30,
124
  return_timestamps=False
125
  )
126
 
127
- print(f"✅ Whisper model loaded on {self.device}")
128
 
129
  # Scoring weights untuk setiap level
130
  self.level_weights = LEVEL_WEIGHTS
 
99
  device: str = None
100
  ):
101
  """
102
+ Initialize system dengan Whisper Medium (best for Indonesian)
103
 
104
  Args:
105
+ whisper_model: Model Whisper (medium recommended for Indonesian)
106
+ device: 'cuda', 'cpu', or None (auto-detect, default to CPU for ZeroGPU compatibility)
107
  """
108
+ # For ZeroGPU: Always load on CPU first, GPU will be used during inference
109
+ if device is None:
110
+ self.device = "cpu" # Force CPU for initial load
111
+ else:
112
+ self.device = device
113
 
114
+ print(f"🔄 Loading Whisper Medium for Indonesian on {self.device}...")
115
 
116
+ # Load Whisper Medium - best balance for all levels
117
  self.processor = WhisperProcessor.from_pretrained(whisper_model)
118
  self.model = WhisperForConditionalGeneration.from_pretrained(whisper_model)
119
+
120
+ # Only move to device if explicitly CPU (avoid CUDA init in main process)
121
+ if self.device == "cpu":
122
+ self.model.to(self.device)
123
+
124
  self.model.eval()
125
 
126
  # Whisper pipeline untuk transcription
127
+ # Pipeline will use device from @spaces.GPU decorator automatically
128
  self.pipe = pipeline(
129
  "automatic-speech-recognition",
130
  model=whisper_model,
131
+ device=-1, # Force CPU, GPU will be assigned by ZeroGPU decorator
132
  chunk_length_s=30,
133
  return_timestamps=False
134
  )
135
 
136
+ print(f"✅ Whisper model loaded on {self.device} (GPU will be used via ZeroGPU decorator)")
137
 
138
  # Scoring weights untuk setiap level
139
  self.level_weights = LEVEL_WEIGHTS