fariedalfarizi committed on
Commit
8ae5bac
·
1 Parent(s): 7f85bd7

Fix ZeroGPU CUDA init error: force CPU loading, GPU only during inference

Browse files
Files changed (2) hide show
  1. app/interface.py +11 -8
  2. core/scoring_engine.py +18 -9
app/interface.py CHANGED
@@ -45,14 +45,15 @@ scorer = None
45
  # =======================================
46
 
47
  def initialize_model():
48
- """Initialize scoring system"""
49
  global scorer
50
 
51
  if scorer is None:
52
  whisper_model = os.getenv("WHISPER_MODEL", "openai/whisper-medium")
53
  print(f"Loading Whisper model: {whisper_model}...")
54
- scorer = AdvancedVocalScoringSystem(whisper_model=whisper_model)
55
- print("Model loaded!")
 
56
 
57
  return scorer
58
 
@@ -71,8 +72,9 @@ def get_status_icon(score: float) -> str:
71
 
72
  @spaces.GPU(duration=120)
73
  def _score_audio_gpu(audio_path: str, target_text: str, level: int):
74
- """GPU inference with ZeroGPU"""
75
  global scorer
 
76
  return scorer.score_audio(
77
  audio_path=audio_path,
78
  target_text=target_text,
@@ -102,10 +104,11 @@ def score_vocal(
102
  try:
103
  result = _score_audio_gpu(audio_file, target_text, level)
104
  except Exception as gpu_error:
105
- # Check if it's quota error
106
- if "quota" in str(gpu_error).lower() or "ZeroGPU" in str(gpu_error):
107
- print("⚠️ ZeroGPU quota exceeded, falling back to CPU...")
108
- # Fallback to CPU
 
109
  result = scorer.score_audio(
110
  audio_path=audio_file,
111
  target_text=target_text,
 
45
  # =======================================
46
 
47
  def initialize_model():
48
+ """Initialize scoring system - lazy loading without CUDA"""
49
  global scorer
50
 
51
  if scorer is None:
52
  whisper_model = os.getenv("WHISPER_MODEL", "openai/whisper-medium")
53
  print(f"Loading Whisper model: {whisper_model}...")
54
+ # Force CPU for initial loading to avoid CUDA init in main process
55
+ scorer = AdvancedVocalScoringSystem(whisper_model=whisper_model, device="cpu")
56
+ print("Model loaded on CPU (will use GPU when available)!")
57
 
58
  return scorer
59
 
 
72
 
73
  @spaces.GPU(duration=120)
74
  def _score_audio_gpu(audio_path: str, target_text: str, level: int):
75
+ """GPU inference with ZeroGPU - model already on CPU, inference uses GPU via pipeline"""
76
  global scorer
77
+ # Scorer is on CPU, but pipeline will use GPU via @spaces.GPU decorator
78
  return scorer.score_audio(
79
  audio_path=audio_path,
80
  target_text=target_text,
 
104
  try:
105
  result = _score_audio_gpu(audio_file, target_text, level)
106
  except Exception as gpu_error:
107
+ error_msg = str(gpu_error).lower()
108
+ # Check if it's quota error or CUDA init error
109
+ if "quota" in error_msg or "zerogpu" in error_msg or "cuda" in error_msg:
110
+ print(f"⚠️ GPU error ({type(gpu_error).__name__}), falling back to CPU...")
111
+ # Fallback to CPU - direct call without GPU decorator
112
  result = scorer.score_audio(
113
  audio_path=audio_file,
114
  target_text=target_text,
core/scoring_engine.py CHANGED
@@ -99,32 +99,41 @@ class AdvancedVocalScoringSystem:
99
  device: str = None
100
  ):
101
  """
102
- Initialize system dengan Whisper Large V3 (best for Indonesian)
103
 
104
  Args:
105
- whisper_model: Model Whisper (large-v3 recommended for Indonesian)
106
- device: 'cuda' atau 'cpu'
107
  """
108
- self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
109
 
110
- print(f"🔄 Loading Whisper Large V3 for Indonesian...")
111
 
112
- # Load Whisper Large V3 - best for all levels
113
  self.processor = WhisperProcessor.from_pretrained(whisper_model)
114
  self.model = WhisperForConditionalGeneration.from_pretrained(whisper_model)
115
- self.model.to(self.device)
 
 
 
 
116
  self.model.eval()
117
 
118
  # Whisper pipeline untuk transcription
 
119
  self.pipe = pipeline(
120
  "automatic-speech-recognition",
121
  model=whisper_model,
122
- device=0 if self.device == "cuda" else -1,
123
  chunk_length_s=30,
124
  return_timestamps=False
125
  )
126
 
127
- print(f"✅ Whisper model loaded on {self.device}")
128
 
129
  # Scoring weights untuk setiap level
130
  self.level_weights = LEVEL_WEIGHTS
 
99
  device: str = None
100
  ):
101
  """
102
+ Initialize system dengan Whisper Medium (best for Indonesian)
103
 
104
  Args:
105
+ whisper_model: Model Whisper (medium recommended for Indonesian)
106
+ device: 'cuda', 'cpu', or None (auto-detect, default to CPU for ZeroGPU compatibility)
107
  """
108
+ # For ZeroGPU: Always load on CPU first, GPU will be used during inference
109
+ if device is None:
110
+ self.device = "cpu" # Force CPU for initial load
111
+ else:
112
+ self.device = device
113
 
114
+ print(f"🔄 Loading Whisper Medium for Indonesian on {self.device}...")
115
 
116
+ # Load Whisper Medium - best balance for all levels
117
  self.processor = WhisperProcessor.from_pretrained(whisper_model)
118
  self.model = WhisperForConditionalGeneration.from_pretrained(whisper_model)
119
+
120
+ # Only move to device if explicitly CPU (avoid CUDA init in main process)
121
+ if self.device == "cpu":
122
+ self.model.to(self.device)
123
+
124
  self.model.eval()
125
 
126
  # Whisper pipeline untuk transcription
127
+ # Pipeline will use device from @spaces.GPU decorator automatically
128
  self.pipe = pipeline(
129
  "automatic-speech-recognition",
130
  model=whisper_model,
131
+ device=-1, # Force CPU, GPU will be assigned by ZeroGPU decorator
132
  chunk_length_s=30,
133
  return_timestamps=False
134
  )
135
 
136
+ print(f"✅ Whisper model loaded on {self.device} (GPU will be used via ZeroGPU decorator)")
137
 
138
  # Scoring weights untuk setiap level
139
  self.level_weights = LEVEL_WEIGHTS