Commit: d206e43
Parent(s): 9f62d70

Strengthen CUDA memory management and clear the cache before transcription. Log memory status for debugging. Cap the queue size and concurrency to mitigate timeouts.
app.py CHANGED

@@ -152,8 +152,18 @@ def transcribe_audio(transcribe_path, model, duration_sec, device):
     """
     long_audio_settings_applied = False
     try:
+        # Clear CUDA memory before use to reduce fragmentation
+        if device == 'cuda':
+            torch.cuda.empty_cache()
+            gc.collect()
+
         model.to(device)
         model.to(torch.float32)
+
+        # Log memory status (for debugging)
+        if device == 'cuda':
+            print(f"CUDA Memory before transcription: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
+
         gr.Info(f"Transcribing on {device}...", duration=2)
 
         if duration_sec > 480:
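Note: the cleanup added at the top of the try block could equally be read as a small helper. The sketch below only restates the same calls for reference; the function name free_cuda_memory is illustrative and does not exist in app.py.

import gc
import torch

def free_cuda_memory(device: str) -> None:
    # Hypothetical helper mirroring the hunk above: release blocks held by
    # PyTorch's caching allocator and collect garbage before a transcription run.
    if device == 'cuda':
        torch.cuda.empty_cache()   # release cached blocks back to the driver
        gc.collect()               # drop Python refs that may still pin GPU tensors
        print(f"CUDA Memory allocated: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")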
@@ -622,5 +632,9 @@ with gr.Blocks(theme=nvidia_theme) as demo:
 
 if __name__ == "__main__":
     print("Launching Gradio Demo...")
-
+    # Limit queue size and concurrency to mitigate timeouts
+    demo.queue(
+        max_size=5,
+        default_concurrency_limit=1
+    )
     demo.launch()
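The queue settings above bound how much work the app accepts at once: at most 5 requests wait in line, and events run one at a time unless a handler overrides the limit. A standalone sketch of the same configuration, assuming Gradio 4.x where Blocks.queue() accepts max_size and default_concurrency_limit (the echo app itself is just a placeholder):

import gradio as gr

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Echo")
    inp.submit(lambda text: text, inp, out)

if __name__ == "__main__":
    # Keep at most 5 requests queued; run events one at a time by default.
    demo.queue(max_size=5, default_concurrency_limit=1)
    demo.launch()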