Spaces:
Sleeping
Sleeping
| """ | |
| Audio Processor - Main Orchestrator | |
| Koordinasi semua analisis audio | |
| """ | |
| import time | |
| from typing import Dict, Optional, List | |
| from app.config import settings | |
| from app.services.speech_to_text import SpeechToTextService | |
| from app.services.tempo import TempoService | |
| from app.services.articulation import ArticulationService, ProfanityDetector | |
| from app.services.structure import StructureService | |
| from app.services.keywords import KeywordService | |
class AudioProcessor:
    """Main orchestrator that coordinates every audio analysis step.

    No analysis service is instantiated in ``__init__``; each one is built on
    first access through a read-only property and then cached on the
    instance in the matching ``_<name>`` attribute.
    """

    def __init__(self):
        """Create empty service slots; the services are built lazily."""
        print("π Initializing Audio Processor...")

        # Cached service instances, populated by the properties below.
        self._stt_service = None
        self._tempo_service = None
        self._articulation_service = None
        self._structure_service = None
        self._keyword_service = None

        print("β Audio Processor ready!\n")

    # NOTE: these accessors must be properties because ``process_audio``
    # reads them as attributes (``self.stt_service.transcribe(...)``).

    @property
    def stt_service(self):
        """Return the speech-to-text service, creating it on first access."""
        if self._stt_service is None:
            self._stt_service = SpeechToTextService(
                model_name=settings.WHISPER_MODEL,
                device="auto",  # Auto-detect GPU/CPU
                language="id"
            )
        return self._stt_service

    @property
    def tempo_service(self):
        """Return the tempo service, creating it on first access."""
        if self._tempo_service is None:
            self._tempo_service = TempoService()
        return self._tempo_service

    @property
    def articulation_service(self):
        """Return the articulation service, creating it on first access."""
        if self._articulation_service is None:
            self._articulation_service = ArticulationService()
        return self._articulation_service

    @property
    def structure_service(self):
        """Return the structure service, creating it on first access."""
        if self._structure_service is None:
            # Uses default 'Cyberlace/swara-structure-model' from HF Hub
            self._structure_service = StructureService()
        return self._structure_service

    @property
    def keyword_service(self):
        """Return the keyword service, creating it on first access."""
        if self._keyword_service is None:
            self._keyword_service = KeywordService(
                dataset_path=settings.KATA_KUNCI_PATH
            )
        return self._keyword_service

    def process_audio(
        self,
        audio_path: str,
        reference_text: Optional[str] = None,
        topic_id: Optional[str] = None,
        custom_topic: Optional[str] = None,
        custom_keywords: Optional[List[str]] = None,
        analyze_tempo: bool = True,
        analyze_articulation: bool = True,
        analyze_structure: bool = True,
        analyze_keywords: bool = False,
        analyze_profanity: bool = False
    ) -> Dict:
        """Transcribe an audio file and run every requested analysis.

        Args:
            audio_path: Path to the audio file.
            reference_text: Reference text; articulation analysis is skipped
                when this is empty or ``None``.
            topic_id: Predefined topic ID (Level 1-2), used for keyword
                analysis when no custom topic/keywords are supplied.
            custom_topic: User-supplied topic (Level 3).
            custom_keywords: Keyword list for the custom topic (Level 3).
            analyze_tempo: Run tempo analysis.
            analyze_articulation: Run articulation analysis.
            analyze_structure: Run structure analysis.
            analyze_keywords: Run keyword analysis.
            analyze_profanity: Run profanity detection.

        Returns:
            Dict with ``'transcript'``, ``'overall_score'`` and
            ``'processing_time'`` always present, plus ``'tempo'``,
            ``'articulation'``, ``'structure'``, ``'keywords'`` and
            ``'profanity'`` for each analysis that actually ran.
            ``'overall_score'`` is the mean of the tempo, articulation,
            structure and keyword scores that are present (rounded to two
            decimals), or ``0`` when none ran.
        """
        start_time = time.time()

        print("="*70)
        print("π― STARTING AUDIO ANALYSIS")
        print("="*70)
        print(f"π Audio file: {audio_path}")
        print(f"βοΈ Tempo: {analyze_tempo}")
        print(f"βοΈ Articulation: {analyze_articulation}")
        print(f"βοΈ Structure: {analyze_structure}")
        print(f"βοΈ Keywords: {analyze_keywords}")
        print(f"βοΈ Profanity: {analyze_profanity}")
        print("="*70 + "\n")

        results = {}

        # 1. Speech to text (always required: every later step uses it).
        print("π Step 1/6: Transcribing audio...")
        transcript_result = self.stt_service.transcribe(audio_path)
        transcript = transcript_result['text']
        results['transcript'] = transcript
        print(f"β Transcript: {transcript[:100]}...\n")

        # 2. Tempo analysis
        if analyze_tempo:
            print("π΅ Step 2/6: Analyzing tempo...")
            results['tempo'] = self.tempo_service.analyze(audio_path, transcript)
            print(f"β Tempo score: {results['tempo']['score']}/5\n")

        # 3. Articulation analysis (needs a reference text to compare with)
        if analyze_articulation and reference_text:
            print("π£οΈ Step 3/6: Analyzing articulation...")
            results['articulation'] = self.articulation_service.analyze(
                transcribed_text=transcript,
                reference_text=reference_text
            )
            print(f"β Articulation score: {results['articulation']['score']}/5\n")
        elif analyze_articulation:
            print("β οΈ Step 3/6: Skipping articulation (no reference text)\n")

        # 4. Structure analysis
        if analyze_structure:
            print("π Step 4/6: Analyzing structure...")
            results['structure'] = self.structure_service.analyze(transcript)
            print(f"β Structure score: {results['structure']['score']}/5\n")

        # 5. Keyword analysis
        if analyze_keywords:
            print("π Step 5/6: Analyzing keywords...")

            if custom_topic and custom_keywords:
                # Custom keywords (Level 3) take precedence over topic_id.
                results['keywords'] = self.keyword_service.analyze(
                    speech_text=transcript,
                    custom_topic=custom_topic,
                    custom_keywords=custom_keywords
                )
            elif topic_id:
                # Predefined topic (Level 1-2).
                results['keywords'] = self.keyword_service.analyze(
                    speech_text=transcript,
                    topic_id=topic_id
                )
            else:
                print("β οΈ Step 5/6: Skipping keywords (no topic_id or custom_keywords)\n")

            if 'keywords' in results:
                print(f"β Keyword score: {results['keywords']['score']}/5\n")
        else:
            # Was ``elif analyze_keywords`` (unreachable); the message only
            # makes sense when the flag is off.
            print("β οΈ Step 5/6: Keywords analysis disabled\n")

        # 6. Profanity detection (does not contribute to the overall score)
        if analyze_profanity:
            print("π« Step 6/6: Detecting profanity...")
            results['profanity'] = ProfanityDetector.detect_profanity(transcript)
            status = "DETECTED" if results['profanity']['has_profanity'] else "CLEAN"
            print(f"β Profanity check: {status} ({results['profanity']['profanity_count']} words)\n")

        # Overall score: mean of whichever scored analyses actually ran.
        scores = [
            results[name]['score']
            for name in ('tempo', 'articulation', 'structure', 'keywords')
            if name in results
        ]
        results['overall_score'] = round(sum(scores) / len(scores), 2) if scores else 0

        processing_time = time.time() - start_time
        results['processing_time'] = round(processing_time, 2)

        print("="*70)
        print("β ANALYSIS COMPLETE")
        print(f"β±οΈ Processing time: {processing_time:.2f}s")
        print(f"π Overall score: {results['overall_score']}/5")
        print("="*70 + "\n")

        return results