# =======================================
# FASTAPI BACKEND - VOCAL ARTICULATION API V2
# Updated for Whisper ASR + Multi-Level Support
# =======================================

from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import JSONResponse, Response
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, List, Dict, Any
import tempfile
import os
import json
from pathlib import Path

try:
    import orjson
    ORJSON_AVAILABLE = True
except ImportError:
    ORJSON_AVAILABLE = False

from core.scoring_engine import AdvancedVocalScoringSystem, ScoreResult
from core.constants import ARTICULATION_LEVELS


# =======================================
# CUSTOM JSON RESPONSE
# =======================================

class CustomJSONResponse(Response):
    """JSON response that always emits UTF-8 encoded bytes.

    Uses ``orjson`` when it is installed and falls back to the standard
    ``json`` module (with ``ensure_ascii=False``) otherwise.
    """

    media_type = "application/json"

    def render(self, content: Any) -> bytes:
        """Serialize *content* to UTF-8 encoded JSON bytes.

        Raises:
            ValueError: in the stdlib fallback, when *content* contains
                NaN/Infinity (``allow_nan=False``).
        """
        if ORJSON_AVAILABLE:
            # orjson returns UTF-8 bytes directly.
            return orjson.dumps(
                content,
                option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_NON_STR_KEYS
            )
        # Fallback: compact output, non-ASCII characters kept as-is.
        return json.dumps(
            content,
            ensure_ascii=False,
            allow_nan=False,
            indent=None,
            separators=(',', ':')
        ).encode('utf-8')


# =======================================
# FASTAPI APP INITIALIZATION
# =======================================

app = FastAPI(
    title="Vocal Articulation API v2.0",
    description="API for Indonesian vocal articulation assessment using Whisper Medium ASR",
    version="2.0.0",
    docs_url="/docs",  # Enable Swagger UI
    redoc_url="/redoc",  # Enable ReDoc
    openapi_url="/openapi.json",  # Enable OpenAPI JSON
)

# CORS middleware
# NOTE(review): a wildcard origin combined with allow_credentials=True is
# very permissive; the FastAPI CORS docs advise explicit origins when
# credentials are allowed. Left unchanged here -- confirm the intended policy.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# =======================================
# PYDANTIC MODELS (Minimal)
# =======================================
# Removed to reduce OpenAPI schema size
# Models are now returned as plain dicts

# =======================================
# GLOBAL VARIABLES
# =======================================

# Model used when the WHISPER_MODEL environment variable is not set.
DEFAULT_WHISPER_MODEL = "openai/whisper-small"

# Scoring system instance; populated by the startup hook.
scorer: Optional[AdvancedVocalScoringSystem] = None

# Name of the Whisper model that was actually loaded at startup.
loaded_whisper_model: Optional[str] = None


# =======================================
# INTERNAL HELPERS
# =======================================

def _remove_quietly(path: Optional[str]) -> None:
    """Delete *path* if it is set, ignoring filesystem errors."""
    if path is None:
        return
    try:
        os.unlink(path)
    except OSError:
        # Best-effort cleanup: a failed delete must not mask the real
        # outcome (result or exception) of the scoring call.
        pass


async def _score_upload(upload: UploadFile, target_text: str, level: int) -> Dict[str, Any]:
    """Score one uploaded audio file and return the result as a dict.

    The upload is written to a temporary file (keeping the original file
    extension, if any), passed to ``scorer.score_audio`` and the temporary
    file is removed afterwards, whether scoring succeeded or not.
    """
    content = await upload.read()
    # ``filename`` may be missing on an upload; fall back to no suffix.
    suffix = Path(upload.filename or "").suffix
    tmp_path: Optional[str] = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(content)
        # The file is closed (flushed) before the scorer opens it by path.
        result = scorer.score_audio(
            audio_path=tmp_path,
            target_text=target_text,
            level=level
        )
        return result.to_dict()
    finally:
        _remove_quietly(tmp_path)


# =======================================
# STARTUP & SHUTDOWN
# =======================================

@app.on_event("startup")
async def startup_event():
    """Load the model at startup."""
    global scorer, loaded_whisper_model
    print("🚀 Starting Vocal Articulation API v2...")

    # Whisper model from the environment, or the default
    whisper_model = os.getenv("WHISPER_MODEL", DEFAULT_WHISPER_MODEL)

    try:
        scorer = AdvancedVocalScoringSystem(whisper_model=whisper_model)
        loaded_whisper_model = whisper_model
        print("✅ Whisper model loaded successfully!")
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        raise


@app.on_event("shutdown")
async def shutdown_event():
    """Clean up at shutdown."""
    print("👋 Shutting down Vocal Articulation API v2...")


# =======================================
# API ENDPOINTS
# =======================================

@app.get("/", response_class=CustomJSONResponse)
async def root():
    """Root endpoint - API information"""
    return CustomJSONResponse(
        content={
            "message": "Vocal Articulation Assessment API v2",
            "version": "2.0.0",
            "features": [
                "Whisper ASR-based clarity scoring",
                "Multi-level support (Level 1-5)",
                "6 scoring metrics",
                "Comprehensive audio analysis"
            ],
            "endpoints": {
                "health": "/health",
                "levels": "/levels",
                "score": "/score",
                "batch_score": "/batch_score",
                "docs": "/docs"
            }
        }
    )


@app.get("/health", tags=["System"])
async def health_check():
    """Health check endpoint"""
    model_loaded = scorer is not None
    return {
        "status": "healthy" if model_loaded else "unhealthy",
        "model_loaded": model_loaded,
        "device": scorer.device if model_loaded else "unknown",
        # Report the model that was actually configured, not a fixed name.
        "whisper_model": (
            (loaded_whisper_model or DEFAULT_WHISPER_MODEL)
            if model_loaded else "not loaded"
        )
    }


@app.get("/levels", tags=["Articulation"])
async def get_levels():
    """Get all articulation levels"""
    return {
        "levels": ARTICULATION_LEVELS,
        "total_levels": len(ARTICULATION_LEVELS)
    }


@app.post("/score", response_class=CustomJSONResponse, tags=["Scoring"])
async def score_audio(
    audio: UploadFile = File(...),
    target_text: str = Form(...),
    level: int = Form(1)
):
    """Score audio file

    Responds with 503 when the model is not loaded, 400 for an unknown level
    or empty target text, and 500 when processing the audio fails.
    """
    if scorer is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Validate level
    if level not in ARTICULATION_LEVELS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid level. Must be 1-5. Available levels: {list(ARTICULATION_LEVELS.keys())}"
        )

    # Validate target text
    if not target_text or not target_text.strip():
        raise HTTPException(
            status_code=400,
            detail="target_text cannot be empty"
        )

    try:
        # Temp-file handling and cleanup live in the helper.
        response_data = await _score_upload(audio, target_text, level)
        response_data["success"] = True

        # Return with custom JSON response
        return CustomJSONResponse(content=response_data)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error processing audio: {str(e)}"
        ) from e


@app.post("/batch_score", tags=["Scoring"])
async def batch_score_audio(
    audios: List[UploadFile] = File(...),
    target_texts: str = Form(...),
    levels: str = Form("1")
):
    """Batch score multiple audio files

    ``target_texts`` and ``levels`` are comma-separated lists; a single level
    is applied to every file. Because the texts are split on commas, an
    individual target text cannot itself contain a comma. Failures of
    individual files are reported per item and do not abort the batch.
    """
    if scorer is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Parse target texts
    targets = [text.strip() for text in target_texts.split(",")]
    if len(targets) != len(audios):
        raise HTTPException(
            status_code=400,
            detail="Number of target_texts must match number of audio files"
        )

    # Parse levels; malformed input is a client error, not a server crash.
    try:
        level_list = [int(item.strip()) for item in levels.split(",")]
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail="levels must be a comma-separated list of integers"
        ) from None

    if len(level_list) == 1:
        level_list = level_list * len(audios)
    elif len(level_list) != len(audios):
        raise HTTPException(
            status_code=400,
            detail="Number of levels must be 1 or match number of audio files"
        )

    results = []
    for audio, target, level in zip(audios, targets, level_list):
        try:
            # Same validation as /score, reported per item.
            if level not in ARTICULATION_LEVELS:
                raise ValueError(
                    f"Invalid level. Must be 1-5. Available levels: {list(ARTICULATION_LEVELS.keys())}"
                )
            if not target:
                raise ValueError("target_text cannot be empty")

            result_dict = await _score_upload(audio, target, level)
            result_dict["filename"] = audio.filename
            result_dict["success"] = True
            results.append(result_dict)
        except Exception as e:
            results.append({
                "filename": audio.filename,
                "success": False,
                "error": str(e)
            })

    return CustomJSONResponse(
        content={"results": results, "total": len(results)}
    )


# =======================================
# RUN SERVER
# =======================================

if __name__ == "__main__":
    import uvicorn

    # Configuration
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", 8000))

    print(f"🚀 Starting server on {host}:{port}")
    # Use the configured port so the printed URL is correct when PORT is set.
    print(f"📖 API Documentation: http://localhost:{port}/docs")

    uvicorn.run(
        "api_v2:app",
        host=host,
        port=port,
        reload=True,
        log_level="info"
    )