Spaces:
Running
on
Zero
Running
on
Zero
| # ======================================= | |
| # FASTAPI BACKEND - VOCAL ARTICULATION API V2 | |
| # Updated for Whisper ASR + multi-level support | |
| # ======================================= | |
| from fastapi import FastAPI, File, UploadFile, Form, HTTPException | |
| from fastapi.responses import JSONResponse, Response | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from typing import Optional, List, Dict, Any | |
| import tempfile | |
| import os | |
| import json | |
| from pathlib import Path | |
| try: | |
| import orjson | |
| ORJSON_AVAILABLE = True | |
| except ImportError: | |
| ORJSON_AVAILABLE = False | |
| from core.scoring_engine import AdvancedVocalScoringSystem, ScoreResult | |
| from core.constants import ARTICULATION_LEVELS | |
| # ======================================= | |
| # CUSTOM JSON RESPONSE | |
| # ======================================= | |
class CustomJSONResponse(Response):
    """JSON response that serializes its content directly to UTF-8 bytes.

    ``orjson`` is used when the optional import at the top of the module
    succeeded; otherwise the standard-library ``json`` module is used.
    """

    media_type = "application/json"

    def render(self, content: Any) -> bytes:
        """Serialize *content* into the bytes sent as the response body."""
        if not ORJSON_AVAILABLE:
            # Standard-library fallback: keep non-ASCII characters unescaped,
            # refuse NaN/Infinity, and emit the most compact separators.
            text = json.dumps(
                content,
                ensure_ascii=False,
                allow_nan=False,
                indent=None,
                separators=(',', ':'),
            )
            return text.encode('utf-8')

        # orjson already returns bytes; enable numpy serialization and
        # non-string dictionary keys.
        flags = orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_NON_STR_KEYS
        return orjson.dumps(content, option=flags)
| # ======================================= | |
| # FASTAPI APP INITIALIZATION | |
| # ======================================= | |
# NOTE(review): the description mentions "Whisper Medium" while the startup
# default elsewhere in this file is "openai/whisper-small" — confirm which
# one is intended.
app = FastAPI(
    title="Vocal Articulation API v2.0",
    description="API for Indonesian vocal articulation assessment using Whisper Medium ASR",
    version="2.0.0",
    docs_url="/docs",  # Enable Swagger UI
    redoc_url="/redoc",  # Enable ReDoc
    openapi_url="/openapi.json",  # Enable OpenAPI JSON
)

# CORS middleware.
# Credentials (cookies / Authorization headers) must not be combined with
# wildcard origins, methods or headers. This API is open to every origin and
# its endpoints take no credentials, so credentials are disabled. To enable
# them, replace the wildcards with explicit lists first.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
| # ======================================= | |
| # PYDANTIC MODELS (Minimal) | |
| # ======================================= | |
| # Removed to reduce OpenAPI schema size | |
| # Models are now returned as plain dicts | |
| # ======================================= | |
| # GLOBAL VARIABLES | |
| # ======================================= | |
# Shared scoring system; stays None until the startup handler has loaded it.
scorer: Optional[AdvancedVocalScoringSystem] = None

# =======================================
# STARTUP & SHUTDOWN
# =======================================


@app.on_event("startup")
async def startup_event():
    """Load the scoring model when the application starts.

    The Whisper model name is read from the ``WHISPER_MODEL`` environment
    variable and defaults to ``"openai/whisper-small"``. The resulting
    scoring system is stored in the module-level ``scorer``.

    Raises:
        Exception: whatever ``AdvancedVocalScoringSystem`` raises while
            loading; it is printed and re-raised so startup fails loudly
            instead of serving without a model.
    """
    global scorer
    print("π Starting Vocal Articulation API v2...")
    # Whisper model name comes from the environment, with a default.
    whisper_model = os.getenv("WHISPER_MODEL", "openai/whisper-small")
    try:
        scorer = AdvancedVocalScoringSystem(whisper_model=whisper_model)
        print("β Whisper model loaded successfully!")
    except Exception as e:
        print(f"β Error loading model: {e}")
        raise
@app.on_event("shutdown")
async def shutdown_event():
    """Log a message when the application shuts down."""
    print("π Shutting down Vocal Articulation API v2...")
| # ======================================= | |
| # API ENDPOINTS | |
| # ======================================= | |
@app.get("/")
async def root():
    """Root endpoint: return static information about the API.

    Returns:
        CustomJSONResponse: a message, the version, a feature list and a
        map of the available endpoint paths.
    """
    return CustomJSONResponse(
        content={
            "message": "Vocal Articulation Assessment API v2",
            "version": "2.0.0",
            "features": [
                "Whisper ASR-based clarity scoring",
                "Multi-level support (Level 1-5)",
                "6 scoring metrics",
                "Comprehensive audio analysis",
            ],
            "endpoints": {
                "health": "/health",
                "levels": "/levels",
                "score": "/score",
                "batch_score": "/batch_score",
                "docs": "/docs",
            },
        }
    )
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Returns:
        dict: ``status`` ("healthy" / "unhealthy"), ``model_loaded``,
        ``device`` (taken from the scorer, or "unknown") and
        ``whisper_model`` (the configured model name, or "not loaded").
    """
    model_loaded = scorer is not None
    # Report the model name the same way startup resolves it, so an
    # overridden WHISPER_MODEL is not misreported as the default.
    configured_model = os.getenv("WHISPER_MODEL", "openai/whisper-small")
    return {
        "status": "healthy" if model_loaded else "unhealthy",
        "model_loaded": model_loaded,
        "device": scorer.device if model_loaded else "unknown",
        "whisper_model": configured_model if model_loaded else "not loaded",
    }
@app.get("/levels")
async def get_levels():
    """Return every articulation level and the number of levels.

    Returns:
        dict: ``levels`` (the ``ARTICULATION_LEVELS`` constant as-is) and
        ``total_levels`` (its length).
    """
    return {
        "levels": ARTICULATION_LEVELS,
        "total_levels": len(ARTICULATION_LEVELS),
    }
@app.post("/score")
async def score_audio(
    audio: UploadFile = File(...),
    target_text: str = Form(...),
    level: int = Form(1),
):
    """Score a single uploaded audio file against a target text.

    Args:
        audio: Uploaded audio file; it is written to a temporary file whose
            suffix is taken from the upload's filename.
        target_text: Text the speaker was expected to say; must not be blank.
        level: Articulation level; must be a key of ``ARTICULATION_LEVELS``.

    Returns:
        CustomJSONResponse: ``result.to_dict()`` with ``"success": True`` added.

    Raises:
        HTTPException: 503 when the model is not loaded, 400 for an invalid
            level or blank target text, 500 when saving or scoring fails.
    """
    if scorer is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Validate level
    if level not in ARTICULATION_LEVELS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid level. Must be 1-5. Available levels: {list(ARTICULATION_LEVELS.keys())}",
        )

    # Validate target text
    if not target_text or not target_text.strip():
        raise HTTPException(
            status_code=400,
            detail="target_text cannot be empty",
        )

    # NOTE(review): scorer.score_audio is called synchronously inside this
    # coroutine; if it is long-running it holds the event loop — confirm
    # whether it should be offloaded to a worker thread.
    tmp_path = None
    try:
        # The upload may carry no filename; fall back to an empty suffix.
        suffix = Path(audio.filename or "").suffix
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            # Record the path before writing so a failed write is still
            # cleaned up (delete=False leaves the file behind otherwise).
            tmp_path = tmp_file.name
            tmp_file.write(await audio.read())

        result = scorer.score_audio(
            audio_path=tmp_path,
            target_text=target_text,
            level=level,
        )

        # Convert result to dict with JSON-safe types
        response_data = result.to_dict()
        response_data["success"] = True
        return CustomJSONResponse(content=response_data)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error processing audio: {e}",
        ) from e
    finally:
        # Always remove the temporary file, on success and on failure.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
@app.post("/batch_score")
async def batch_score_audio(
    audios: List[UploadFile] = File(...),
    target_texts: str = Form(...),
    levels: str = Form("1"),
):
    """Score several uploaded audio files in one request.

    Args:
        audios: Uploaded audio files.
        target_texts: Comma-separated target texts, one per audio file.
            Because the list is split on commas, an individual target text
            cannot itself contain a comma.
        levels: Comma-separated integer levels; either a single value that
            is applied to every file, or one value per file.

    Returns:
        CustomJSONResponse: ``{"results": [...], "total": n}``. Each entry
        holds ``filename`` and ``success``; successful entries also carry the
        fields of ``result.to_dict()``, failed ones an ``error`` message.

    Raises:
        HTTPException: 503 when the model is not loaded; 400 when the counts
            of texts/levels do not match the files or a level is not an
            integer.
    """
    if scorer is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Parse target texts
    targets = [text.strip() for text in target_texts.split(",")]
    if len(targets) != len(audios):
        raise HTTPException(
            status_code=400,
            detail="Number of target_texts must match number of audio files",
        )

    # Parse levels; a malformed value is a client error, not a server error.
    try:
        level_list = [int(item.strip()) for item in levels.split(",")]
    except ValueError as e:
        raise HTTPException(
            status_code=400,
            detail="levels must be a comma-separated list of integers",
        ) from e

    if len(level_list) == 1:
        level_list = level_list * len(audios)
    elif len(level_list) != len(audios):
        raise HTTPException(
            status_code=400,
            detail="Number of levels must be 1 or match number of audio files",
        )

    results = []
    for audio, target, level in zip(audios, targets, level_list):
        # Reset per file so cleanup never sees a path from an earlier item.
        tmp_path = None
        try:
            # Same validation as the single-file endpoint, reported per item
            # so one bad entry does not fail the whole batch.
            if level not in ARTICULATION_LEVELS:
                raise ValueError(
                    f"Invalid level {level}. Available levels: {list(ARTICULATION_LEVELS.keys())}"
                )
            if not target:
                raise ValueError("target_text cannot be empty")

            # The upload may carry no filename; fall back to an empty suffix.
            suffix = Path(audio.filename or "").suffix
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                tmp_path = tmp_file.name
                tmp_file.write(await audio.read())

            result = scorer.score_audio(
                audio_path=tmp_path,
                target_text=target,
                level=level,
            )

            # Convert to dict with JSON-safe types
            result_dict = result.to_dict()
            result_dict["filename"] = audio.filename
            result_dict["success"] = True
            results.append(result_dict)
        except Exception as e:
            # Best-effort batch: record the failure and continue.
            results.append({
                "filename": audio.filename,
                "success": False,
                "error": str(e),
            })
        finally:
            if tmp_path is not None and os.path.exists(tmp_path):
                os.unlink(tmp_path)

    return CustomJSONResponse(
        content={"results": results, "total": len(results)}
    )
| # ======================================= | |
| # RUN SERVER | |
| # ======================================= | |
if __name__ == "__main__":
    import uvicorn

    # Configuration: host and port can be overridden through the environment.
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "8000"))
    print(f"π Starting server on {host}:{port}")
    # Use the configured port so the printed URL is right when PORT is set.
    print(f"π API Documentation: http://localhost:{port}/docs")
    # NOTE(review): reload=True is a development setting and the import
    # string assumes this module is named api_v2 — confirm both for
    # deployment.
    uvicorn.run(
        "api_v2:app",
        host=host,
        port=port,
        reload=True,
        log_level="info",
    )