# Source: commit 01aa014 by fariedalfarizi
# Commit message: Enable Swagger UI/ReDoc and fix pipeline language parameter error
# =======================================
# FASTAPI BACKEND - VOCAL ARTICULATION API V2
# Updated for Whisper ASR + Multi-Level Support
# =======================================
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import JSONResponse, Response
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, List, Dict, Any
import tempfile
import os
import json
from pathlib import Path
# orjson is an optional dependency: record whether it could be imported so the
# rest of the module can choose between orjson and the standard json module.
try:
    import orjson
except ImportError:
    ORJSON_AVAILABLE = False
else:
    ORJSON_AVAILABLE = True
from core.scoring_engine import AdvancedVocalScoringSystem, ScoreResult
from core.constants import ARTICULATION_LEVELS
# =======================================
# CUSTOM JSON RESPONSE
# =======================================
class CustomJSONResponse(Response):
    """JSON response whose body is always produced as UTF-8 encoded bytes.

    Serialization uses orjson when ORJSON_AVAILABLE is true and the standard
    json module otherwise.
    """

    media_type = "application/json"

    def render(self, content: Any) -> bytes:
        """Serialize ``content`` into the bytes sent as the response body."""
        if not ORJSON_AVAILABLE:
            # Standard-library fallback: keep non-ASCII characters unescaped,
            # use compact separators, and refuse NaN/Infinity (allow_nan=False).
            serialized = json.dumps(
                content,
                ensure_ascii=False,
                allow_nan=False,
                indent=None,
                separators=(',', ':'),
            )
            return serialized.encode('utf-8')

        # orjson already returns bytes; the options turn on numpy value
        # serialization and non-string dictionary keys.
        orjson_options = orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_NON_STR_KEYS
        return orjson.dumps(content, option=orjson_options)
# =======================================
# FASTAPI APP INITIALIZATION
# =======================================
# Application object. The three documentation routes are spelled out
# explicitly so Swagger UI, ReDoc and the OpenAPI schema are all served.
app = FastAPI(
    # Metadata shown in the generated documentation
    title="Vocal Articulation API v2.0",
    version="2.0.0",
    description="API for Indonesian vocal articulation assessment using Whisper Medium ASR",
    # Documentation routes
    openapi_url="/openapi.json",  # OpenAPI schema (JSON)
    docs_url="/docs",  # Swagger UI
    redoc_url="/redoc",  # ReDoc
)
# CORS middleware.
# Allowed origins are read from the comma-separated CORS_ALLOW_ORIGINS
# environment variable and default to "*" (any origin). Credentials are only
# enabled for an explicit origin list: FastAPI's CORS documentation states
# that allow_origins must not be ["*"] when allow_credentials is True, and a
# wildcard combined with credentials would let any site send credentialed
# requests to this API.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
# =======================================
# PYDANTIC MODELS (Minimal)
# =======================================
# Removed to reduce OpenAPI schema size
# Models are now returned as plain dicts
# =======================================
# GLOBAL VARIABLES
# =======================================
# Module-wide scorer instance shared by the endpoints. It starts as None and
# is assigned by startup_event(); endpoints treat None as "model not loaded".
scorer: Optional[AdvancedVocalScoringSystem] = None
# =======================================
# STARTUP & SHUTDOWN
# =======================================
@app.on_event("startup")
async def startup_event():
    """Load the scoring model when the application starts.

    The Whisper model name is read from the WHISPER_MODEL environment
    variable (default "openai/whisper-small") and passed to
    AdvancedVocalScoringSystem; the resulting instance is stored in the
    module-level ``scorer``.

    Raises:
        Exception: Any error raised while constructing the scorer is printed
            and then re-raised unchanged.
    """
    global scorer
    print("🚀 Starting Vocal Articulation API v2...")
    # Whisper model from the environment, or the default
    whisper_model = os.getenv("WHISPER_MODEL", "openai/whisper-small")
    try:
        scorer = AdvancedVocalScoringSystem(whisper_model=whisper_model)
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        raise
    else:
        print("✅ Whisper model loaded successfully!")
@app.on_event("shutdown")
async def shutdown_event():
    """Print a farewell message when the application shuts down."""
    print("👋 Shutting down Vocal Articulation API v2...")
# =======================================
# API ENDPOINTS
# =======================================
@app.get("/", response_class=CustomJSONResponse)
async def root():
    """Describe the API: name, version, feature list and endpoint paths."""
    feature_list = [
        "Whisper ASR-based clarity scoring",
        "Multi-level support (Level 1-5)",
        "6 scoring metrics",
        "Comprehensive audio analysis",
    ]
    endpoint_map = {
        "health": "/health",
        "levels": "/levels",
        "score": "/score",
        "batch_score": "/batch_score",
        "docs": "/docs",
    }
    api_info = {
        "message": "Vocal Articulation Assessment API v2",
        "version": "2.0.0",
        "features": feature_list,
        "endpoints": endpoint_map,
    }
    return CustomJSONResponse(content=api_info)
@app.get("/health", tags=["System"])
async def health_check():
    """Report whether the scoring model is loaded.

    Returns:
        dict with "status" ("healthy"/"unhealthy"), "model_loaded" (bool),
        "device" (the scorer's ``device`` attribute, or "unknown") and
        "whisper_model" (the configured model name, or "not loaded").
    """
    model_loaded = scorer is not None
    # Report the same model name startup_event() reads from the environment
    # instead of a hard-coded value that is wrong whenever WHISPER_MODEL is set.
    configured_model = os.getenv("WHISPER_MODEL", "openai/whisper-small")
    return {
        "status": "healthy" if model_loaded else "unhealthy",
        "model_loaded": model_loaded,
        "device": scorer.device if model_loaded else "unknown",
        "whisper_model": configured_model if model_loaded else "not loaded",
    }
@app.get("/levels", tags=["Articulation"])
async def get_levels():
    """Return the configured articulation levels together with their count."""
    level_count = len(ARTICULATION_LEVELS)
    return {"levels": ARTICULATION_LEVELS, "total_levels": level_count}
@app.post("/score", response_class=CustomJSONResponse, tags=["Scoring"])
async def score_audio(
    audio: UploadFile = File(...),
    target_text: str = Form(...),
    level: int = Form(1)
):
    """Score one uploaded audio file against a target text.

    Args:
        audio: Uploaded audio file. Its filename extension, if any, is reused
            as the suffix of the temporary copy handed to the scorer.
        target_text: Text to score the audio against; must not be blank.
        level: Articulation level; must be a member of ARTICULATION_LEVELS.

    Returns:
        CustomJSONResponse containing ``result.to_dict()`` plus
        ``"success": True``.

    Raises:
        HTTPException: 503 when the model is not loaded, 400 for an invalid
            level or blank target text, 500 when saving or scoring fails.
    """
    if scorer is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Validate level
    if level not in ARTICULATION_LEVELS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid level. Must be 1-5. Available levels: {list(ARTICULATION_LEVELS.keys())}"
        )

    # Validate target text
    if not target_text or not target_text.strip():
        raise HTTPException(
            status_code=400,
            detail="target_text cannot be empty"
        )

    tmp_path = None
    try:
        # An upload may arrive without a filename; fall back to no suffix.
        suffix = Path(audio.filename or "").suffix
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            # Record the path before writing so a failed write is still cleaned up.
            tmp_path = tmp_file.name
            tmp_file.write(await audio.read())

        result = scorer.score_audio(
            audio_path=tmp_path,
            target_text=target_text,
            level=level
        )

        response_data = result.to_dict()
        response_data["success"] = True
        return CustomJSONResponse(content=response_data)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error processing audio: {str(e)}"
        ) from e
    finally:
        # Remove the temporary copy on success and on failure alike.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: a file that is already gone or cannot
                # be removed must not mask the real outcome of the request.
                pass
@app.post("/batch_score", tags=["Scoring"])
async def batch_score_audio(
    audios: List[UploadFile] = File(...),
    target_texts: str = Form(...),
    levels: str = Form("1")
):
    """Score several uploaded audio files in one request.

    Args:
        audios: Uploaded audio files.
        target_texts: Comma-separated target texts, one per audio file. The
            value is split on commas, so a single text cannot contain one.
        levels: Comma-separated integer levels; either a single level applied
            to every file or exactly one level per file.

    Returns:
        CustomJSONResponse with "results" (one entry per file, each carrying
        "filename" and "success", plus either the score fields or "error")
        and "total".

    Raises:
        HTTPException: 503 when the model is not loaded; 400 when counts do
            not match, a target text is blank, or a level is not an integer
            or not a member of ARTICULATION_LEVELS.
    """
    if scorer is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Parse target texts
    targets = [text.strip() for text in target_texts.split(",")]
    if len(targets) != len(audios):
        raise HTTPException(
            status_code=400,
            detail="Number of target_texts must match number of audio files"
        )
    # Same rule as /score: a blank target text is rejected.
    if not all(targets):
        raise HTTPException(
            status_code=400,
            detail="target_text cannot be empty"
        )

    # Parse levels; a non-numeric value is a client error, not a server crash.
    try:
        level_list = [int(part.strip()) for part in levels.split(",")]
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail="levels must be a comma-separated list of integers"
        ) from None

    if len(level_list) == 1:
        level_list = level_list * len(audios)
    elif len(level_list) != len(audios):
        raise HTTPException(
            status_code=400,
            detail="Number of levels must be 1 or match number of audio files"
        )

    # Same rule as /score: every level must be a known articulation level.
    if any(level not in ARTICULATION_LEVELS for level in level_list):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid level. Must be 1-5. Available levels: {list(ARTICULATION_LEVELS.keys())}"
        )

    results = []
    for audio, target, level in zip(audios, targets, level_list):
        tmp_path = None
        try:
            # An upload may arrive without a filename; fall back to no suffix.
            suffix = Path(audio.filename or "").suffix
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                # Record the path before writing so a failed write is cleaned up.
                tmp_path = tmp_file.name
                tmp_file.write(await audio.read())

            result = scorer.score_audio(
                audio_path=tmp_path,
                target_text=target,
                level=level
            )

            result_dict = result.to_dict()
            result_dict["filename"] = audio.filename
            result_dict["success"] = True
            results.append(result_dict)
        except Exception as e:
            # One failing file must not abort the batch; report it in place.
            results.append({
                "filename": audio.filename,
                "success": False,
                "error": str(e)
            })
        finally:
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    # Best-effort cleanup; never let it hide the item's result.
                    pass

    return CustomJSONResponse(
        content={"results": results, "total": len(results)}
    )
# =======================================
# RUN SERVER
# =======================================
if __name__ == "__main__":
    import uvicorn

    # Bind address and port come from the HOST / PORT environment variables.
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "8000"))

    print(f"🚀 Starting server on {host}:{port}")
    # Advertise the port actually in use rather than a hard-coded 8000.
    print(f"📖 API Documentation: http://localhost:{port}/docs")

    uvicorn.run(
        "api_v2:app",
        host=host,
        port=port,
        reload=True,
        log_level="info"
    )