"""FastAPI service exposing YourTTS text-to-speech synthesis over HTTP."""

import os
import tempfile
import time
from typing import Any, Dict, Optional, Tuple

import uvicorn
from fastapi import BackgroundTasks, FastAPI, Request
from fastapi.responses import FileResponse, JSONResponse
from TTS.api import TTS

# -----------------------------
# Configuration
# -----------------------------
YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
# Speaker name accepted by the API -> reference recording passed as speaker_wav.
SPEAKERS = {
    "male": "speakers/voice1.wav",
    "female": "speakers/voice2.wav",
}
# Default destination used by synthesize() when the caller gives no output_path.
OUTPUT_PATH = "output.wav"
# Speaking rate used to estimate the audio length from the word count.
_WORDS_PER_SECOND = 2.5

# The model is loaded once, at import time, and shared by every request.
tts = TTS(YOURTTS_MODEL, gpu=False)


# -----------------------------
# Core synthesis function
# -----------------------------
def synthesize(
    text: str,
    speaker: str = "female",
    output_path: str = OUTPUT_PATH,
) -> Tuple[Optional[str], Dict[str, Any]]:
    """Synthesize *text* to a WAV file using one of the configured speakers.

    Args:
        text: The text to speak.
        speaker: Key into ``SPEAKERS``; matched case-insensitively.
        output_path: Where the WAV file is written. Defaults to
            ``OUTPUT_PATH``; pass a unique path when several syntheses may
            be in flight so they do not overwrite each other's audio.

    Returns:
        ``(output_path, info)`` on success, where ``info`` holds the timing
        and model metadata. ``(None, {"error": message})`` when the speaker
        is unknown, its reference file is missing, or synthesis raised.
    """
    # A non-string speaker (e.g. JSON null) is treated like an unknown one
    # instead of raising AttributeError on .lower().
    speaker_path = SPEAKERS.get(speaker.lower()) if isinstance(speaker, str) else None
    if not speaker_path or not os.path.exists(speaker_path):
        return None, {"error": f"❌ Speaker file not found: {speaker_path}"}

    # perf_counter is monotonic, so the measurement survives clock changes.
    start_time = time.perf_counter()
    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_path,
            file_path=output_path,
            language="en",
        )
    except Exception as e:
        # Boundary with the TTS library: report the failure to the caller
        # rather than letting it propagate.
        return None, {"error": str(e)}
    total_time = time.perf_counter() - start_time

    # The audio length is only estimated from the word count, so the
    # real-time factor is an approximation. It is undefined (None) when the
    # text contains no words.
    est_duration = len(text.split()) / _WORDS_PER_SECOND
    rtf = round(total_time / est_duration, 3) if est_duration else None

    info = {
        "language": "English",
        "processing_time_sec": round(total_time, 3),
        "real_time_factor": rtf,
        "model_used": YOURTTS_MODEL,
        "speaker_used": os.path.basename(speaker_path),
    }
    return output_path, info


def _remove_quietly(path: str) -> None:
    """Delete *path*, ignoring the case where it no longer exists."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


# -----------------------------
# FastAPI setup
# -----------------------------
app = FastAPI(title="YourTTS FastAPI", description="Text-to-Speech API")


@app.post("/synthesize")
async def predict(request: Request):
    """Handle ``POST /synthesize``.

    Expects a JSON object with a required ``text`` string and an optional
    ``speaker`` (defaults to ``"female"``). On success the WAV audio is
    returned with the synthesis metadata exposed as ``x-<key>`` response
    headers. Malformed requests get a 400 JSON error; synthesis failures
    get a 500 JSON error.
    """
    try:
        data = await request.json()
    except ValueError:
        # Covers json.JSONDecodeError and undecodable request bytes.
        return JSONResponse({"error": "Request body must be valid JSON"}, status_code=400)
    if not isinstance(data, dict):
        return JSONResponse({"error": "Request body must be a JSON object"}, status_code=400)

    text = data.get("text")
    speaker = data.get("speaker", "female")
    if not isinstance(text, str) or not text.strip():
        return JSONResponse({"error": "Missing 'text' field"}, status_code=400)

    # Each request gets its own file: the response is streamed after this
    # handler returns, so a shared path could be overwritten by the next
    # request while this one is still being sent.
    fd, request_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    # NOTE: synthesize() is a blocking call, so requests are processed one
    # at a time while it runs.
    audio_path, info = synthesize(text, speaker, output_path=request_path)
    if audio_path is None:
        _remove_quietly(request_path)
        return JSONResponse(info, status_code=500)

    headers = {f"x-{k}": str(v) for k, v in info.items()}

    # Remove the per-request file once the response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_quietly, audio_path)
    return FileResponse(
        audio_path,
        media_type="audio/wav",
        filename="output.wav",
        headers=headers,
        background=cleanup,
    )


# -----------------------------
# Run FastAPI
# -----------------------------
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)