coquiAPINew / app.py
anuj-exe's picture
Update app.py
b633034 verified
raw
history blame
2.92 kB
import gradio as gr
from TTS.api import TTS
from fastapi import FastAPI, Request
from fastapi.responses import FileResponse, JSONResponse
import uvicorn
import time
import os
# -----------------------------
# Configuration
# -----------------------------
# Model identifier handed to the TTS constructor when the module loads.
YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"

# Reference recordings for voice cloning, keyed by the speaker name that
# callers pass to synthesize().
SPEAKERS = dict(
    male="speakers/voice1.wav",
    female="speakers/voice2.wav",
)

# Every synthesis call writes its audio to this single path.
OUTPUT_PATH = "output.wav"
# Load the model once at import time (CPU only); synthesize() reuses this
# instance on every call.
tts = TTS(model_name=YOURTTS_MODEL, gpu=False)
# -----------------------------
# Core synthesis function
# -----------------------------
def synthesize(text: str, speaker: str = "female"):
    """Synthesize English speech for *text* in the selected speaker's voice.

    Args:
        text: Text to speak. Must contain at least one non-whitespace
            character.
        speaker: Key into ``SPEAKERS``; surrounding whitespace is ignored and
            matching is case-insensitive.

    Returns:
        ``(OUTPUT_PATH, info)`` on success, where ``info`` carries timing and
        model metadata, or ``(None, {"error": message})`` when the input is
        invalid, the speaker reference file is missing, or the model fails.
        Failures are reported through the return value rather than raised.
    """
    # Validate up front: a null / non-string value would otherwise raise
    # AttributeError on ``.lower()`` or ``.split()``, and blank text has zero
    # words, which would divide by zero in the real-time-factor estimate.
    if not isinstance(text, str) or not text.strip():
        return None, {"error": "❌ Text must be a non-empty string"}
    if not isinstance(speaker, str):
        return None, {"error": f"❌ Speaker must be a string, got: {speaker!r}"}

    speaker_path = SPEAKERS.get(speaker.strip().lower())
    if not speaker_path:
        # Distinguish an unknown name from a missing file so the message is
        # actionable instead of "file not found: None".
        choices = ", ".join(SPEAKERS)
        return None, {"error": f"❌ Unknown speaker: {speaker!r} (choose one of: {choices})"}
    if not os.path.exists(speaker_path):
        return None, {"error": f"❌ Speaker file not found: {speaker_path}"}

    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump if the system clock is adjusted mid-synthesis.
    start_time = time.perf_counter()
    try:
        # NOTE: every call writes to the same OUTPUT_PATH, so overlapping
        # calls overwrite each other's audio.
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_path,
            file_path=OUTPUT_PATH,
            language="en",
        )
    except Exception as e:
        # Boundary handler: any model failure becomes an error payload for
        # the HTTP / UI caller instead of propagating.
        return None, {"error": str(e)}
    total_time = time.perf_counter() - start_time

    # Rough audio-length estimate at 2.5 words per second; the validation
    # above guarantees at least one word, so this is never zero.
    est_duration = len(text.split()) / 2.5
    rtf = round(total_time / est_duration, 3)

    info = {
        "language": "English",
        "processing_time_sec": round(total_time, 3),
        "real_time_factor": rtf,
        "model_used": YOURTTS_MODEL,
        "speaker_used": os.path.basename(speaker_path),
    }
    return OUTPUT_PATH, info
# -----------------------------
# FastAPI setup
# -----------------------------
# FastAPI application object that the JSON synthesis endpoint is registered on.
app = FastAPI(
    title="YourTTS FastAPI",
    description="Text-to-Speech API",
)
@app.post("/synthesize")
async def predict(request: Request):
    """Handle ``POST /synthesize``: turn a JSON request into a WAV response.

    The body must be a JSON object with a required ``text`` string and an
    optional ``speaker`` string (defaults to ``"female"``).

    Returns:
        The synthesized WAV file with the synthesis metadata attached as
        ``x-<key>`` response headers; a 400 JSON error when the request is
        malformed; or a 500 JSON error when synthesis itself fails.
    """
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; a bad body is a client error, not a server crash.
        return JSONResponse({"error": "Request body must be valid JSON"}, status_code=400)
    if not isinstance(data, dict):
        # Valid JSON that is a list/string/number has no ``.get``.
        return JSONResponse({"error": "Request body must be a JSON object"}, status_code=400)

    text = data.get("text")
    speaker = data.get("speaker", "female")
    if not text:
        return JSONResponse({"error": "Missing 'text' field"}, status_code=400)
    if not isinstance(text, str) or not text.strip():
        return JSONResponse({"error": "'text' must be a non-empty string"}, status_code=400)
    if not isinstance(speaker, str):
        # Covers an explicit ``"speaker": null`` as well as numbers/lists.
        return JSONResponse({"error": "'speaker' must be a string"}, status_code=400)

    audio_path, info = synthesize(text, speaker)
    if audio_path is None:
        return JSONResponse(info, status_code=500)

    # Expose the synthesis metadata to API clients as custom headers.
    headers = {f"x-{k}": str(v) for k, v in info.items()}
    return FileResponse(audio_path, media_type="audio/wav", filename="output.wav", headers=headers)
# -----------------------------
# Gradio UI
# -----------------------------
# Browser front end over synthesize(): a text box and a speaker picker go in,
# the audio file path and the metadata/error dictionary come out.
demo = gr.Interface(
    title="YourTTS Voice Cloning (English Only, Select Speaker)",
    fn=synthesize,
    inputs=[
        gr.Textbox(label="Text"),
        gr.Dropdown(label="Speaker", choices=["male", "female"], value="female"),
    ],
    outputs=[
        gr.Audio(type="filepath"),
        gr.JSON(),
    ],
    allow_flagging="never",
)
# -----------------------------
# Run both FastAPI + Gradio
# -----------------------------
if __name__ == "__main__":
    import threading

    # The Gradio UI is served from a daemon thread on port 7861 so that it
    # does not keep the process alive once the API server exits.
    gradio_thread = threading.Thread(
        target=demo.launch,
        kwargs={"server_name": "0.0.0.0", "server_port": 7861, "show_api": False},
        daemon=True,
    )
    gradio_thread.start()

    # The FastAPI app runs in the main thread on port 7860 and blocks here.
    uvicorn.run(app, host="0.0.0.0", port=7860)