coquiAPINew

Sleeping

App Files Community

coquiAPINew / app.py

anuj-exe

Update app.py

b633034 verified about 2 months ago

raw

history blame

2.92 kB

	import gradio as gr
	from TTS.api import TTS
	from fastapi import FastAPI, Request
	from fastapi.responses import FileResponse, JSONResponse
	import uvicorn
	import time
	import os

	# -----------------------------
	# Configuration
	# -----------------------------
	# Model identifier handed to TTS() when the model is loaded below.
	YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"

	# Reference recordings used for voice cloning, keyed by the speaker name
	# that callers pass to synthesize().
	SPEAKERS = dict(
	    male="speakers/voice1.wav",
	    female="speakers/voice2.wav",
	)

	# synthesize() writes every result to this one file.
	OUTPUT_PATH = "output.wav"

	# Constructed once at import time (with gpu=False) and shared by all requests.
	tts = TTS(YOURTTS_MODEL, gpu=False)

	# -----------------------------
	# Core synthesis function
	# -----------------------------
	def synthesize(text: str, speaker: str = "female"):
	    """Synthesize *text* with the selected speaker voice into OUTPUT_PATH.

	    Args:
	        text: The text to speak. Must be a string containing at least one
	            non-whitespace character.
	        speaker: Key into SPEAKERS, matched case-insensitively.

	    Returns:
	        A ``(audio_path, info)`` pair. On success ``audio_path`` is
	        OUTPUT_PATH and ``info`` holds the language, processing time,
	        real-time factor, model name and speaker file name. On failure
	        ``audio_path`` is ``None`` and ``info`` is ``{"error": message}``.
	        Errors are reported through the return value, never raised.
	    """
	    # Reject unusable text up front: it gives a clear error message and
	    # guarantees a non-zero word count for the real-time-factor division.
	    if not isinstance(text, str) or not text.strip():
	        return None, {"error": "Text must be a non-empty string"}

	    # A non-string speaker (e.g. JSON null) cannot name a voice; treat it as
	    # an unknown speaker instead of crashing on .lower().
	    speaker_path = SPEAKERS.get(speaker.lower()) if isinstance(speaker, str) else None
	    if not speaker_path or not os.path.exists(speaker_path):
	        return None, {"error": f"❌ Speaker file not found: {speaker_path}"}

	    # perf_counter is monotonic, so the measured duration cannot be skewed
	    # by system clock adjustments the way time.time() can.
	    start_time = time.perf_counter()
	    try:
	        tts.tts_to_file(
	            text=text,
	            speaker_wav=speaker_path,
	            file_path=OUTPUT_PATH,
	            language="en",
	        )
	    except Exception as e:
	        # Boundary handler: any synthesis failure is surfaced to the caller
	        # as an error payload rather than an exception.
	        return None, {"error": str(e)}

	    total_time = time.perf_counter() - start_time
	    # Audio length is estimated as word count / 2.5, i.e. an assumed speaking
	    # rate of 2.5 words per second; the real audio duration is not measured.
	    est_duration = len(text.split()) / 2.5
	    rtf = round(total_time / est_duration, 3)
	    info = {
	        "language": "English",
	        "processing_time_sec": round(total_time, 3),
	        "real_time_factor": rtf,
	        "model_used": YOURTTS_MODEL,
	        "speaker_used": os.path.basename(speaker_path),
	    }
	    # NOTE(review): every call writes to the same OUTPUT_PATH, so overlapping
	    # calls can overwrite each other's audio — confirm whether that matters.
	    return OUTPUT_PATH, info

	# -----------------------------
	# FastAPI setup
	# -----------------------------
	# ASGI application that exposes the /synthesize endpoint.
	app = FastAPI(
	    title="YourTTS FastAPI",
	    description="Text-to-Speech API",
	)

	@app.post("/synthesize")
	async def predict(request: Request):
	    """Handle POST /synthesize: turn a JSON request into a WAV download.

	    Expects a JSON object with a required ``"text"`` string and an optional
	    ``"speaker"`` string (defaults to ``"female"``).

	    Returns:
	        - 400 JSON error when the body is not valid JSON, is not an object,
	          lacks usable text, or has a non-string speaker.
	        - 500 JSON error when synthesize() reports a failure.
	        - Otherwise the generated WAV file, with each entry of the info dict
	          copied into an ``x-<key>`` response header.
	    """
	    try:
	        data = await request.json()
	    except ValueError:
	        # json.JSONDecodeError and UnicodeDecodeError both subclass
	        # ValueError; a malformed body is a client error, not a server fault.
	        return JSONResponse({"error": "Request body must be valid JSON"}, status_code=400)
	    if not isinstance(data, dict):
	        return JSONResponse({"error": "Request body must be a JSON object"}, status_code=400)

	    text = data.get("text")
	    if not isinstance(text, str) or not text.strip():
	        return JSONResponse({"error": "Missing 'text' field"}, status_code=400)

	    speaker = data.get("speaker", "female")
	    if speaker is None:
	        # An explicit JSON null is treated the same as an omitted field.
	        speaker = "female"
	    if not isinstance(speaker, str):
	        return JSONResponse({"error": "'speaker' must be a string"}, status_code=400)

	    # NOTE(review): synthesize() is a synchronous call, so this coroutine does
	    # not yield to the event loop while it runs.
	    audio_path, info = synthesize(text, speaker)
	    if audio_path is None:
	        return JSONResponse(info, status_code=500)

	    headers = {f"x-{k}": str(v) for k, v in info.items()}
	    return FileResponse(audio_path, media_type="audio/wav", filename="output.wav", headers=headers)

	# -----------------------------
	# Gradio UI
	# -----------------------------
	# Components are built in the same order the interface lists them:
	# the two inputs first, then the two outputs.
	_text_input = gr.Textbox(label="Text")
	_speaker_input = gr.Dropdown(choices=["male", "female"], value="female", label="Speaker")
	_audio_output = gr.Audio(type="filepath")
	_info_output = gr.JSON()

	# Browser front end over synthesize(): the (path, info) pair it returns feeds
	# the audio player and the JSON panel respectively.
	demo = gr.Interface(
	    fn=synthesize,
	    inputs=[_text_input, _speaker_input],
	    outputs=[_audio_output, _info_output],
	    title="YourTTS Voice Cloning (English Only, Select Speaker)",
	    allow_flagging="never",
	)

	# -----------------------------
	# Run both FastAPI + Gradio
	# -----------------------------
	if __name__ == "__main__":
	    import threading

	    # Gradio UI on port 7861, started from a background thread. daemon=True
	    # means this thread does not keep the process alive on its own.
	    gradio_thread = threading.Thread(
	        target=demo.launch,
	        kwargs={"server_name": "0.0.0.0", "server_port": 7861, "show_api": False},
	        daemon=True,
	    )
	    gradio_thread.start()

	    # FastAPI app on port 7860, served from the main thread.
	    uvicorn.run(app, host="0.0.0.0", port=7860)