Spaces:

remiai3
/

clone_your_voice

Sleeping

remiai3 committed on Aug 31

Commit

e5fa7fa

verified ·

1 Parent(s): ad8ff68

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,46 +1,38 @@
-import gradio as gr
-import torch
-from TTS.api import TTS
-from pydub import AudioSegment
 import os
-# Load XTTS model (runs on CPU if no GPU available)
-device = "cuda" if torch.cuda.is_available() else "cpu"
-tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2").to(device)
-# Convert mp3 to wav if needed
-def convert_to_wav(file_path):
-    if file_path.endswith(".mp3"):
-        sound = AudioSegment.from_mp3(file_path)
-        wav_path = file_path.replace(".mp3", ".wav")
-        sound.export(wav_path, format="wav")
-        return wav_path
-    return file_path
-# Text → Speech cloning
-def clone_from_text(sample_voice, text):
-    if not sample_voice or not text.strip():
-        return None
-    sample_voice = convert_to_wav(sample_voice)
-    output_path = "output.wav"
-    tts.tts_to_file(
-        text=text,
-        file_path=output_path,
-        speaker_wav=sample_voice,
-        language="en"
-    )
-    return output_path
 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("## 🎤 Voice Cloning App (XTTS-v2 on CPU)\nUpload a sample voice and enter text.")
-    sample_voice_input = gr.Audio(type="filepath", label="Upload Sample Voice (.mp3/.wav)")
-    text_input = gr.Textbox(label="Enter Text")
-    tts_output = gr.Audio(label="Generated Speech")
-    btn1 = gr.Button("Generate")
-    btn1.click(fn=clone_from_text, inputs=[sample_voice_input, text_input], outputs=tts_output)
-demo.launch()

 import os
+from TTS.api import TTS
+import gradio as gr
+# ✅ Auto-accept Coqui license (non-commercial CPML) by setting the agreement
+# flag in the environment. It is set here, ahead of the model load below.
+# NOTE(review): presumably this suppresses an interactive licence prompt — confirm.
+os.environ["COQUI_TOS_AGREED"] = "1"
+# Device is hard-coded to CPU; no GPU detection is performed.
+device = "cpu"
+# Load XTTS-v2 once at module level and move it to the selected device.
+tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+def clone_voice(sample_audio, text):
+    """Synthesize ``text`` with the module-level ``tts`` model, using ``sample_audio`` as the speaker reference.
+
+    Args:
+        sample_audio: Voice sample forwarded as ``speaker_wav``; ``None`` means
+            nothing was uploaded.
+        text: Text forwarded to the model and echoed in the status message.
+
+    Returns:
+        A ``(status_message, audio_path)`` tuple. ``audio_path`` is ``None``
+        when no sample was supplied, otherwise ``"output.wav"``.
+    """
+    # Guard: without a reference sample there is nothing to clone from.
+    if sample_audio is None:
+        return "Please upload a voice sample.", None
+    # Every call writes to this same relative path.
+    # NOTE(review): overlapping requests could overwrite each other's output —
+    # confirm whether a per-request file is needed.
+    output_path = "output.wav"
+    # NOTE(review): ``text`` is not checked for None/empty before synthesis,
+    # and the language is fixed to "en".
+    tts.tts_to_file(text=text,
+                    speaker_wav=sample_audio,
+                    language="en",
+                    file_path=output_path)
+    return f"Generated speech for: {text}", output_path
 # Gradio UI
 with gr.Blocks() as demo:
+    gr.Markdown("## 🎤 Voice Cloning with XTTS-v2")
+    # Inputs share one row: the voice sample (handed over as a file path) and the text.
+    with gr.Row():
+        sample = gr.Audio(label="Upload a sample voice", type="filepath")
+        text = gr.Textbox(label="Enter text to speak")
+    btn = gr.Button("Generate Voice")
+    # Two outputs: a status message and the generated audio.
+    output_text = gr.Textbox(label="Status")
+    output_audio = gr.Audio(label="Cloned Voice", type="filepath")
+    # Output order matches the (status, audio path) tuple returned by clone_voice.
+    btn.click(fn=clone_voice, inputs=[sample, text], outputs=[output_text, output_audio])
+# Launch the app only when this file is run as a script, not when imported.
+if __name__ == "__main__":
+    demo.launch()