Swapnilborhade committed on
Commit
54c89bc
·
verified ·
1 Parent(s): 43db598

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +12 -0
  2. app.py +38 -0
  3. requirements.txt +5 -0
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Marathi Voice Cloning (Hugging Face Space)
2
+
3
+ This project demonstrates **Marathi text-to-speech voice cloning** using [IndicF5](https://huggingface.co/ai4bharat/IndicF5).
4
+
5
+ ## Usage
6
+ 1. Upload your short voice sample (`.wav`, 6–30 seconds).
7
+ 2. Provide the transcript of what you said in that sample.
8
+ 3. Enter the new Marathi text you want to synthesize.
9
+ 4. Click **Generate Speech** and listen!
10
+
11
+ ---
12
+ *Created with ❤️ using Hugging Face + Gradio.*
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModel
3
+ import soundfile as sf
4
+ import numpy as np
5
+
6
# Load the IndicF5 checkpoint once at import time so that every request
# reuses the same model object. trust_remote_code=True is passed because
# the checkpoint is loaded through code shipped in the model repository.
model = AutoModel.from_pretrained(
    "ai4bharat/IndicF5",
    trust_remote_code=True,
)
8
+
9
def generate_tts(text, ref_audio, ref_text):
    """Synthesize ``text`` in the voice of a reference recording.

    Args:
        text: The text to be spoken.
        ref_audio: Filesystem path of the reference recording (the UI's
            ``gr.Audio`` component is configured with ``type="filepath"``),
            or ``None`` when nothing was uploaded.
        ref_text: Transcript of what is said in ``ref_audio``.

    Returns:
        A ``(status_message, wav_path)`` tuple. ``wav_path`` is ``None``
        when the request is rejected before synthesis.
    """
    # Local import keeps this block self-contained; tempfile is stdlib.
    import tempfile

    if ref_audio is None:
        return "⚠️ कृपया एक आवाज फाइल अपलोड करा", None

    # Nothing to synthesize: report it instead of sending empty text to
    # the model.
    if not text or not text.strip():
        return "⚠️ कृपया मजकूर प्रविष्ट करा", None

    # Run model
    audio = np.asarray(
        model(text, ref_audio_path=ref_audio, ref_text=ref_text)
    )

    # 16-bit PCM output must be scaled into [-1.0, 1.0] before it is
    # written as float samples; a bare cast would leave values far outside
    # that range and produce clipped audio. This mirrors the usage example
    # on the IndicF5 model card.
    if audio.dtype == np.int16:
        audio = audio.astype(np.float32) / 32768.0
    else:
        audio = audio.astype(np.float32)

    # Write to a unique file per request so that concurrent users do not
    # overwrite each other's result. delete=False keeps the file on disk
    # after the handle is closed so it can be read back by the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name
    # 24 kHz is the sample rate used in the IndicF5 model card example.
    sf.write(out_path, audio, samplerate=24000)

    return "✅ आवाज तयार झाला!", out_path
25
+
26
# Gradio front end: three inputs (target text, reference recording and its
# transcript), two outputs (status line and generated audio), and a single
# button that runs generate_tts. Components render in creation order, so
# the order below is the on-page layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ माझं Marathi Voice Cloning (IndicF5)")

    # --- inputs ---
    text = gr.Textbox(label="तुम्हाला काय बोलायचं आहे?")
    ref_audio = gr.Audio(label="तुमचा आवाज (WAV, 6–30 सेकंद)", type="filepath")
    ref_text = gr.Textbox(label="त्या ऑडिओमध्ये तुम्ही काय बोललात?")

    # --- outputs ---
    out_msg = gr.Textbox(label="Status")
    out_audio = gr.Audio(type="filepath", label="Generated Audio")

    # --- wiring ---
    btn = gr.Button("Generate Speech")
    btn.click(
        fn=generate_tts,
        inputs=[text, ref_audio, ref_text],
        outputs=[out_msg, out_audio],
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ TTS==0.22.0
2
+ gradio==4.44.0
3
+ soundfile==0.12.1
4
+ transformers==4.44.2
5
+ torch