Update app.py
Browse files
app.py
CHANGED
|
@@ -12,8 +12,10 @@ transcriber = pipeline("automatic-speech-recognition", model="facebook/s2t-small
|
|
| 12 |
generator = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
|
| 13 |
|
| 14 |
# Initialize TTS tokenizer and model
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
| 17 |
|
| 18 |
def transcribe_and_generate_audio(audio):
|
| 19 |
sr, y = audio
|
|
@@ -26,11 +28,11 @@ def transcribe_and_generate_audio(audio):
|
|
| 26 |
# Generate text based on ASR output
|
| 27 |
generated_text = generator(asr_output)[0]['generated_text']
|
| 28 |
|
| 29 |
-
# Generate audio from text
|
| 30 |
-
inputs =
|
| 31 |
set_seed(555)
|
| 32 |
with torch.no_grad():
|
| 33 |
-
outputs =
|
| 34 |
waveform = outputs.waveform[0]
|
| 35 |
waveform_path = "output.wav"
|
| 36 |
sf.write(waveform_path, waveform.numpy(), 16000, format='wav')
|
|
|
|
| 12 |
generator = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
|
| 13 |
|
| 14 |
# Initialize TTS tokenizer and model
|
| 15 |
+
tokenizer_tts = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
|
| 16 |
+
model_tts = VitsModel.from_pretrained("facebook/mms-tts-eng")
|
| 17 |
+
|
| 18 |
+
print("TTS Tokenizer:", tokenizer_tts) # Print the tokenizer for the TTS model
|
| 19 |
|
| 20 |
def transcribe_and_generate_audio(audio):
|
| 21 |
sr, y = audio
|
|
|
|
| 28 |
# Generate text based on ASR output
|
| 29 |
generated_text = generator(asr_output)[0]['generated_text']
|
| 30 |
|
| 31 |
+
# Generate audio from text using TTS model
|
| 32 |
+
inputs = tokenizer_tts(text=generated_text, return_tensors="pt")
|
| 33 |
set_seed(555)
|
| 34 |
with torch.no_grad():
|
| 35 |
+
outputs = model_tts(**inputs)
|
| 36 |
waveform = outputs.waveform[0]
|
| 37 |
waveform_path = "output.wav"
|
| 38 |
sf.write(waveform_path, waveform.numpy(), 16000, format='wav')
|