File size: 953 Bytes
089cafd 7a30417 fe64719 089cafd fe64719 ff85ece 089cafd ff85ece fe64719 7a30417 fe64719 fa94c49 fe64719 7a30417 fe64719 fa94c49 fe64719 fa94c49 fe64719 ff85ece |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
from transformers import pipeline
import scipy.io.wavfile
import numpy as np
# Build the Veena text-to-speech pipeline once, at import time, so that every
# synthesis call in this module reuses the same loaded model.
pipe = pipeline(task="text-to-speech", model="maya-research/Veena")
def _to_int16_pcm(audio):
    """Return ``audio`` (a NumPy array) as int16 PCM samples.

    int16 input is returned unchanged. Any other dtype is peak-normalized so
    that its largest absolute sample maps to 32767 and is then cast to int16.

    Raises:
        ValueError: If a non-int16 ``audio`` array contains no samples.
    """
    if audio.dtype == np.int16:
        return audio
    if audio.size == 0:
        raise ValueError(
            "cannot normalize empty audio: the pipeline returned no samples"
        )
    peak = np.max(np.abs(audio))
    if peak == 0:
        # Pure silence: dividing by a zero peak would produce NaNs, which
        # turn into arbitrary values when cast to int16. Emit zeros instead.
        return np.zeros(audio.shape, dtype=np.int16)
    return (audio / peak * 32767).astype(np.int16)


def text_to_speech_with_veena(input_text, output_filepath="doctor_voice.wav"):
    """Generate speech for ``input_text`` with the Veena model and save it as WAV.

    The text is passed to the module-level ``pipe`` object. The returned
    audio is converted to int16 PCM (peak-normalized unless it already is
    int16) and written to ``output_filepath``.

    Args:
        input_text: Text handed to the text-to-speech pipeline.
        output_filepath: Destination path of the WAV file.

    Returns:
        ``output_filepath``, unchanged, once the file has been written.

    Raises:
        ValueError: If the pipeline returns non-int16 audio with no samples.
    """
    result = pipe(input_text)

    # Use 16 kHz when the pipeline result carries no sampling rate.
    sr = result.get("sampling_rate")
    if sr is None:
        sr = 16000

    audio = np.asarray(result["audio"])
    # scipy treats a 2-D array as (samples, channels), so a mono clip shaped
    # (1, N) would be written as one frame with N channels. Drop singleton
    # axes, keeping at least one dimension for the single-sample case.
    # NOTE(review): genuine multi-channel output is passed through as-is and
    # is assumed to already be laid out as (samples, channels) - confirm.
    audio = np.atleast_1d(np.squeeze(audio))

    audio = _to_int16_pcm(audio)

    scipy.io.wavfile.write(output_filepath, rate=sr, data=audio)
    print(f"[DEBUG] Saved {output_filepath} with sample_rate={sr}, shape={audio.shape}")
    return output_filepath
|