AI_doctor / voice_of_the_doctor.py
Ruchir456's picture
Update voice_of_the_doctor.py
fe64719 verified
raw
history blame contribute delete
953 Bytes
from transformers import pipeline
import scipy.io.wavfile
import numpy as np
# Build the Veena text-to-speech pipeline a single time, at import, so that
# every synthesis call reuses the same loaded model.
pipe = pipeline(task="text-to-speech", model="maya-research/Veena")
def _to_int16_pcm(audio):
    """Peak-normalise *audio* to the int16 range; int16 input is returned as is."""
    if audio.dtype == np.int16:
        return audio
    # Measure the peak in float64 so abs() cannot overflow on narrow integer
    # dtypes (e.g. abs(-128) in int8).
    peak = float(np.max(np.abs(audio.astype(np.float64))))
    if peak == 0:
        # Silent clip: dividing by a zero peak would produce NaNs, whose
        # conversion to int16 is undefined. Emit explicit silence instead.
        return np.zeros(audio.shape, dtype=np.int16)
    return (audio / peak * 32767).astype(np.int16)


def text_to_speech_with_veena(input_text, output_filepath="doctor_voice.wav"):
    """Synthesise *input_text* with the module-level ``pipe`` and save it as WAV.

    The audio returned by the pipeline is converted to a NumPy array,
    peak-normalised to 16-bit PCM (unless it already is int16) and written
    with ``scipy.io.wavfile.write``.

    Args:
        input_text: Text handed to the text-to-speech pipeline.
        output_filepath: Destination path of the WAV file.

    Returns:
        ``output_filepath``, unchanged.

    Raises:
        ValueError: If the pipeline returns no audio samples.
    """
    result = pipe(input_text)

    # Fall back to 16 kHz when the pipeline does not report a sampling rate.
    sr = result.get("sampling_rate")
    if sr is None:
        sr = 16000

    audio = np.asarray(result["audio"])
    if audio.size == 0:
        raise ValueError("TTS pipeline returned no audio samples")

    # scipy.io.wavfile.write reads 2-D data as (frames, channels), so a
    # leading axis of length 1 would be stored as a single frame. It is
    # assumed here to be a batch axis and dropped -- confirm against the
    # model's actual output shape.
    while audio.ndim > 1 and audio.shape[0] == 1:
        audio = audio[0]

    audio = _to_int16_pcm(audio)
    scipy.io.wavfile.write(output_filepath, rate=sr, data=audio)
    print(f"[DEBUG] Saved {output_filepath} with sample_rate={sr}, shape={audio.shape}")
    return output_filepath