from transformers import pipeline
import scipy.io.wavfile
import numpy as np

# Initialize the pipeline (load once globally)
pipe = pipeline("text-to-speech", model="maya-research/Veena")
| def text_to_speech_with_veena(input_text, output_filepath="doctor_voice.wav"): | |
| """Generate voice using Hugging Face Veena model and save as WAV""" | |
| result = pipe(input_text) | |
| audio = result["audio"] | |
| # Fix: fallback sample rate | |
| sr = result.get("sampling_rate") | |
| if sr is None: | |
| sr = 16000 # default 16kHz | |
| # Ensure numpy array | |
| if not isinstance(audio, np.ndarray): | |
| audio = np.array(audio) | |
| # Normalize to int16 if not already | |
| if audio.dtype != np.int16: | |
| audio = (audio / np.max(np.abs(audio)) * 32767).astype(np.int16) | |
| scipy.io.wavfile.write(output_filepath, rate=sr, data=audio) | |
| print(f"[DEBUG] Saved {output_filepath} with sample_rate={sr}, shape={audio.shape}") | |
| return output_filepath | |
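
# Minimal usage sketch (assumption: run as a script; the sample text and output
# filename below are illustrative, and the model weights download on first use).
if __name__ == "__main__":
    sample_text = "Hello, this is a reminder about your appointment tomorrow."
    path = text_to_speech_with_veena(sample_text, output_filepath="doctor_voice.wav")
    print(f"Generated audio saved to {path}")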