Spaces:

Ruchir456
/

AI_doctor

Running

File size: 953 Bytes

089cafd
7a30417
fe64719
089cafd
fe64719
ff85ece
089cafd
ff85ece
 
 
 
fe64719
7a30417
fe64719
 
 
 
fa94c49
fe64719
7a30417
 
 
fe64719
fa94c49
fe64719
fa94c49
 
fe64719
 
ff85ece

from transformers import pipeline
import scipy.io.wavfile
import numpy as np

# Build the text-to-speech pipeline a single time at import, so every call
# to the synthesis function reuses the same loaded model.
pipe = pipeline(
    task="text-to-speech",
    model="maya-research/Veena",
)

def text_to_speech_with_veena(input_text, output_filepath="doctor_voice.wav"):
    """Generate speech for ``input_text`` with the global pipeline and save it as a WAV.

    The module-level ``pipe`` is called with ``input_text``; its ``"audio"``
    entry is converted to 16-bit PCM and written to ``output_filepath``.

    Args:
        input_text: Text handed to the text-to-speech pipeline.
        output_filepath: Destination path of the WAV file.

    Returns:
        ``output_filepath``, unchanged, once the file has been written.

    Raises:
        KeyError: If the pipeline result has no ``"audio"`` entry.
    """
    result = pipe(input_text)

    # Fall back to 16 kHz when the pipeline does not report a sampling rate.
    sr = result.get("sampling_rate")
    if sr is None:
        sr = 16000
    # The WAV header stores the rate as an integer field.
    sr = int(sr)

    audio = _to_int16_pcm(result["audio"])

    scipy.io.wavfile.write(output_filepath, rate=sr, data=audio)

    print(f"[DEBUG] Saved {output_filepath} with sample_rate={sr}, shape={audio.shape}")
    return output_filepath


def _to_int16_pcm(audio):
    """Convert raw audio samples into an int16 array suitable for a WAV file."""
    samples = np.asarray(audio)

    # scipy reads a 2-D array as (samples, channels), so a singleton batch or
    # channel axis such as (1, N) would be written as one sample with N
    # channels. Drop such axes; genuine multi-channel data is left untouched.
    if samples.ndim > 1:
        samples = np.squeeze(samples)
    samples = np.atleast_1d(samples)

    if samples.dtype == np.int16:
        return samples

    # Work in float64 so integer inputs cannot overflow in abs() or scaling.
    samples = samples.astype(np.float64)
    peak = np.max(np.abs(samples)) if samples.size else 0.0

    # Peak-normalise to the int16 range. Silent or empty audio has no peak to
    # scale by; dividing by zero would yield NaN, so it is passed through as-is.
    if peak > 0:
        samples = samples / peak * 32767

    return samples.astype(np.int16)