File size: 953 Bytes
089cafd
7a30417
fe64719
089cafd
fe64719
ff85ece
089cafd
ff85ece
 
 
 
fe64719
7a30417
fe64719
 
 
 
fa94c49
fe64719
7a30417
 
 
fe64719
fa94c49
fe64719
fa94c49
 
fe64719
 
ff85ece
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from transformers import pipeline
import scipy.io.wavfile
import numpy as np

# Module-level text-to-speech pipeline for the "maya-research/Veena" model.
# It is constructed once, when this module is imported, and reused by every
# call to text_to_speech_with_veena() instead of being rebuilt per request.
pipe = pipeline("text-to-speech", model="maya-research/Veena")

def text_to_speech_with_veena(input_text, output_filepath="doctor_voice.wav"):
    """Synthesize speech for ``input_text`` and save it as a 16-bit PCM WAV.

    The text is passed to the module-level ``pipe`` text-to-speech pipeline.
    Unless the returned waveform is already int16, it is peak-normalized to
    the full int16 range before being written with
    ``scipy.io.wavfile.write``.

    Args:
        input_text: Text to synthesize.
        output_filepath: Destination path of the WAV file.

    Returns:
        ``output_filepath``, unchanged, so callers can chain on the path.

    Raises:
        ValueError: If the pipeline returns no audio samples.
    """
    result = pipe(input_text)

    # Fall back to 16 kHz when the pipeline does not report a sampling rate.
    sr = result.get("sampling_rate")
    if sr is None:
        sr = 16000

    audio = np.asarray(result["audio"])
    if audio.size == 0:
        raise ValueError("TTS pipeline returned no audio samples")

    # wavfile.write interprets 2-D data as (n_samples, n_channels). Drop
    # singleton axes (e.g. a leading batch/channel axis) so a waveform of
    # shape (1, n) is not written as n channels of one sample each.
    audio = np.atleast_1d(np.squeeze(audio))

    # Convert to int16 if not already.
    if audio.dtype != np.int16:
        # Work in float64 so integer inputs cannot overflow in abs()/scaling.
        samples = audio.astype(np.float64)
        peak = np.max(np.abs(samples))
        # Pure silence has peak 0; skip scaling to avoid 0/0 -> NaN samples.
        if peak > 0:
            samples = samples / peak * 32767
        audio = samples.astype(np.int16)

    scipy.io.wavfile.write(output_filepath, rate=sr, data=audio)

    print(f"[DEBUG] Saved {output_filepath} with sample_rate={sr}, shape={audio.shape}")
    return output_filepath