from transformers import pipeline
import scipy.io.wavfile
import numpy as np

# Initialize the pipeline once at import time so repeated calls reuse the
# loaded model (the model load is the expensive step).
pipe = pipeline("text-to-speech", model="maya-research/Veena")


def text_to_speech_with_veena(input_text, output_filepath="doctor_voice.wav"):
    """Generate voice using Hugging Face Veena model and save as WAV.

    Runs the globally-initialized TTS pipeline on *input_text*, converts the
    resulting waveform to 16-bit PCM, and writes it to *output_filepath*.

    Args:
        input_text: The text to synthesize.
        output_filepath: Destination WAV path (default "doctor_voice.wav").

    Returns:
        The path the WAV file was written to (same as *output_filepath*).
    """
    result = pipe(input_text)
    audio = result["audio"]

    # Fix: fallback sample rate when the pipeline does not report one.
    sr = result.get("sampling_rate")
    if sr is None:
        sr = 16000  # default 16kHz

    # Ensure numpy array
    if not isinstance(audio, np.ndarray):
        audio = np.array(audio)

    # NOTE(review): HF TTS pipelines often return shape (1, n_samples);
    # scipy.io.wavfile.write treats a 2-D array as (n_samples, n_channels),
    # so a (1, N) array would be misread as N channels of one sample each.
    # Squeeze the unit dim — assumes mono output; confirm for Veena.
    audio = np.squeeze(audio)

    # Peak-normalize floats to int16. Guard the degenerate cases: an empty
    # array makes np.max raise, and a silent (all-zero) waveform would
    # divide by zero and fill the file with NaN-derived garbage.
    if audio.dtype != np.int16:
        peak = np.max(np.abs(audio)) if audio.size else 0.0
        if peak > 0:
            audio = (audio / peak * 32767).astype(np.int16)
        else:
            audio = np.zeros(audio.shape, dtype=np.int16)

    scipy.io.wavfile.write(output_filepath, rate=sr, data=audio)
    print(f"[DEBUG] Saved {output_filepath} with sample_rate={sr}, shape={audio.shape}")
    return output_filepath