Spaces:

aungkomyat
/

mmtts

Configuration error

App Files Community

aungkomyat committed on May 11

Commit

f1d74e2

verified ·

1 Parent(s): a50ce25

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -65

app.py CHANGED Viewed

@@ -1,93 +1,99 @@
 import os
 import gradio as gr
 import numpy as np
 import torch
-import scipy.io.wavfile
-from utils.hparams import create_hparams
-from train import load_model
-from synthesis import generate_speech
-from text import text_to_sequence
-# Path configurations
 MODEL_DIR = "trained_model"
-MODEL_PATH = os.path.join(MODEL_DIR, "checkpoint_latest.pth.tar")
-CONFIG_PATH = os.path.join(MODEL_DIR, "hparams.yml")
-OUTPUT_PATH = "output.wav"
# Verify that the model files are in place (nothing is fetched automatically)
def download_model():
    """Create ``MODEL_DIR`` if needed and check that the model files exist.

    Despite its name, this function does not download anything: fetching the
    checkpoint and the hyperparameter file is left to the operator. Each
    required file is checked in turn (checkpoint first, then config); the
    first one that is missing prints a progress note and aborts.

    Raises:
        Exception: if ``MODEL_PATH`` or ``CONFIG_PATH`` does not exist.
    """
    directory_present = os.path.exists(MODEL_DIR)
    if not directory_present:
        os.makedirs(MODEL_DIR)

    # (path to check, note printed before failing, error raised when missing)
    required_files = (
        (
            MODEL_PATH,
            "Downloading model...",
            "You need to download the model checkpoint file and place it in trained_model/checkpoint_latest.pth.tar",
        ),
        (
            CONFIG_PATH,
            "Downloading config...",
            "You need to download the hparams.yml file and place it in trained_model/hparams.yml",
        ),
    )
    for file_path, progress_note, failure_text in required_files:
        if os.path.exists(file_path):
            continue
        print(progress_note)
        raise Exception(failure_text)
# Build the model and load its weights
def init_model():
    """Load hyperparameters and checkpoint weights and return the ready model.

    Calls ``download_model()`` first, builds the model from the
    hyperparameters read from ``CONFIG_PATH``, restores the ``'state_dict'``
    entry of the checkpoint at ``MODEL_PATH`` (mapped to the CPU), and puts
    the model into evaluation mode.

    Returns:
        tuple: ``(model, hparams)`` on success, or ``(None, None)`` when any
        step raises; the error is printed rather than propagated.
    """
    try:
        download_model()
        config = create_hparams(CONFIG_PATH)
        network = load_model(config)
        cpu_device = torch.device('cpu')
        checkpoint = torch.load(MODEL_PATH, map_location=cpu_device)
        network.load_state_dict(checkpoint['state_dict'])
        network.eval()
    except Exception as err:
        print(f"Error initializing model: {str(err)}")
        return None, None
    return network, config
# Generate speech
def synthesize(text, model, hparams):
    """Convert ``text`` to speech and write the audio to ``OUTPUT_PATH``.

    Args:
        text: Input text; converted to a symbol sequence by
            ``text_to_sequence`` using the ``burmese_cleaners`` cleaner.
        model: Object whose ``inference(sequence)`` returns four values, the
            second being the post-net mel output fed to the vocoder step.
        hparams: Hyperparameters forwarded to ``generate_speech``; its
            ``sampling_rate`` is used as the WAV sample rate.

    Returns:
        tuple: ``(OUTPUT_PATH, None)`` on success, or ``(None, message)``
        where ``message`` is the text of whatever exception was raised.
    """
    try:
        # [None, :] adds a leading batch axis of size one.
        sequence = np.array(text_to_sequence(text, ['burmese_cleaners']))[None, :]
        # torch.autograd.Variable is a deprecated no-op wrapper; a plain
        # tensor is equivalent.
        sequence = torch.from_numpy(sequence).cpu().long()
        # Run the whole forward pass without autograd tracking: this is pure
        # inference, so no graph should be built or kept alive.
        with torch.no_grad():
            _, mel_outputs_postnet, _, _ = model.inference(sequence)
            waveform = generate_speech(mel_outputs_postnet, hparams)
        scipy.io.wavfile.write(OUTPUT_PATH, hparams.sampling_rate, waveform)
        return OUTPUT_PATH, None
    except Exception as e:
        return None, str(e)
# Gradio interface
def tts_interface(text):
    """Gradio callback: turn ``text`` into an audio file path plus a status.

    The model is initialised lazily on first use and kept in the module-level
    ``MODEL`` / ``HPARAMS`` globals for later calls.

    Args:
        text: Text entered by the user. ``None`` and whitespace-only input
            are both treated as empty.

    Returns:
        tuple: ``(audio_path, status_message)``; ``audio_path`` is ``None``
        whenever no audio was produced.
    """
    global MODEL, HPARAMS

    # Guard against None as well as blank strings; calling .strip() on None
    # would raise AttributeError instead of showing the friendly message.
    if text is None or not text.strip():
        return None, "Please enter some text."

    if MODEL is None or HPARAMS is None:
        MODEL, HPARAMS = init_model()
    if MODEL is None:
        return None, "Model could not be initialized. Please check logs."

    audio_path, error = synthesize(text, MODEL, HPARAMS)
    if error:
        return None, f"Error generating speech: {error}"
    return audio_path, "Speech generated successfully!"
-# Initialize global model variables
-MODEL, HPARAMS = None, None
-# Create Gradio interface
 demo = gr.Interface(
     fn=tts_interface,
     inputs=[
@@ -106,9 +112,12 @@ demo = gr.Interface(
     This is a demo of the Myanmar Text-to-Speech system developed by hpbyte.
     Enter Burmese text in the box below and click 'Submit' to generate speech.
     GitHub Repository: https://github.com/hpbyte/myanmar-tts
     """,
-    allow_flagging="never",
     examples=[
         ["မင်္ဂလာပါ"],
         ["မြန်မာစကားပြောစနစ်ကို ကြိုဆိုပါတယ်"],
@@ -116,13 +125,6 @@ demo = gr.Interface(
     ]
 )
# Initialize model at startup
# init_model() signals failure by returning (None, None) instead of raising,
# so success has to be judged from the returned values; the absence of an
# exception alone does not mean the model was loaded.
try:
    MODEL, HPARAMS = init_model()
except Exception as e:
    print(f"Error initializing model: {str(e)}")
else:
    if MODEL is not None and HPARAMS is not None:
        print("Model initialized successfully!")
 # Launch the app
 if __name__ == "__main__":
     demo.launch()

 import os
+import sys
 import gradio as gr
 import numpy as np
 import torch
+import subprocess
+import shutil
+from pathlib import Path
+# Model repository information
+REPO_URL = "https://github.com/hpbyte/myanmar-tts.git"
 MODEL_DIR = "trained_model"
+REPO_DIR = "myanmar-tts"
# Check and install the package if not already installed
def setup_environment():
    """Prepare the runtime so the myanmar-tts sources can be imported.

    Performs three idempotent steps:

    1. Clones ``REPO_URL`` into ``REPO_DIR`` when that directory is missing.
    2. Appends the absolute path of ``REPO_DIR`` to ``sys.path`` if absent.
    3. Creates ``MODEL_DIR`` when it does not exist.

    Returns:
        str: One line per step actually performed, followed by
        ``"Environment setup complete"``.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits with a
            non-zero status.
    """
    steps = []

    if not os.path.exists(REPO_DIR):
        steps.append("Cloning repository...")
        # Pass the destination explicitly so the clone always lands in the
        # directory that the existence check and the sys.path entry refer to,
        # rather than in a name git derives from the URL.
        subprocess.run(["git", "clone", REPO_URL, REPO_DIR], check=True)

    repo_path = os.path.abspath(REPO_DIR)
    if repo_path not in sys.path:
        sys.path.append(repo_path)
        steps.append(f"Added {repo_path} to Python path")

    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)
        steps.append(f"Created {MODEL_DIR} directory")

    steps.append("Environment setup complete")
    return "\n".join(steps)
# Function to synthesize speech
# Holds at most one loaded (model, hparams) pair, keyed by the model file
# paths and their modification times, so the checkpoint is not re-read from
# disk on every request but is reloaded when the files are replaced.
_TTS_MODEL_CACHE = {}


def synthesize_speech(text):
    """Synthesize ``text`` to ``output.wav`` and report the outcome.

    Args:
        text: Text to speak; converted to a symbol sequence by
            ``text_to_sequence`` using the ``burmese_cleaners`` cleaner.

    Returns:
        tuple: ``("output.wav", "Speech generated successfully!")`` on
        success. ``(None, message)`` when the model files are missing or any
        step raises; exceptions are reported in the message, not propagated.
    """
    try:
        # Import necessary modules from the repository. Only extend sys.path
        # once; appending on every request would grow it without bound.
        if REPO_DIR not in sys.path:
            sys.path.append(REPO_DIR)
        # NOTE(review): assumes the clone exposes a `myanmar_tts` package on
        # sys.path - confirm against the repository layout.
        from myanmar_tts.text import text_to_sequence
        from myanmar_tts.utils.hparams import create_hparams
        from myanmar_tts.train import load_model
        from myanmar_tts.synthesis import generate_speech
        import scipy.io.wavfile

        # Check if model exists, if not provide instructions
        checkpoint_path = os.path.join(MODEL_DIR, "checkpoint_latest.pth.tar")
        config_path = os.path.join(MODEL_DIR, "hparams.yml")
        if not os.path.exists(checkpoint_path) or not os.path.exists(config_path):
            return None, f"""Model files not found. Please upload:
1. The checkpoint file at: {checkpoint_path}
2. The hparams.yml file at: {config_path}
You can obtain these files from the original repository or by training the model."""

        # Load the model and hyperparameters, reusing the cached pair when
        # neither file has changed since it was loaded.
        cache_key = (
            checkpoint_path,
            os.path.getmtime(checkpoint_path),
            config_path,
            os.path.getmtime(config_path),
        )
        cached = _TTS_MODEL_CACHE.get(cache_key)
        if cached is None:
            hparams = create_hparams(config_path)
            model = load_model(hparams)
            # NOTE: torch.load unpickles the file; only use checkpoints from
            # a trusted source.
            checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
            model.load_state_dict(checkpoint['state_dict'])
            model.eval()
            _TTS_MODEL_CACHE.clear()  # never keep more than one model alive
            cached = _TTS_MODEL_CACHE[cache_key] = (model, hparams)
        model, hparams = cached

        # Process text input; [None, :] adds a leading batch axis of size one.
        sequence = np.array(text_to_sequence(text, ['burmese_cleaners']))[None, :]
        # torch.autograd.Variable is a deprecated no-op wrapper; a plain
        # tensor is equivalent.
        sequence = torch.from_numpy(sequence).cpu().long()

        # Generate mel spectrograms and the waveform without autograd
        # tracking: this is pure inference, so no graph should be built.
        with torch.no_grad():
            _, mel_outputs_postnet, _, _ = model.inference(sequence)
            waveform = generate_speech(mel_outputs_postnet, hparams)

        # Save and return the audio
        output_path = "output.wav"
        scipy.io.wavfile.write(output_path, hparams.sampling_rate, waveform)
        return output_path, "Speech generated successfully!"
    except Exception as e:
        return None, f"Error: {str(e)}\n\nMake sure you have uploaded the model files to the {MODEL_DIR} directory."
# Function for the Gradio interface
def tts_interface(text):
    """Gradio callback: validate the input and delegate to synthesis.

    Args:
        text: Text entered by the user. ``None`` and whitespace-only input
            are both treated as empty.

    Returns:
        tuple: ``(None, "Please enter some text.")`` for empty input,
        otherwise whatever ``synthesize_speech(text)`` returns.
    """
    # Guard against None as well as blank strings; calling .strip() on None
    # would raise AttributeError instead of showing the friendly message.
    if text is None or not text.strip():
        return None, "Please enter some text."
    return synthesize_speech(text)
+# Set up the environment
+setup_message = setup_environment()
+print(setup_message)
+# Create the Gradio interface
 demo = gr.Interface(
     fn=tts_interface,
     inputs=[
     This is a demo of the Myanmar Text-to-Speech system developed by hpbyte.
     Enter Burmese text in the box below and click 'Submit' to generate speech.
+    **Note:** You need to upload the model files to the 'trained_model' directory:
+    - checkpoint_latest.pth.tar
+    - hparams.yml
     GitHub Repository: https://github.com/hpbyte/myanmar-tts
     """,
     examples=[
         ["မင်္ဂလာပါ"],
         ["မြန်မာစကားပြောစနစ်ကို ကြိုဆိုပါတယ်"],
     ]
 )
 # Launch the app
 if __name__ == "__main__":
     demo.launch()