"""Gradio demo: transcribe microphone speech with the NeMo Canary ASR model."""

import gradio as gr
from nemo.collections.asr.models import EncDecMultiTaskModel

# Batch size passed to the model for inference.
BATCH_SIZE = 16

# Load the model once at start-up so every request reuses the same instance.
canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')


def transcribe_audio(audio):
    """Transcribe one recorded audio clip with the Canary model.

    Args:
        audio: The recording handed over by the Gradio audio input. Either a
            filesystem path (``type="filepath"``) or a file-like object that
            exposes the path as ``.name``. May be ``None`` when the form is
            submitted without a recording.

    Returns:
        The first element of the model's transcription result, or an empty
        string when no audio was supplied.
    """
    if audio is None:
        # Nothing recorded: do not call the model with an invalid path.
        return ""

    # Accept both a plain path and a tempfile-like wrapper around it.
    audio_path = getattr(audio, "name", audio)

    # NOTE(review): newer NeMo releases appear to name this argument `audio`
    # instead of `paths2audio_files` -- confirm against the installed version.
    predicted_text = canary_model.transcribe(
        paths2audio_files=[audio_path],
        batch_size=BATCH_SIZE,
    )
    return predicted_text[0]


# Interface
# `source` selects the microphone; `type` selects the payload format given to
# the callback. "microphone" is not a valid `type`, so a file path is
# requested instead, which is what the transcribe call consumes.
inputs = gr.inputs.Audio(
    source="microphone",
    label="Speak into the microphone",
    type="filepath",
)
outputs = gr.outputs.Textbox(label="Transcription")
title = "Canary ASR"
description = "Transcribe speech from the microphone using the NeMo Canary ASR model."

interface = gr.Interface(
    transcribe_audio,
    inputs,
    outputs,
    title=title,
    description=description,
)

# Launch interface
interface.launch()