import gradio as gr
import numpy as np
from transformers import pipeline

# Load the Whisper model for streaming transcription.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en")


def transcribe(stream, new_chunk):
    """Accumulate streamed microphone audio and transcribe it with Whisper.

    Parameters
    ----------
    stream : np.ndarray | None
        Audio accumulated from previous chunks; None on the first call.
    new_chunk : tuple[int, np.ndarray]
        ``(sample_rate, samples)`` pair as delivered by a streaming
        ``gr.Audio`` component.

    Returns
    -------
    tuple[np.ndarray, str]
        The updated accumulated stream and the transcription text.
    """
    sr, y = new_chunk

    # Convert to mono if stereo.
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Normalize to [-1, 1]; guard against an all-silent chunk, where
    # max(|y|) == 0 and the division would fill the buffer with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    stream = y if stream is None else np.concatenate([stream, y])

    # Pass the sampling rate alongside the raw audio: given a bare array the
    # pipeline assumes 16 kHz, but browser microphones commonly record at
    # 44.1/48 kHz, which would garble the transcription.
    transcription = transcriber({"sampling_rate": sr, "raw": stream})["text"]

    # Return the updated stream (kept in "state") and the transcription.
    return stream, transcription


# Streaming Gradio interface: the "state" component carries the accumulated
# audio buffer between successive calls to `transcribe`.
demo = gr.Interface(
    transcribe,
    ["state", gr.Audio(sources=["microphone"], streaming=True)],
    ["state", "text"],
    live=True,
)

if __name__ == "__main__":
    demo.launch(share=True)