|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification |
|
|
|
|
|
|
|
|
# --- Model setup (runs at import time; weights download on first run) ---

# Speech-to-text: OpenAI Whisper large-v3.
# NOTE(review): whisper-large-v3 is a multi-GB model and is slow on CPU —
# confirm the deployment target has a GPU (torch is imported above,
# presumably for device placement).
asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")


# Grammatical-acceptability classifier: RoBERTa fine-tuned on CoLA.
# The pipeline emits {"label": ..., "score": ...}; exact label names
# (e.g. LABEL_0/LABEL_1 vs acceptable/unacceptable) depend on the model
# config — TODO confirm against the model card.
cola_model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-CoLA")

cola_tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-CoLA")

grammar_pipeline = pipeline("text-classification", model=cola_model, tokenizer=cola_tokenizer)


# Seq2seq grammar-correction model (T5 fine-tuned for grammar correction).
correction_pipeline = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
|
|
|
|
|
def process_audio(audio_path):
    """Transcribe an audio clip, score its grammaticality, and suggest a correction.

    Args:
        audio_path: Filesystem path to the recorded/uploaded audio, as handed
            over by the ``gr.Audio(type="filepath")`` component. Gradio passes
            ``None`` when the user submits without recording/uploading.

    Returns:
        A 3-tuple of strings:
        ``(transcription, "LABEL (confidence)", corrected_text)``.
    """
    # BUG FIX: guard the None/empty path Gradio sends on an empty submit,
    # which previously crashed inside the ASR pipeline.
    if not audio_path:
        return "", "No audio provided", ""

    # Speech -> text. Whisper often emits a leading space; strip it so the
    # downstream classifiers and the UI see clean text.
    transcription = asr_pipeline(audio_path)["text"].strip()
    if not transcription:
        return "", "No speech detected", ""

    # Acceptability classification: the pipeline returns a list of
    # {"label": ..., "score": ...} dicts; we only need the top result.
    grammar_result = grammar_pipeline(transcription)[0]
    grammar_score = f"{grammar_result['label']} ({grammar_result['score']:.2f})"

    # Seq2seq correction; max_length caps the generated token count.
    corrected_text = correction_pipeline(transcription, max_length=128)[0]["generated_text"]

    return transcription, grammar_score, corrected_text
|
|
|
|
|
|
|
|
# Wire the processing function to a simple audio-in / three-text-out UI.
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(
        # NOTE(review): `source=` is the Gradio 3.x parameter name; Gradio 4+
        # renamed it to `sources=[...]` — confirm the installed version.
        # Also, despite "or Upload" in the label, source="microphone" enables
        # recording only, not file upload.
        source="microphone",
        type="filepath",  # pass a temp-file path (not raw samples) to process_audio
        label="🎤 Record or Upload Audio (.wav)"
    ),
    outputs=[
        gr.Textbox(label="📝 Transcription"),
        # BUG FIX: this label's string literal was split across two physical
        # lines (a SyntaxError); also restored the mojibake-corrupted emoji
        # throughout the labels/title.
        gr.Textbox(label="✅ Grammar Score"),
        gr.Textbox(label="✏️ Suggested Correction")
    ],
    title="🎙️ Voice Grammar Scorer",
    description="Record or upload your voice (.wav). This app transcribes it, scores grammar, and suggests corrections."
)
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio server (blocks until interrupted). Only runs when the
    # file is executed directly, not when imported.
    interface.launch()
|
|
|