Commit
·
ba5e3a9
1
Parent(s):
ede25a6
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from nemo.collections.asr.models import
|
| 2 |
import yt_dlp as youtube_dl
|
| 3 |
import os
|
| 4 |
import tempfile
|
|
@@ -7,10 +7,10 @@ import gradio as gr
|
|
| 7 |
from pydub import AudioSegment
|
| 8 |
|
| 9 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 10 |
-
MODEL_NAME="nvidia/parakeet-
|
| 11 |
YT_LENGTH_LIMIT_S=3600
|
| 12 |
|
| 13 |
-
model =
|
| 14 |
model.eval()
|
| 15 |
|
| 16 |
def get_transcripts(audio_path):
|
|
@@ -19,8 +19,8 @@ def get_transcripts(audio_path):
|
|
| 19 |
|
| 20 |
article = (
|
| 21 |
"<p style='text-align: center'>"
|
| 22 |
-
"<a href='https://huggingface.co/nvidia/parakeet-
|
| 23 |
-
"<a href='https://arxiv.org/abs/
|
| 24 |
"<a href='https://github.com/NVIDIA/NeMo' target='_blank'>🧑‍💻 Repository</a>"
|
| 25 |
"</p>"
|
| 26 |
)
|
|
@@ -92,11 +92,11 @@ mf_transcribe = gr.Interface(
|
|
| 92 |
],
|
| 93 |
outputs="text",
|
| 94 |
theme="huggingface",
|
| 95 |
-
title="Parakeet
|
| 96 |
description=(
|
| 97 |
"Transcribe microphone or audio inputs with the click of a button! Demo uses the"
|
| 98 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
|
| 99 |
-
" of arbitrary length."
|
| 100 |
),
|
| 101 |
allow_flagging="never",
|
| 102 |
)
|
|
@@ -108,11 +108,11 @@ file_transcribe = gr.Interface(
|
|
| 108 |
],
|
| 109 |
outputs="text",
|
| 110 |
theme="huggingface",
|
| 111 |
-
title="Parakeet
|
| 112 |
description=(
|
| 113 |
"Transcribe microphone or audio inputs with the click of a button! Demo uses the"
|
| 114 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
|
| 115 |
-
" of arbitrary length."
|
| 116 |
),
|
| 117 |
allow_flagging="never",
|
| 118 |
)
|
|
@@ -124,11 +124,11 @@ youtube_transcribe = gr.Interface(
|
|
| 124 |
],
|
| 125 |
outputs=["html", "text"],
|
| 126 |
theme="huggingface",
|
| 127 |
-
title="Parakeet
|
| 128 |
description=(
|
| 129 |
"Transcribe microphone or audio inputs with the click of a button! Demo uses the"
|
| 130 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
|
| 131 |
-
" of arbitrary length."
|
| 132 |
),
|
| 133 |
allow_flagging="never",
|
| 134 |
)
|
|
|
|
| 1 |
+
from nemo.collections.asr.models import ASRModel
|
| 2 |
import yt_dlp as youtube_dl
|
| 3 |
import os
|
| 4 |
import tempfile
|
|
|
|
| 7 |
from pydub import AudioSegment
|
| 8 |
|
| 9 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 10 |
+
MODEL_NAME="nvidia/parakeet-tdt-1.1b"
|
| 11 |
YT_LENGTH_LIMIT_S=3600
|
| 12 |
|
| 13 |
+
model = ASRModel.from_pretrained(model_name=MODEL_NAME).to(device)
|
| 14 |
model.eval()
|
| 15 |
|
| 16 |
def get_transcripts(audio_path):
|
|
|
|
| 19 |
|
| 20 |
article = (
|
| 21 |
"<p style='text-align: center'>"
|
| 22 |
+
"<a href='https://huggingface.co/nvidia/parakeet-tdt-1.1b' target='_blank'>🎙️ Learn more about Parakeet TDT model</a> | "
|
| 23 |
+
"<a href='https://arxiv.org/abs/2304.06795' target='_blank'>📚 TDT ICML paper</a> | "
|
| 24 |
"<a href='https://github.com/NVIDIA/NeMo' target='_blank'>🧑‍💻 Repository</a>"
|
| 25 |
"</p>"
|
| 26 |
)
|
|
|
|
| 92 |
],
|
| 93 |
outputs="text",
|
| 94 |
theme="huggingface",
|
| 95 |
+
title="Parakeet TDT 1.1B: Transcribe Audio",
|
| 96 |
description=(
|
| 97 |
"Transcribe microphone or audio inputs with the click of a button! Demo uses the"
|
| 98 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
|
| 99 |
+
" of arbitrary length. TDT models are 75% more efficient than similar size RNNT model"
|
| 100 |
),
|
| 101 |
allow_flagging="never",
|
| 102 |
)
|
|
|
|
| 108 |
],
|
| 109 |
outputs="text",
|
| 110 |
theme="huggingface",
|
| 111 |
+
title="Parakeet TDT 1.1B: Transcribe Audio",
|
| 112 |
description=(
|
| 113 |
"Transcribe microphone or audio inputs with the click of a button! Demo uses the"
|
| 114 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
|
| 115 |
+
" of arbitrary length. TDT models are 75% more efficient than similar size RNNT model"
|
| 116 |
),
|
| 117 |
allow_flagging="never",
|
| 118 |
)
|
|
|
|
| 124 |
],
|
| 125 |
outputs=["html", "text"],
|
| 126 |
theme="huggingface",
|
| 127 |
+
title="Parakeet TDT 1.1B: Transcribe Audio",
|
| 128 |
description=(
|
| 129 |
"Transcribe microphone or audio inputs with the click of a button! Demo uses the"
|
| 130 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
|
| 131 |
+
" of arbitrary length. TDT models are 75% more efficient than similar size RNNT model"
|
| 132 |
),
|
| 133 |
allow_flagging="never",
|
| 134 |
)
|