avinash committed
Commit · 299633b
1 Parent(s): d425d6c

added tiny llama

Files changed:
- app.py +14 -8
- asr.py +1 -2
- llm_agent.py +19 -0
- requirements.txt +1 -1
app.py
CHANGED
@@ -1,17 +1,23 @@
 import gradio as gr
 from asr import transcribe_audio
+from llm_agent import get_llm_reply
 
-def …
+def voice_to_reply(audio):
     if audio is None:
-        return "No audio received!"
-    …
+        return "No audio received!", "..."
+    text = transcribe_audio(audio)
+    reply = get_llm_reply(text)
+    return text, reply
 
 ui = gr.Interface(
-    fn=…
-    inputs=gr.Audio(type="filepath", label="Upload…
-    outputs=…
-    …
-    …
+    fn=voice_to_reply,
+    inputs=gr.Audio(type="filepath", label="Upload voice (.wav or .mp3)"),
+    outputs=[
+        gr.Textbox(label="📝 Transcribed Text"),
+        gr.Textbox(label="🤖 LLM Reply")
+    ],
+    title="VoiceFreight AI v1: ASR + LLM",
+    description="Upload a voice file. ASR + TinyLlama gives you a response!"
 )
 
 if __name__ == "__main__":
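The diff view cuts off both panes at the __main__ guard, so the launch line itself is not visible. A minimal sketch of the likely ending, assuming the standard Gradio entry point; the exact launch arguments are an assumption, not part of the visible commit:

# Assumed continuation of app.py (truncated in the diff view above):
if __name__ == "__main__":
    ui.launch()  # standard Gradio launch; any arguments are not visible in the commit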
asr.py
CHANGED
@@ -1,7 +1,6 @@
-# asr.py
 import whisper
 
-model = whisper.load_model("tiny")
+model = whisper.load_model("tiny")
 
 def transcribe_audio(file_path: str) -> str:
     result = model.transcribe(file_path)
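The function body is truncated in both panes of this diff. For orientation, a minimal sketch of the complete module as the diff implies it, assuming the standard openai-whisper API; the final return line is a guess, not part of the visible commit:

# asr.py sketch: only the trailing return is assumed, the rest mirrors the diff
import whisper

model = whisper.load_model("tiny")  # smallest Whisper checkpoint: fastest, least accurate

def transcribe_audio(file_path: str) -> str:
    result = model.transcribe(file_path)
    return result["text"]  # assumption: transcribe() returns a dict with a "text" key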
llm_agent.py
ADDED
@@ -0,0 +1,19 @@
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+# Load the model (TinyLlama)
+model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+
+def get_llm_reply(user_input: str) -> str:
+    prompt = f"User: {user_input}\nAssistant:"
+    inputs = tokenizer(prompt, return_tensors="pt")
+    output = model.generate(
+        **inputs,
+        max_new_tokens=100,
+        do_sample=True,
+        temperature=0.7
+    )
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    return response.replace(prompt, "").strip()
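Two caveats about this file: TinyLlama-1.1B-Chat ships with a chat template, so the bare "User:/Assistant:" prompt works but may sit off-distribution, and stripping the prompt with replace() breaks if the model echoes it with different whitespace. A hedged alternative, not part of this commit, that reuses the tokenizer and model objects above, applies the built-in chat template, and decodes only the newly generated tokens:

# Hypothetical variant of get_llm_reply; name and approach are illustrative.
def get_llm_reply_templated(user_input: str) -> str:
    messages = [{"role": "user", "content": user_input}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    output = model.generate(input_ids, max_new_tokens=100, do_sample=True, temperature=0.7)
    # Slice off the prompt tokens instead of string-replacing the prompt text
    return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()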
requirements.txt
CHANGED
@@ -3,4 +3,4 @@ openai-whisper
 transformers
 torch
 TTS
-accelerate
+accelerate
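One common reason to keep accelerate in the dependency set is that it lets transformers place model weights automatically. A hedged sketch of how llm_agent.py could take advantage of that; this tweak is an assumption, not part of the commit:

# Hypothetical change to the from_pretrained call in llm_agent.py:
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    device_map="auto",          # requires the accelerate package; picks GPU/CPU automatically
    torch_dtype=torch.float16,  # assumption: halves memory on GPU; omit on CPU-only hosts
)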