avinash committed
Commit 2ddcf2d · Parent: 6d4e4e8
Files changed (4)
  1. .gitignore +1 -0
  2. app.py +17 -87
  3. asr.py +8 -0
  4. requirements.txt +5 -5
.gitignore ADDED
@@ -0,0 +1 @@
+ venv
app.py CHANGED
@@ -1,89 +1,19 @@
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
- from sentence_transformers import SentenceTransformer
- import PyPDF2
- import faiss
- import numpy as np
- import os
-
- def extract_text_from_pdf(pdf_path):
-     text = ""
-     if pdf_path.endswith('.txt'):
-         with open(pdf_path, 'r', encoding='utf-8') as f:
-             text = f.read()
-     else:
-         with open(pdf_path, 'rb') as f:
-             reader = PyPDF2.PdfReader(f)
-             for page in reader.pages:
-                 text += page.extract_text() or ""
-     return text
-
- def split_text(text, chunk_size=512, overlap=64):
-     words = text.split()
-     chunks = []
-     for i in range(0, len(words), chunk_size - overlap):
-         chunk = " ".join(words[i:i+chunk_size])
-         chunks.append(chunk)
-     return chunks
-
- def build_faiss_index(embedding_model, chunks):
-     embeddings = embedding_model.encode(chunks)
-     index = faiss.IndexFlatL2(embeddings.shape[1])
-     index.add(np.array(embeddings))
-     return index, embeddings
-
- def get_top_k_chunks(query, chunks, embedding_model, index, k=5):
-     query_vec = embedding_model.encode([query])
-     D, I = index.search(np.array(query_vec), k)
-     return [chunks[i] for i in I[0]]
-
- def setup_models():
-     model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-     model = AutoModelForCausalLM.from_pretrained(model_name)
-     embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-     return tokenizer, model, embedding_model
-
- def generate_response(tokenizer, model, context_chunks, query):
-     context = "\n".join(context_chunks)
-     prompt = f"""<|system|>
- You are a helpful assistant. Use the context below to answer the user's question.
-
- CONTEXT:
- {context}
-
- <|user|>
- {query}
-
- <|assistant|>"""
-
-     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
-
-     with torch.no_grad():
-         outputs = model.generate(
-             inputs.input_ids,
-             max_length=2048,
-             temperature=0.7,
-             do_sample=True,
-             pad_token_id=tokenizer.eos_token_id,
-         )
-
-     response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
-     return response.strip()
-
- # =====================
  if __name__ == "__main__":
-     pdf_path = "./doc.txt" # Or .pdf
-     query = "What is the main topic of the document?"
-
-     # Setup
-     text = extract_text_from_pdf(pdf_path)
-     chunks = split_text(text)
-     tokenizer, model, embedding_model = setup_models()
-     index, _ = build_faiss_index(embedding_model, chunks)
-
-     # Retrieval + Generation
-     top_chunks = get_top_k_chunks(query, chunks, embedding_model, index)
-     response = generate_response(tokenizer, model, top_chunks, query)
-
-     print("Response:\n", response)
 
+ # app.py
+ import gradio as gr
+ from asr import transcribe_audio
+
+ def process_audio(audio):
+     if audio is None:
+         return "No audio received!"
+     return transcribe_audio(audio)
+
+ ui = gr.Interface(
+     fn=process_audio,
+     inputs=gr.Audio(source="microphone", type="filepath"),
+     outputs="text",
+     title="🎤 Whisper ASR Tester",
+     description="Speak into the mic and see the transcribed text using Whisper-tiny."
+ )
+
  if __name__ == "__main__":
+     ui.launch()
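Note: gr.Audio(source="microphone", ...) matches the Gradio 3.x API. Because requirements.txt leaves gradio unpinned, the resolver may install Gradio 4.x, which renamed this parameter. A minimal sketch of the 4.x-compatible call, assuming gradio >= 4.0 (not part of this commit):

# Hypothetical Gradio 4.x variant: only the gr.Audio arguments change
# (source="microphone" became sources=["microphone"]).
import gradio as gr
from asr import transcribe_audio

def process_audio(audio):
    if audio is None:
        return "No audio received!"
    return transcribe_audio(audio)

ui = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="🎤 Whisper ASR Tester",
)

if __name__ == "__main__":
    ui.launch()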
asr.py ADDED
@@ -0,0 +1,8 @@
+ # asr.py
+ import whisper
+
+ model = whisper.load_model("tiny") # lightweight + fast
+
+ def transcribe_audio(file_path: str) -> str:
+     result = model.transcribe(file_path)
+     return result["text"]
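For a quick sanity check of asr.py outside the Gradio UI, a sketch along these lines works; "sample.wav" is an illustrative placeholder, and whisper.load_model("tiny") downloads the model weights on first run.

# Standalone smoke test for asr.py (the path is a placeholder; any
# short audio file that ffmpeg can decode will do).
from asr import transcribe_audio

if __name__ == "__main__":
    print(transcribe_audio("sample.wav"))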
requirements.txt CHANGED
@@ -1,6 +1,6 @@
- torch
  transformers
- datasets
- sentence-transformers
- faiss-cpu
- PyPDF2

+ gradio
+ openai-whisper
  transformers
+ torch
+ TTS
+ accelerate
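Two notes on the new dependency set: TTS and accelerate are not imported by app.py or asr.py in this commit, so they appear to be staged for later work, and openai-whisper decodes audio by shelling out to ffmpeg, which pip does not install. A hypothetical fail-fast guard (illustrative, not part of the commit):

# Illustrative helper: verify ffmpeg is on PATH before loading Whisper,
# since openai-whisper needs it to decode audio input.
import shutil

def assert_ffmpeg() -> None:
    if shutil.which("ffmpeg") is None:
        raise RuntimeError("ffmpeg not found on PATH; install it before running the app.")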