naveen07garg committed on
Commit
7a1baef
·
verified ·
1 Parent(s): 23fc7ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -105
app.py CHANGED
@@ -1,97 +1,180 @@
1
- #import gradio as gr
2
- #from huggingface_hub import InferenceClient
3
- #
4
- #
5
- #def respond(
6
- # message,
7
- # history: list[dict[str, str]],
8
- # system_message,
9
- # max_tokens,
10
- # temperature,
11
- # top_p,
12
- # hf_token: gr.OAuthToken,
13
- #):
14
- # """
15
- # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
- # """
17
- # client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
- #
19
- # messages = [{"role": "system", "content": system_message}]
20
- #
21
- # messages.extend(history)
22
- #
23
- # messages.append({"role": "user", "content": message})
24
- #
25
- # response = ""
26
- #
27
- # for message in client.chat_completion(
28
- # messages,
29
- # max_tokens=max_tokens,
30
- # stream=True,
31
- # temperature=temperature,
32
- # top_p=top_p,
33
- # ):
34
- # choices = message.choices
35
- # token = ""
36
- # if len(choices) and choices[0].delta.content:
37
- # token = choices[0].delta.content
38
- #
39
- # response += token
40
- # yield response
41
- #
42
- #
43
- #"""
44
- #For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- #"""
46
- #chatbot = gr.ChatInterface(
47
- # respond,
48
- # type="messages",
49
- # additional_inputs=[
50
- # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- # gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- # gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- # gr.Slider(
54
- # minimum=0.1,
55
- # maximum=1.0,
56
- # value=0.95,
57
- # step=0.05,
58
- # label="Top-p (nucleus sampling)",
59
- # ),
60
- # ],
61
- #)
62
- #
63
- #with gr.Blocks() as demo:
64
- # with gr.Sidebar():
65
- # gr.LoginButton()
66
- # chatbot.render()
67
- #
68
- #
69
- #if __name__ == "__main__":
70
- # demo.launch()
71
- #
72
- #
73
-
74
-
75
- # app.py
76
- #import gradio as gr
77
-
78
- #def chat_fn(message, history):
79
- # # history is a list of (user, bot) pairs
80
- # response = f"🤖 You said: {message}"
81
- # return response
82
- #
83
- #gr.ChatInterface(
84
- # fn=chat_fn,
85
- # title="BubbleBot",
86
- # description="A friendly chatbot built with Gradio on Hugging Face Spaces."
87
- #).launch()
88
-
89
-
90
- #--==== Fancy bubbles ====
91
-
92
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  css1 = r"""
96
  #chatbot .user {
97
  background: linear-gradient(to bottom right, #93c5fd, #60a5fa);
@@ -115,7 +198,6 @@ css1 = r"""
115
  margin-right: auto;
116
  box-shadow: 0 2px 6px rgba(0,0,0,0.05);
117
  }
118
-
119
  @keyframes typing {
120
  0%, 100% { opacity: 0.4; transform: translateY(0); }
121
  50% { opacity: 1; transform: translateY(-4px); }
@@ -123,7 +205,6 @@ css1 = r"""
123
  .typing-dot {
124
  animation: typing 1s infinite;
125
  }
126
-
127
  """
128
 
129
  css = """
@@ -133,20 +214,16 @@ css = """
133
  padding: 15px;
134
  overflow-y: auto;
135
  }
136
-
137
  #chatbot .message {
138
  display: flex;
139
  margin: 10px 0;
140
  }
141
-
142
  #chatbot .message.user {
143
  justify-content: flex-end;
144
  }
145
-
146
  #chatbot .message.bot {
147
  justify-content: flex-start;
148
  }
149
-
150
  /* User bubble */
151
  #chatbot .message.user .bubble {
152
  background: linear-gradient(135deg, #4CAF50, #81C784);
@@ -156,7 +233,6 @@ css = """
156
  max-width: 70%;
157
  box-shadow: 0 2px 5px rgba(0,0,0,0.15);
158
  }
159
-
160
  /* Bot bubble */
161
  #chatbot .message.bot .bubble {
162
  background: linear-gradient(135deg, #2196F3, #64B5F6);
@@ -166,26 +242,28 @@ css = """
166
  max-width: 70%;
167
  box-shadow: 0 2px 5px rgba(0,0,0,0.15);
168
  }
169
-
170
  /* Optional: add smooth fade-in animation */
171
  @keyframes bubblePop {
172
  from { transform: scale(0.95); opacity: 0; }
173
  to { transform: scale(1); opacity: 1; }
174
  }
175
-
176
  #chatbot .bubble {
177
  animation: bubblePop 0.2s ease-out;
178
  }
179
  """
180
 
181
 
182
- def respond(message, history):
183
- return f"BubbleBot says: {message}"
 
 
 
 
184
 
185
  gr.ChatInterface(
186
- fn=respond,
187
- title="BubbleBot",
188
- description="Stylish chat UI built with Gradio",
189
  theme="soft",
190
  css=css
191
  ).launch()
 
1
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
3
+ from huggingface_hub import snapshot_download
4
+ from langchain.embeddings import SentenceTransformerEmbeddings
5
+ from langchain_community.vectorstores import Chroma
6
+ from transformers import pipeline
7
+
8
# =========================================================
# Step 1: Download Vectorstore from Hugging Face Dataset
# =========================================================
VECTOR_DIR = "vectorstore/chroma"
DATASET_REPO = "naveen07garg/AirlineChatBot-vectorstore"

# Download when the directory is missing *or* present but empty. An empty
# leftover directory (e.g. from an interrupted earlier run) would otherwise
# skip the download, and the store opened below would presumably contain no
# documents -- TODO confirm Chroma's behaviour on an empty persist directory.
_vector_dir_is_empty = os.path.isdir(VECTOR_DIR) and not os.listdir(VECTOR_DIR)
if not os.path.exists(VECTOR_DIR) or _vector_dir_is_empty:
    print("Downloading vectorstore from Hugging Face dataset...")
    snapshot_download(
        repo_id=DATASET_REPO,
        repo_type="dataset",
        local_dir=VECTOR_DIR,
        ignore_patterns=[".gitattributes"],
    )
    print("✅ Vectorstore downloaded successfully!")

# =============================
# Step 2: Load Chroma Vectorstore
# =============================
# The embedding model is passed to Chroma so queries are embedded with it.
# NOTE(review): it presumably must match the model used to build the
# downloaded store -- verify against the indexing code.
embedding_fn = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectordb = Chroma(persist_directory=VECTOR_DIR, embedding_function=embedding_fn)
retriever = vectordb.as_retriever(search_kwargs={"k": 3})
print("Chroma vectorstore loaded successfully!")

# =============================
# Step 3: Load LLM
# =============================
# NOTE(review): qa_model is not referenced by any code visible in this file
# (generate_rag_based_response builds a ChatOpenAI client instead). Confirm
# whether loading this model at import time is still needed.
qa_model = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")
36
+
37
+ # =============================
38
+ # Step 4: RAG Response Function
39
+ # =============================
40
+
41
+ # -----------------------
42
+ # User Query Enrichment
43
+ # -----------------------
44
def extract_metadata_from_query(query: str):
    """Extract role, location and department hints from a user query.

    Runs both extractors (``extract_with_spacy`` and ``extract_with_llm``) on
    the query and merges their results per category, dropping duplicates.

    NOTE(review): ``extract_with_spacy`` and ``extract_with_llm`` are not
    defined or imported in the visible part of this file -- confirm they are
    provided elsewhere, otherwise calling this function raises ``NameError``.

    Args:
        query: Raw user question.

    Returns:
        A dict with the keys ``"roles"``, ``"locations"`` and
        ``"departments"``, each mapping to a list of unique values in
        first-seen order (spaCy results first, then LLM results).
    """
    # Imported locally: the module's import block does not bring in logging,
    # so the bare ``logging.info`` calls would otherwise raise NameError.
    import logging

    spacy_res = extract_with_spacy(query) or {}
    logging.info("spaCy results ## ==>%s", spacy_res)
    llm_res = extract_with_llm(query) or {}
    logging.info("LLM Extraction Results ## ==>%s", llm_res)

    def _merge(category):
        # A missing or None category counts as empty instead of raising KeyError.
        combined = list(spacy_res.get(category) or []) + list(llm_res.get(category) or [])
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        return list(dict.fromkeys(combined))

    return {category: _merge(category) for category in ("roles", "locations", "departments")}
57
+
58
+ # -------------------------------
59
+ # Helper: Filter docs manually
60
+ # -------------------------------
61
def filter_docs_by_metadata(docs, metadata_filters):
    """Keep only the documents whose metadata matches every active filter.

    A filter category (``"roles"``, ``"locations"``, ``"departments"``) is
    active when ``metadata_filters`` holds a non-empty value for it. A
    document passes an active category when at least one requested value is
    contained in the document's metadata entry of the same name. A document
    must pass all active categories to be kept.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping.
        metadata_filters: Mapping of category name to requested values.
            ``None`` or an empty mapping disables filtering.

    Returns:
        A new list with the matching documents, in their original order.
    """
    # No filters at all: nothing to exclude.
    if not metadata_filters:
        return list(docs)

    active_filters = {
        category: metadata_filters[category]
        for category in ("roles", "locations", "departments")
        if metadata_filters.get(category)
    }

    def _matches(doc):
        meta = doc.metadata or {}
        # ``or []`` treats a missing or None metadata entry as "no values",
        # so the membership test cannot raise TypeError on None.
        return all(
            any(wanted in (meta.get(category) or []) for wanted in wanted_values)
            for category, wanted_values in active_filters.items()
        )

    return [doc for doc in docs if _matches(doc)]
75
+
76
+
77
+
78
def generate_rag_based_response(user_input, retriever, k=3, max_tokens=800, temperature=0, top_p=0.95):
    """Answer a user query from retrieved context, citing the source sections.

    The query is enriched with role/location/department metadata, documents
    are retrieved semantically, and documents matching that metadata are
    preferred. If none match, the unfiltered retrieval result is used so that
    valid answers are not blocked.

    NOTE(review): ``ChatOpenAI``, ``hr_user_message_template`` and
    ``QNA_SYSTEM_MESSAGE`` are not defined or imported in the visible part of
    this file -- confirm they are provided elsewhere.

    Args:
        user_input: User query string.
        retriever: LangChain retriever (from Chroma).
        k: Maximum number of retrieved documents to use.
        max_tokens: Token limit passed to the chat model.
        temperature: Sampling temperature passed to the chat model.
        top_p: Accepted for interface compatibility; not currently forwarded
            to the chat model.

    Returns:
        The generated response text, or a string starting with " Error: "
        when the model call fails.
    """
    # Imported locally: the module's import block does not bring in logging.
    import logging

    # Step 1: Extract personalization metadata from the query
    query_metadata = extract_metadata_from_query(user_input)

    logging.info("\n======================")
    logging.info("User Query: %s", user_input)
    logging.info("Extracted metadata from query: %s", query_metadata)  # Investigatory log

    # Step 2: Retrieve top-k docs semantically.
    # The slice enforces ``k`` even if the retriever ignores the extra keyword
    # and falls back to its own configured result count.
    retrieved_docs = retriever.get_relevant_documents(user_input, k=k)[:k]
    logging.info("Retrieved %d docs before filtering", len(retrieved_docs))

    # Step 3: Apply metadata filtering, falling back to plain semantic results
    filtered_docs = filter_docs_by_metadata(retrieved_docs, query_metadata)
    if filtered_docs:
        selected_docs = filtered_docs
        logging.info("✅ %d docs kept after metadata filtering", len(selected_docs))
    else:
        selected_docs = retrieved_docs  # fallback if no metadata match
        logging.info("⚠️ No metadata match, falling back to semantic retrieval only")

    # Step 4: Log retrieved docs metadata
    logging.info("✅ Retrieved %d docs", len(selected_docs))
    for index, doc in enumerate(selected_docs, 1):
        logging.info("\n--- Chunk %d ---", index)
        logging.info("Text: %s...", doc.page_content[:200])  # preview first 200 chars
        logging.info("Metadata: %s", doc.metadata)

    # Step 5: Build context with citations
    context_for_query = "\n\n---\n\n".join(
        f"Source: {_format_citation(doc.metadata)}\n{doc.page_content}"
        for doc in selected_docs
    )

    # Step 6: Construct prompt
    user_prompt = hr_user_message_template.format(
        context=context_for_query,
        question=user_input,
    )
    messages = [
        {"role": "system", "content": QNA_SYSTEM_MESSAGE},
        {"role": "user", "content": user_prompt},
    ]

    # Step 7: Query the LLM
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=temperature, max_tokens=max_tokens)

    try:
        response = llm.invoke(messages)
        prediction = response.content
    except Exception as e:
        # Best-effort: the chat UI shows the error text instead of crashing,
        # but the traceback is recorded so the failure is not silent.
        logging.exception("LLM call failed")
        prediction = f" Error: {e}"

    return prediction


def _format_citation(meta):
    """Build a "document → section / subsection / subsubsection" label from chunk metadata."""
    citation = f"{meta.get('document')} → {meta.get('section')}"
    # Deeper heading levels are appended only when present and non-empty.
    for level in ("subsection", "subsubsection"):
        if meta.get(level):
            citation += f" / {meta.get(level)}"
    return citation
163
+
164
+ # =============================
165
+ # Step 5: Chat Function
166
+ # =============================
167
def chat_fn(message, history):
    """Gradio chat callback: answer ``message`` through the RAG pipeline.

    Args:
        message: Latest user message.
        history: Prior conversation supplied by Gradio; not used here.

    Returns:
        The generated answer followed by a note naming the dataset repo the
        context was retrieved from.
    """
    # generate_rag_based_response takes the retriever as a required second
    # argument; calling it with only the message raised TypeError on every
    # chat turn. Pass the module-level Chroma retriever.
    answer = generate_rag_based_response(message, retriever)
    return f"{answer}\n\n🧠 (Context retrieved from {DATASET_REPO})"
170
+
171
+
172
+ # =============================
173
+ # Step 6: Chat bubbles UI
174
+ # =============================
175
+
176
+ import gradio as gr
177
+
178
  css1 = r"""
179
  #chatbot .user {
180
  background: linear-gradient(to bottom right, #93c5fd, #60a5fa);
 
198
  margin-right: auto;
199
  box-shadow: 0 2px 6px rgba(0,0,0,0.05);
200
  }
 
201
  @keyframes typing {
202
  0%, 100% { opacity: 0.4; transform: translateY(0); }
203
  50% { opacity: 1; transform: translateY(-4px); }
 
205
  .typing-dot {
206
  animation: typing 1s infinite;
207
  }
 
208
  """
209
 
210
  css = """
 
214
  padding: 15px;
215
  overflow-y: auto;
216
  }
 
217
  #chatbot .message {
218
  display: flex;
219
  margin: 10px 0;
220
  }
 
221
  #chatbot .message.user {
222
  justify-content: flex-end;
223
  }
 
224
  #chatbot .message.bot {
225
  justify-content: flex-start;
226
  }
 
227
  /* User bubble */
228
  #chatbot .message.user .bubble {
229
  background: linear-gradient(135deg, #4CAF50, #81C784);
 
233
  max-width: 70%;
234
  box-shadow: 0 2px 5px rgba(0,0,0,0.15);
235
  }
 
236
  /* Bot bubble */
237
  #chatbot .message.bot .bubble {
238
  background: linear-gradient(135deg, #2196F3, #64B5F6);
 
242
  max-width: 70%;
243
  box-shadow: 0 2px 5px rgba(0,0,0,0.15);
244
  }
 
245
  /* Optional: add smooth fade-in animation */
246
  @keyframes bubblePop {
247
  from { transform: scale(0.95); opacity: 0; }
248
  to { transform: scale(1); opacity: 1; }
249
  }
 
250
  #chatbot .bubble {
251
  animation: bubblePop 0.2s ease-out;
252
  }
253
  """
254
 
255
 
256
+ # =============================
257
+ # Step 7: Launch App
258
+ # =============================
259
+
260
+ #def respond(message, history):
261
+ # return f"BubbleBot says: {message}"
262
 
263
  gr.ChatInterface(
264
+ fn=chat_fn,
265
+ title="Flyline Chatbot ✈ ️",
266
+ description="Ask Flyline HR",
267
  theme="soft",
268
  css=css
269
  ).launch()