Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -133,9 +133,9 @@ def extract_with_llm(text):
|
|
| 133 |
def extract_metadata_from_query(query: str):
|
| 134 |
"""Use spaCy + LLM to extract role/location/department from user query."""
|
| 135 |
spacy_res = extract_with_spacy(query)
|
| 136 |
-
|
| 137 |
llm_res = extract_with_llm(query)
|
| 138 |
-
|
| 139 |
|
| 140 |
|
| 141 |
return {
|
|
@@ -187,31 +187,31 @@ def generate_rag_based_response(user_input, retriever, k=3, max_tokens=800, temp
|
|
| 187 |
# Step 1: Extract personalization metadata from query
|
| 188 |
query_metadata = extract_metadata_from_query(user_input)
|
| 189 |
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
|
| 194 |
# 2. Retrieve top-k docs semantically
|
| 195 |
retrieved_docs = retriever.get_relevant_documents(user_input, k=k)
|
| 196 |
-
|
| 197 |
|
| 198 |
# 3. Apply metadata filtering
|
| 199 |
filtered_docs = filter_docs_by_metadata(retrieved_docs, query_metadata)
|
| 200 |
if filtered_docs:
|
| 201 |
selected_docs = filtered_docs
|
| 202 |
-
|
| 203 |
else:
|
| 204 |
selected_docs = retrieved_docs # fallback if no metadata match
|
| 205 |
-
|
| 206 |
|
| 207 |
|
| 208 |
|
| 209 |
# Step 4: Log retrieved docs metadata
|
| 210 |
-
|
| 211 |
for i, d in enumerate(selected_docs, 1):
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
|
| 216 |
|
| 217 |
|
|
@@ -358,4 +358,3 @@ gr.ChatInterface(
|
|
| 358 |
).launch()
|
| 359 |
|
| 360 |
|
| 361 |
-
|
|
|
|
| 133 |
def extract_metadata_from_query(query: str):
|
| 134 |
"""Use spaCy + LLM to extract role/location/department from user query."""
|
| 135 |
spacy_res = extract_with_spacy(query)
|
| 136 |
+
print(f"spaCy results ## ==>{spacy_res}")
|
| 137 |
llm_res = extract_with_llm(query)
|
| 138 |
+
print(f"LLM Extraction Results ## ==>{llm_res}")
|
| 139 |
|
| 140 |
|
| 141 |
return {
|
|
|
|
| 187 |
# Step 1: Extract personalization metadata from query
|
| 188 |
query_metadata = extract_metadata_from_query(user_input)
|
| 189 |
|
| 190 |
+
print("\n======================")
|
| 191 |
+
print(f"User Query: {user_input}")
|
| 192 |
+
print(f"Extracted metadata from query: {query_metadata}")  # Investigatory log
|
| 193 |
|
| 194 |
# 2. Retrieve top-k docs semantically
|
| 195 |
retrieved_docs = retriever.get_relevant_documents(user_input, k=k)
|
| 196 |
+
print(f"Retrieved {len(retrieved_docs)} docs before filtering")
|
| 197 |
|
| 198 |
# 3. Apply metadata filtering
|
| 199 |
filtered_docs = filter_docs_by_metadata(retrieved_docs, query_metadata)
|
| 200 |
if filtered_docs:
|
| 201 |
selected_docs = filtered_docs
|
| 202 |
+
print(f"✅ {len(selected_docs)} docs kept after metadata filtering")
|
| 203 |
else:
|
| 204 |
selected_docs = retrieved_docs # fallback if no metadata match
|
| 205 |
+
print("⚠️ No metadata match, falling back to semantic retrieval only")
|
| 206 |
|
| 207 |
|
| 208 |
|
| 209 |
# Step 4: Log retrieved docs metadata
|
| 210 |
+
print(f"✅ Retrieved {len(selected_docs)} docs")
|
| 211 |
for i, d in enumerate(selected_docs, 1):
|
| 212 |
+
print(f"\n--- Chunk {i} ---")
|
| 213 |
+
print(f"Text: {d.page_content[:200]}...")  # preview first 200 chars
|
| 214 |
+
print(f"Metadata: {d.metadata}")
|
| 215 |
|
| 216 |
|
| 217 |
|
|
|
|
| 358 |
).launch()
|
| 359 |
|
| 360 |
|
|
|