Spaces:

naveen07garg
/

AirlineChatBot

Sleeping

App Files Files Community

naveen07garg committed on Oct 23

Commit

7a1baef

verified ·

1 Parent(s): 23fc7ff

Update app.py

Browse files

Files changed (1) hide show

app.py +183 -105

app.py CHANGED Viewed

@@ -1,97 +1,180 @@
-#import gradio as gr
-#from huggingface_hub import InferenceClient
-#
-#
-#def respond(
-#    message,
-#    history: list[dict[str, str]],
-#    system_message,
-#    max_tokens,
-#    temperature,
-#    top_p,
-#    hf_token: gr.OAuthToken,
-#):
-#    """
-#    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-#    """
-#    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-#
-#    messages = [{"role": "system", "content": system_message}]
-#
-#    messages.extend(history)
-#
-#    messages.append({"role": "user", "content": message})
-#
-#    response = ""
-#
-#    for message in client.chat_completion(
-#        messages,
-#        max_tokens=max_tokens,
-#        stream=True,
-#        temperature=temperature,
-#        top_p=top_p,
-#    ):
-#        choices = message.choices
-#        token = ""
-#        if len(choices) and choices[0].delta.content:
-#            token = choices[0].delta.content
-#
-#        response += token
-#        yield response
-#
-#
-#"""
-#For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-#"""
-#chatbot = gr.ChatInterface(
-#    respond,
-#    type="messages",
-#    additional_inputs=[
-#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-#        gr.Slider(
-#            minimum=0.1,
-#            maximum=1.0,
-#            value=0.95,
-#            step=0.05,
-#            label="Top-p (nucleus sampling)",
-#        ),
-#    ],
-#)
-#
-#with gr.Blocks() as demo:
-#    with gr.Sidebar():
-#        gr.LoginButton()
-#    chatbot.render()
-#
-#
-#if __name__ == "__main__":
-#    demo.launch()
-#
-#
-# app.py
-#import gradio as gr
-#def chat_fn(message, history):
-#    # history is a list of (user, bot) pairs
-#    response = f"🤖 You said: {message}"
-#    return response
-#
-#gr.ChatInterface(
-#    fn=chat_fn,
-#    title="BubbleBot",
-#    description="A friendly chatbot built with Gradio on Hugging Face Spaces."
-#).launch()
-#--==== Fancy bubbles ====
 import gradio as gr
 css1 = r"""
 #chatbot .user {
   background: linear-gradient(to bottom right, #93c5fd, #60a5fa);
@@ -115,7 +198,6 @@ css1 = r"""
   margin-right: auto;
   box-shadow: 0 2px 6px rgba(0,0,0,0.05);
 }
 @keyframes typing {
   0%, 100% { opacity: 0.4; transform: translateY(0); }
   50% { opacity: 1; transform: translateY(-4px); }
@@ -123,7 +205,6 @@ css1 = r"""
 .typing-dot {
   animation: typing 1s infinite;
 }
 """
 css = """
@@ -133,20 +214,16 @@ css = """
   padding: 15px;
   overflow-y: auto;
 }
 #chatbot .message {
   display: flex;
   margin: 10px 0;
 }
 #chatbot .message.user {
   justify-content: flex-end;
 }
 #chatbot .message.bot {
   justify-content: flex-start;
 }
 /* User bubble */
 #chatbot .message.user .bubble {
   background: linear-gradient(135deg, #4CAF50, #81C784);
@@ -156,7 +233,6 @@ css = """
   max-width: 70%;
   box-shadow: 0 2px 5px rgba(0,0,0,0.15);
 }
 /* Bot bubble */
 #chatbot .message.bot .bubble {
   background: linear-gradient(135deg, #2196F3, #64B5F6);
@@ -166,26 +242,28 @@ css = """
   max-width: 70%;
   box-shadow: 0 2px 5px rgba(0,0,0,0.15);
 }
 /* Optional: add smooth fade-in animation */
 @keyframes bubblePop {
   from { transform: scale(0.95); opacity: 0; }
   to { transform: scale(1); opacity: 1; }
 }
 #chatbot .bubble {
   animation: bubblePop 0.2s ease-out;
 }
 """
-def respond(message, history):
-    return f"BubbleBot says: {message}"
 gr.ChatInterface(
-    fn=respond,
-    title="BubbleBot",
-    description="Stylish chat UI built with Gradio",
     theme="soft",
     css=css
 ).launch()

+import logging
+import os
 import gradio as gr
+from huggingface_hub import snapshot_download
+from langchain.embeddings import SentenceTransformerEmbeddings
+from langchain_community.vectorstores import Chroma
+from transformers import pipeline
+# =========================================================
+# Step 1: Download Vectorstore from Hugging Face Dataset
+# =========================================================
+# VECTOR_DIR is used both as the download target below and as Chroma's
+# persist_directory; DATASET_REPO is the dataset repo it is fetched from.
+VECTOR_DIR = "vectorstore/chroma"
+DATASET_REPO = "naveen07garg/AirlineChatBot-vectorstore"
+# Download only when the path does not exist yet. Any existing path, even an
+# empty directory, skips the download.
+# NOTE(review): confirm a partially downloaded directory cannot occur here.
+if not os.path.exists(VECTOR_DIR):
+    print("Downloading vectorstore from Hugging Face dataset...")
+    snapshot_download(
+        repo_id=DATASET_REPO,
+        repo_type="dataset",
+        local_dir=VECTOR_DIR,
+        ignore_patterns=[".gitattributes"],
+    )
+    print("✅ Vectorstore downloaded successfully!")
+# =============================
+# Step 2: Load Chroma Vectorstore
+# =============================
+# NOTE(review): presumably this embedding model must match the one used to
+# build the stored index — confirm against the indexing script.
+embedding_fn = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+vectordb = Chroma(persist_directory=VECTOR_DIR, embedding_function=embedding_fn)
+# Module-level retriever, configured with k=3 search results.
+retriever = vectordb.as_retriever(search_kwargs={"k": 3})
+print("Chroma vectorstore loaded successfully!")
+# =============================
+# Step 3: Load LLM
+# =============================
+# NOTE(review): qa_model is not referenced anywhere else in the visible part
+# of this file (generate_rag_based_response builds a ChatOpenAI client
+# instead) — confirm whether loading this model at import time is still needed.
+qa_model = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")
+# =============================
+# Step 4: RAG Response Function
+# =============================
+# -----------------------
+# User Query Enrichment
+# -----------------------
+def extract_metadata_from_query(query: str):
+    """Extract role, location and department hints from a user query.
+
+    Runs both extractors (`extract_with_spacy` and `extract_with_llm`) on the
+    query, logs each raw result, and merges the two results field by field.
+
+    Args:
+        query: The raw user question.
+
+    Returns:
+        A dict with the keys "roles", "locations" and "departments". Each
+        value is a de-duplicated list holding the spaCy values followed by
+        the LLM values, in first-seen order.
+    """
+    # NOTE(review): extract_with_spacy and extract_with_llm are not defined in
+    # the visible part of this file — confirm they are provided elsewhere.
+    spacy_res = extract_with_spacy(query)
+    logging.info("spaCy results ## ==>%s", spacy_res)
+    llm_res = extract_with_llm(query)
+    logging.info("LLM Extraction Results ## ==>%s", llm_res)
+    merged = {}
+    for field in ("roles", "locations", "departments"):
+        # Tolerate an extractor omitting a field or returning None for it
+        # instead of raising KeyError/TypeError.
+        values = list(spacy_res.get(field) or []) + list(llm_res.get(field) or [])
+        # dict.fromkeys de-duplicates while keeping a stable order; the order
+        # of list(set(...)) over strings can differ between interpreter runs.
+        merged[field] = list(dict.fromkeys(values))
+    return merged
+# -------------------------------
+# Helper: Filter docs manually
+# -------------------------------
+def filter_docs_by_metadata(docs, metadata_filters):
+    """Return the documents whose metadata satisfies every active filter.
+
+    A filter field ("roles", "locations", "departments") is active when
+    `metadata_filters` holds a non-empty value for it. A document passes an
+    active field when at least one requested value is found, via `in`, in the
+    document's metadata entry for that field (an empty list when the entry is
+    missing). With no active fields every document is returned.
+
+    Args:
+        docs: Iterable of objects exposing a `metadata` mapping.
+        metadata_filters: Mapping from field name to the requested values.
+
+    Returns:
+        A new list with the passing documents, in input order.
+    """
+    # NOTE(review): if the stored metadata values are strings rather than
+    # lists, `in` performs substring matching — confirm the stored type.
+    fields = ("roles", "locations", "departments")
+    kept = []
+    for doc in docs:
+        meta = doc.metadata
+        # One outcome per active field; every active field is evaluated, in
+        # the fixed order roles -> locations -> departments.
+        outcomes = [
+            any(value in meta.get(field, []) for value in metadata_filters[field])
+            for field in fields
+            if metadata_filters.get(field)
+        ]
+        if all(outcomes):
+            kept.append(doc)
+    return kept
+def generate_rag_based_response(user_input, retriever, k=3, max_tokens=800, temperature=0, top_p=0.95):
+    """Answer a user query from retrieved document chunks, with citations.
+
+    Extracts role/location/department metadata from the query, retrieves
+    documents through `retriever`, keeps the ones matching the extracted
+    metadata (falling back to all retrieved documents when none match),
+    builds a context block with one "Source:" citation per chunk, and sends
+    it to the chat model together with the question.
+
+    Args:
+        user_input: User query string
+        retriever: LangChain retriever (from Chroma); must expose
+            get_relevant_documents
+        k: number of top documents to retrieve, forwarded to the retriever
+            as the `k` keyword
+        max_tokens: forwarded to the ChatOpenAI constructor
+        temperature: forwarded to the ChatOpenAI constructor
+        top_p: accepted but not used anywhere in this function
+    Returns:
+        The generated response text (response.content), or a string starting
+        with " Error: " when the model call raises.
+    """
+    # The query is enriched by extracting role, location and department from
+    # it. The extracted values are used to filter the retrieved chunks so that
+    # only chunks with matching metadata are considered. If nothing matches,
+    # fall back to plain semantic search (so we don't block valid answers).
+    # Step 1: Extract personalization metadata from query
+    query_metadata = extract_metadata_from_query(user_input)
+    logging.info("\n======================")
+    logging.info("User Query: %s", user_input)
+    logging.info("Extracted metadata from query: %s", query_metadata)  # Investigatory log
+    # Step 2: Retrieve top-k docs semantically
+    # NOTE(review): whether a per-call `k` keyword is honoured depends on the
+    # retriever implementation — confirm; the module-level retriever is
+    # already configured with search_kwargs={"k": 3}.
+    retrieved_docs = retriever.get_relevant_documents(user_input, k=k)
+    logging.info("Retrieved %d docs before filtering", len(retrieved_docs))
+    # Step 3: Apply metadata filtering
+    filtered_docs = filter_docs_by_metadata(retrieved_docs, query_metadata)
+    if filtered_docs:
+        selected_docs = filtered_docs
+        logging.info("✅ %d docs kept after metadata filtering", len(selected_docs))
+    else:
+        selected_docs = retrieved_docs  # fallback if no metadata match
+        logging.info("⚠️ No metadata match, falling back to semantic retrieval only")
+    # Step 4: Log the selected docs and their metadata
+    logging.info("✅ Retrieved %d docs", len(selected_docs))
+    for i, d in enumerate(selected_docs, 1):
+        logging.info("\n--- Chunk %d ---", i)
+        logging.info("Text: %s...", d.page_content[:200])  # preview first 200 chars
+        logging.info("Metadata: %s", d.metadata)
+    # Step 5: Build context with citations
+    context_parts = []
+    for d in selected_docs:
+        meta = d.metadata
+        # "document" and "section" are always rendered (as "None" when the
+        # key is missing); the deeper levels are appended only when present.
+        citation = f"{meta.get('document')} → {meta.get('section')}"
+        if meta.get("subsection"):
+            citation += f" / {meta.get('subsection')}"
+        if meta.get("subsubsection"):
+            citation += f" / {meta.get('subsubsection')}"
+        context_parts.append(f"Source: {citation}\n{d.page_content}")
+    context_for_query = "\n\n---\n\n".join(context_parts)
+    # Step 6: Construct prompt
+    # NOTE(review): hr_user_message_template, QNA_SYSTEM_MESSAGE and
+    # ChatOpenAI are not defined or imported in the visible part of this
+    # file — confirm they are provided elsewhere.
+    user_prompt = hr_user_message_template.format(
+        context=context_for_query,
+        question=user_input
+    )
+    messages = [
+        {"role": "system", "content": QNA_SYSTEM_MESSAGE},
+        {"role": "user", "content": user_prompt},
+    ]
+    # Step 7: Query the LLM
+    llm = ChatOpenAI(model="gpt-4o-mini", temperature=temperature, max_tokens=max_tokens)
+    try:
+        response = llm.invoke(messages)
+        prediction = response.content
+    except Exception as e:
+        # Any failure of the model call is returned to the caller as text
+        # rather than raised.
+        prediction = f" Error: {e}"
+    return prediction
+# =============================
+# Step 5: Chat Function
+# =============================
+def chat_fn(message, history):
+    """Chat callback: answer `message` through the RAG pipeline.
+
+    Args:
+        message: The user's latest message.
+        history: Previous conversation turns supplied by the chat UI; not
+            used by this function.
+
+    Returns:
+        The generated answer followed by a footer naming DATASET_REPO.
+    """
+    # generate_rag_based_response takes the retriever as a required second
+    # argument; calling it with only the message raises TypeError.
+    answer = generate_rag_based_response(message, retriever)
+    return f"{answer}\n\n🧠 (Context retrieved from {DATASET_REPO})"
+# =============================
+# Step 6: Chat bubbles UI
+# =============================
+import gradio as gr
 css1 = r"""
 #chatbot .user {
   background: linear-gradient(to bottom right, #93c5fd, #60a5fa);
   margin-right: auto;
   box-shadow: 0 2px 6px rgba(0,0,0,0.05);
 }
 @keyframes typing {
   0%, 100% { opacity: 0.4; transform: translateY(0); }
   50% { opacity: 1; transform: translateY(-4px); }
 .typing-dot {
   animation: typing 1s infinite;
 }
 """
 css = """
   padding: 15px;
   overflow-y: auto;
 }
 #chatbot .message {
   display: flex;
   margin: 10px 0;
 }
 #chatbot .message.user {
   justify-content: flex-end;
 }
 #chatbot .message.bot {
   justify-content: flex-start;
 }
 /* User bubble */
 #chatbot .message.user .bubble {
   background: linear-gradient(135deg, #4CAF50, #81C784);
   max-width: 70%;
   box-shadow: 0 2px 5px rgba(0,0,0,0.15);
 }
 /* Bot bubble */
 #chatbot .message.bot .bubble {
   background: linear-gradient(135deg, #2196F3, #64B5F6);
   max-width: 70%;
   box-shadow: 0 2px 5px rgba(0,0,0,0.15);
 }
 /* Optional: add smooth fade-in animation */
 @keyframes bubblePop {
   from { transform: scale(0.95); opacity: 0; }
   to { transform: scale(1); opacity: 1; }
 }
 #chatbot .bubble {
   animation: bubblePop 0.2s ease-out;
 }
 """
+# =============================
+# Step 7: Launch App
+# =============================
+# Build the chat UI around chat_fn and start it. Only the `css` stylesheet is
+# passed here; `css1` is defined above but not used by this call.
 gr.ChatInterface(
+    fn=chat_fn,
+    title="Flyline Chatbot ✈ ️",
+    description="Ask Flyline HR",
     theme="soft",
     css=css
 ).launch()