Spaces:

naveen07garg
/

AirlineChatBot

Sleeping

App Files Files Community

naveen07garg committed on Oct 23

Commit

46f6681

verified ·

1 Parent(s): 8ff2068

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -1

app.py CHANGED Viewed

@@ -4,6 +4,11 @@ from huggingface_hub import snapshot_download
 from langchain.embeddings import SentenceTransformerEmbeddings
 from langchain_community.vectorstores import Chroma
 from transformers import pipeline
 # =========================================================
 # Step 1: Download Vectorstore from Hugging Face Dataset
@@ -36,11 +41,92 @@ print("Chroma vectorstore loaded successfully!")
 # Step 3: Load LLM
 # =============================
 qa_model = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")
 # =============================
 # Step 4: RAG Response Function
 # =============================
 # -----------------------
 # User Query Enrichment
 # -----------------------

 from langchain.embeddings import SentenceTransformerEmbeddings
 from langchain_community.vectorstores import Chroma
 from transformers import pipeline
+from langchain_community.llms import HuggingFacePipeline
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+import spacy
+import json
 # =========================================================
 # Step 1: Download Vectorstore from Hugging Face Dataset
 # Step 3: Load LLM
 # =============================
 qa_model = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")
+llm = HuggingFacePipeline(pipeline=qa_model)
 # =============================
 # Step 4: RAG Response Function
 # =============================
+# Load spaCy NER model
+nlp = spacy.load("en_core_web_sm")
+# --- spaCy Extraction ---
+def extract_with_spacy(text):
+    """Group the spaCy named entities found in *text* by kind.
+
+    Entities labelled GPE or LOC are reported as "locations", ORG as
+    "departments" and PERSON as "roles" (PERSON is only a rough proxy:
+    job titles are sometimes tagged that way). Other labels are ignored.
+    Each list is de-duplicated through a set, so element order is not
+    guaranteed.
+    """
+    buckets = {"roles": [], "locations": [], "departments": []}
+    # Which output bucket each spaCy entity label feeds.
+    label_to_bucket = {
+        "GPE": "locations",     # e.g., "Singapore"
+        "LOC": "locations",
+        "ORG": "departments",   # e.g., "HR", "Finance"
+        "PERSON": "roles",
+    }
+    for entity in nlp(text).ents:
+        bucket = label_to_bucket.get(entity.label_)
+        if bucket is not None:
+            buckets[bucket].append(entity.text)
+    return {name: list(set(found)) for name, found in buckets.items()}
+# --- LLM Extraction ---
+def extract_with_llm(text):
+    """Extract roles, locations and departments from *text* using the LLM.
+
+    Builds a few-shot prompt, sends it to the module-level ``llm`` and
+    parses the first JSON object found in the reply.
+
+    Returns a dict with exactly the keys "roles", "locations" and
+    "departments", each mapped to a list. Extraction is best-effort: if
+    the LLM call fails or the reply contains no usable JSON object, every
+    list is empty.
+    """
+    import logging  # local import: only needed on the failure path
+
+    empty = {"roles": [], "locations": [], "departments": []}
+    prompt = f"""
+    You are an expert HR assistant for an airline company.
+    Your Task:
+    - Extract **Role(s)**, **Location(s)**, and **Department(s)** explicitly or implicitly mentioned
+      in the following HR policy text.
+    - Focus on aviation-related roles (e.g., Pilot, Cabin Crew, Engineer, Ground Staff, Field Staff),
+      locations (e.g., India, UK, Singapore, Headquarters), and departments (e.g., HR, Finance, Compliance, Operations).
+    - If something is implied (e.g., "field staff" → role=Field Staff, location unspecified), capture it.
+    - If no information is found, return an empty list for that field.
+    ---
+    ### FEW SHOTS Examples
+    Text: "Special leave for cabin crew in Singapore"
+    Output: {{"roles": ["Cabin Crew"], "locations": ["Singapore"], "departments": []}}
+    Text: "Pilots based in UK headquarters"
+    Output: {{"roles": ["Pilot"], "locations": ["United Kingdom", "Headquarters"], "departments": []}}
+    Text: "HR staff policies in India"
+    Output: {{"roles": [], "locations": ["India"], "departments": ["HR"]}}
+    Text: "Field staff in Dubai get separate insurance policy"
+    Output: {{"roles": ["Field Staff"], "locations": ["Dubai"], "departments": []}}
+    ---
+    Now extract from:
+    {text}
+    Output:
+    Return only valid JSON in this exact schema:
+    {{
+        "roles": [list of roles],
+        "locations": [list of locations],
+        "departments": [list of departments]
+    }}
+    """
+    try:
+        response = llm.invoke(prompt)
+    except Exception:
+        # Enrichment is optional, so a failing model must not break the
+        # caller -- but record the failure instead of hiding it.
+        logging.getLogger(__name__).exception("LLM extraction call failed")
+        return empty
+    # A plain-text LLM wrapper returns a str from invoke(); chat models
+    # return a message object exposing .content. Accept both.
+    content = getattr(response, "content", response)
+    if not isinstance(content, str):
+        return empty
+    # Text-generation pipelines may echo the prompt before the completion;
+    # drop it so the few-shot examples are not mistaken for the answer.
+    if content.startswith(prompt):
+        content = content[len(prompt):]
+    start = content.find("{")
+    if start == -1:
+        return empty
+    try:
+        # raw_decode tolerates text after the first complete JSON value.
+        parsed, _ = json.JSONDecoder().raw_decode(content, start)
+    except ValueError:
+        return empty
+    if not isinstance(parsed, dict):
+        return empty
+    # Normalise to the documented schema: always three keys, always lists.
+    extracted = {}
+    for key in empty:
+        value = parsed.get(key, [])
+        if isinstance(value, str) and value:
+            value = [value]
+        extracted[key] = value if isinstance(value, list) else []
+    return extracted
 # -----------------------
 # User Query Enrichment
 # -----------------------