naveen07garg committed on
Commit
46f6681
·
verified ·
1 Parent(s): 8ff2068

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -1
app.py CHANGED
@@ -4,6 +4,11 @@ from huggingface_hub import snapshot_download
4
  from langchain.embeddings import SentenceTransformerEmbeddings
5
  from langchain_community.vectorstores import Chroma
6
  from transformers import pipeline
 
 
 
 
 
7
 
8
  # =========================================================
9
  # Step 1: Download Vectorstore from Hugging Face Dataset
@@ -36,11 +41,92 @@ print("Chroma vectorstore loaded successfully!")
36
  # Step 3: Load LLM
37
  # =============================
38
  qa_model = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")
39
-
40
  # =============================
41
  # Step 4: RAG Response Function
42
  # =============================
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  # -----------------------
45
  # User Query Enrichment
46
  # -----------------------
 
4
  from langchain.embeddings import SentenceTransformerEmbeddings
5
  from langchain_community.vectorstores import Chroma
6
  from transformers import pipeline
7
+ from langchain_community.llms import HuggingFacePipeline
8
+ from langchain.chains import LLMChain
9
+ from langchain.prompts import PromptTemplate
10
+ import spacy
11
+ import json
12
 
13
  # =========================================================
14
  # Step 1: Download Vectorstore from Hugging Face Dataset
 
41
  # Step 3: Load LLM
42
  # =============================
43
  qa_model = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")
44
+ llm = HuggingFacePipeline(pipeline=qa_model)
45
  # =============================
46
  # Step 4: RAG Response Function
47
  # =============================
48
 
49
+ # Load spaCy NER model
50
+ nlp = spacy.load("en_core_web_sm")
51
+
52
+ # --- spaCy Extraction ---
53
def extract_with_spacy(text):
    """Bucket the named entities spaCy finds in *text* by their label.

    The module-level ``nlp`` pipeline is run over *text* and each entity is
    routed by its ``label_``:

    - ``GPE`` / ``LOC``  -> ``"locations"`` (e.g. "Singapore")
    - ``ORG``            -> ``"departments"`` (e.g. "HR", "Finance")
    - ``PERSON``         -> ``"roles"`` (job titles are sometimes tagged so)

    Entities with any other label are ignored.

    Returns:
        A dict with the keys ``"roles"``, ``"locations"`` and
        ``"departments"``; each value is a de-duplicated list of entity
        texts. De-duplication goes through ``set``, so list order is not
        guaranteed.
    """
    label_to_field = {
        "GPE": "locations",
        "LOC": "locations",
        "ORG": "departments",
        "PERSON": "roles",
    }
    collected = {"roles": [], "locations": [], "departments": []}

    for entity in nlp(text).ents:
        field = label_to_field.get(entity.label_)
        if field is not None:
            collected[field].append(entity.text)

    return {field: list(set(values)) for field, values in collected.items()}
70
+
71
+ # --- LLM Extraction ---
72
def _coerce_extraction(raw):
    """Parse the first JSON object found in *raw* into the extraction schema.

    Always returns a fresh dict with the keys ``"roles"``, ``"locations"``
    and ``"departments"``, each mapped to a list of strings. Missing keys,
    malformed JSON, or values of the wrong type yield an empty list for the
    affected field instead of raising.
    """
    result = {"roles": [], "locations": [], "departments": []}

    start = raw.find("{")
    if start == -1:
        return result

    try:
        # raw_decode tolerates trailing text after the JSON object, which
        # text-generation models frequently emit.
        parsed, _ = json.JSONDecoder().raw_decode(raw[start:])
    except json.JSONDecodeError:
        return result

    if not isinstance(parsed, dict):
        return result

    for field in result:
        value = parsed.get(field)
        if isinstance(value, str):
            # A lone string instead of a one-element list is a common slip.
            value = [value]
        if isinstance(value, list):
            result[field] = [item for item in value if isinstance(item, str)]

    return result


def extract_with_llm(text):
    """Ask the module-level ``llm`` to extract roles, locations and departments.

    A few-shot prompt is built around *text* and sent through
    ``llm.invoke``. The completion is then searched for a JSON object that
    follows the ``{"roles": [...], "locations": [...], "departments": [...]}``
    schema.

    This is a best-effort helper: any failure while calling the model or
    parsing its answer results in the empty schema rather than an exception.

    Args:
        text: The HR policy text (or user query) to analyse.

    Returns:
        A dict with the keys ``"roles"``, ``"locations"`` and
        ``"departments"``, each a list of strings (possibly empty).
    """
    import logging

    prompt = f"""
    You are an expert HR assistant for an airline company.

    Your Task:
    - Extract **Role(s)**, **Location(s)**, and **Department(s)** explicitly or implicitly mentioned
    in the following HR policy text.
    - Focus on aviation-related roles (e.g., Pilot, Cabin Crew, Engineer, Ground Staff, Field Staff),
    locations (e.g., India, UK, Singapore, Headquarters), and departments (e.g., HR, Finance, Compliance, Operations).
    - If something is implied (e.g., "field staff" → role=Field Staff, location unspecified), capture it.
    - If no information is found, return an empty list for that field.



    ---
    ### FEW SHOTS Examples
    Text: "Special leave for cabin crew in Singapore"
    Output: {{"roles": ["Cabin Crew"], "locations": ["Singapore"], "departments": []}}

    Text: "Pilots based in UK headquarters"
    Output: {{"roles": ["Pilot"], "locations": ["United Kingdom", "Headquarters"], "departments": []}}

    Text: "HR staff policies in India"
    Output: {{"roles": [], "locations": ["India"], "departments": ["HR"]}}

    Text: "Field staff in Dubai get separate insurance policy"
    Output: {{"roles": ["Field Staff"], "locations": ["Dubai"], "departments": []}}

    ---
    Now extract from:
    {text}

    Output:
    Return only valid JSON in this exact schema:
    {{
    "roles": [list of roles],
    "locations": [list of locations],
    "departments": [list of departments]
    }}

    """

    try:
        response = llm.invoke(prompt)
    except Exception:
        # Deliberate best-effort boundary: a model failure must not break the
        # caller, but it should be visible in the logs instead of vanishing.
        logging.getLogger(__name__).exception(
            "LLM extraction failed; returning empty result"
        )
        return _coerce_extraction("")

    # LangChain LLM wrappers (e.g. HuggingFacePipeline) return a plain str
    # from invoke(); chat models return a message object exposing .content.
    # Accept both so the helper keeps working if `llm` is swapped.
    raw = getattr(response, "content", response)
    if not isinstance(raw, str):
        return _coerce_extraction("")

    # Some pipelines echo the prompt before the completion. Drop it so the
    # few-shot JSON examples inside the prompt are never mistaken for the answer.
    if raw.startswith(prompt):
        raw = raw[len(prompt):]

    return _coerce_extraction(raw)
128
+
129
+
130
  # -----------------------
131
  # User Query Enrichment
132
  # -----------------------