kp0001 committed on
Commit
6520191
·
1 Parent(s): b8fc8b6

Track chroma.sqlite3 with Git LFS

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ indian_law_bge_work_1/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdf6fa467ebf9e317a826cba3772b9748a4163380b03d803d55286249e40420b
3
+ size 41900000
indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11dce8eb590ff54cfbb57ed4587b151cee9aec6580017ee184e12a77adfcc7eb
3
+ size 100
indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28e199030b0b44498f53a482dcd66918b7f8734778a731226e3237b92619261d
3
+ size 662166
indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68daf6e901276be27e0261438adf3604d24887da64b3a09e6e8e6ecd52c36bee
3
+ size 100000
indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87c06b25da15c036b7dd394c10acd31c3b812d55f01b3c79816c3adb819c6b08
3
+ size 210976
main.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import chromadb
3
+ import os
4
+ from langchain.prompts import PromptTemplate
5
+ from langchain.chains import LLMChain
6
+ from langchain.llms import OpenAI
7
+ from langchain.schema.output_parser import StrOutputParser
8
+ from langchain.load import dumps, loads
9
+ import openai
10
+
# Initialize the ChromaDB client.
# Opens the persisted vector store bundled alongside this file; the path is a
# directory containing chroma.sqlite3 plus the HNSW index binaries.
client = chromadb.PersistentClient(path="indian_law_bge_work_1")

# Load the collection (created lazily on first run if it does not exist yet).
# NOTE(review): the collection name matches the directory name — presumably
# intentional, but confirm it matches the name used when the index was built.
collection = client.get_or_create_collection("indian_law_bge_work_1")
# Vector Search Function
def vector_search(query, top_k=5):
    """Retrieve the *top_k* most similar documents for *query* from Chroma.

    Parameters:
        query: the search string.
        top_k: number of results to request (default 5).

    Returns:
        A flat list of document strings (possibly empty), or an error
        message string when the underlying query fails.
    """
    try:
        results = collection.query(query_texts=[query], n_results=top_k)
        # collection.query returns one ranked list PER input query; we pass a
        # single query, so unwrap the outer list.  (The original returned the
        # nested list, which broke rank-based fusion downstream: every inner
        # list was enumerated as a single item at rank 0.)
        documents = results.get('documents') or []
        return documents[0] if documents else []
    except Exception as e:
        return f"Error during vector search: {e}"
# Generate Query Function
def generate_query(query, query_length):
    """Use an LLM to produce *query_length* reformulations of *query*.

    Parameters:
        query: the user's original question.
        query_length: how many reformulated queries to request.

    Returns:
        A list of non-empty reformulated query strings, or an error
        message string if the LLM call fails.
    """
    try:
        prompt = PromptTemplate(
            input_variables=["query", "query_length"],
            template="""
            You are a helpful assistant that can answer questions about Indian law.
            You are given a query: "{query}" and you need to generate {query_length} reformulated queries for vector search.
            """
        )

        llm = OpenAI(temperature=0.7)
        chain = LLMChain(llm=llm, prompt=prompt, output_parser=StrOutputParser())

        result = chain.run({"query": query, "query_length": query_length})
        # Split the LLM's single text blob into one query per line, dropping
        # blanks.  (The original had a second `if i != ""` filter which was
        # redundant after this strip-and-filter pass.)
        return [line.strip() for line in result.split("\n") if line.strip()]
    except Exception as e:
        return f"Error during query generation: {e}"
# Reciprocal Rank Fusion Function
def reciprocal_rank_fusion(results_list, k=60):
    """Merge several ranked result lists into one via Reciprocal Rank Fusion.

    Each document's fused score is the sum of 1 / (k + rank + 1) over every
    list it appears in; *k* dampens the influence of top ranks.

    Returns:
        A list of (document, score) pairs sorted by score descending, or an
        error message string on failure.
    """
    scores = {}

    try:
        for ranked_docs in results_list:
            for position, document in enumerate(ranked_docs):
                # Serialize so the document can serve as a dict key even when
                # it is not hashable.
                key = dumps(document)
                scores[key] = scores.get(key, 0) + 1 / (position + 1 + k)

        # Highest fused score first; deserialize keys back into documents.
        ordered = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        return [(loads(key), score) for key, score in ordered]
    except Exception as e:
        return f"Error during RRF: {e}"
# Main Function to Handle the Workflow
def handle_query(openai_key, query, query_length):
    """End-to-end RAG pipeline: reformulate, search, fuse, then answer with GPT-4.

    Parameters:
        openai_key: the caller-supplied OpenAI API key.
        query: the user's question about Indian law.
        query_length: number of reformulated queries to generate.

    Returns:
        (answer, fused_results).  On any intermediate failure the first
        element is the error message string and the second is an empty list.
    """
    openai.api_key = openai_key
    os.environ["OPENAI_API_KEY"] = openai_key

    # Step 1: reformulate the question into several search queries.
    reformulations = generate_query(query, query_length)
    if isinstance(reformulations, str):  # error message — propagate as-is
        return reformulations, []

    # Step 2: run a vector search for every reformulation.
    search_hits = []
    for reformulation in reformulations:
        hits = vector_search(reformulation, top_k=5)
        if isinstance(hits, str):  # error message — propagate as-is
            return hits, []
        search_hits.append(hits)

    # Step 3: merge all ranked lists with reciprocal rank fusion.
    fused_results = reciprocal_rank_fusion(search_hits)
    if isinstance(fused_results, str):
        return fused_results, []

    # Step 4: render the fused results as plain text for the LLM prompt.
    fused_results_str = "\n".join(
        f"Document: {doc}, Score: {score}" for doc, score in fused_results
    )

    prompt = PromptTemplate(
        input_variables=["query", "fused_results"],
        template="""
        You are a helpful assistant that can answer questions about Indian law.
        You are given a query: "{query}" and the following fused results from a vector search:

        {fused_results}
        These are the results from the vector search. Take the best result and provide a response.
        """
    )

    formatted_prompt = prompt.format(
        query=query,
        fused_results=fused_results_str
    )

    # Step 5: ask GPT-4 for the final answer.
    # NOTE(review): openai.ChatCompletion is the pre-1.0 OpenAI SDK surface —
    # presumably this deployment pins openai<1.0; confirm.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": formatted_prompt},
        ],
        max_tokens=300,
        temperature=0.7,
    )

    answer = response.choices[0].message['content']

    return answer, fused_results
# Gradio Interface
def app(openai_key, query, query_length):
    """Gradio callback: run the full pipeline and flatten results for display."""
    answer, fused_results = handle_query(openai_key, query, query_length)

    # One line per fused document, mirroring the prompt rendering.
    display = "\n".join(
        f"Document: {doc}, Score: {score}" for doc, score in fused_results
    )

    return answer, display
# Build and launch the UI.  Widgets render in creation order, so the layout
# below is: heading, three inputs, two outputs, submit button.
with gr.Blocks() as demo:
    gr.Markdown("## Indian Law Assistant")

    # Inputs.
    key_box = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key")
    query_box = gr.Textbox(label="Query", placeholder="Enter your query about Indian law")
    count_slider = gr.Slider(minimum=1, maximum=10, value=3, label="Number of Reformulated Queries")

    # Outputs (read-only).
    answer_box = gr.Textbox(label="Answer", interactive=False)
    fused_box = gr.Textbox(label="Fused Results", interactive=False)

    run_button = gr.Button("Submit")

    run_button.click(
        fn=app,
        inputs=[key_box, query_box, count_slider],
        outputs=[answer_box, fused_box],
    )

demo.launch()