Spaces:

kp0001
/

law_chatbot

Sleeping

App Files Files Community

kp0001 committed on Sep 11, 2024

Commit

6520191

1 Parent(s): b8fc8b6

Track chroma.sqlite3 with Git

Browse files

Files changed (7) hide show

.gitattributes +1 -0
indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/data_level0.bin +3 -0
indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/header.bin +3 -0
indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/index_metadata.pickle +3 -0
indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/length.bin +3 -0
indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/link_lists.bin +3 -0
main.py +151 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+indian_law_bge_work_1/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text

indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/data_level0.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cdf6fa467ebf9e317a826cba3772b9748a4163380b03d803d55286249e40420b
+size 41900000

indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/header.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:11dce8eb590ff54cfbb57ed4587b151cee9aec6580017ee184e12a77adfcc7eb
+size 100

indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/index_metadata.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28e199030b0b44498f53a482dcd66918b7f8734778a731226e3237b92619261d
+size 662166

indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/length.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:68daf6e901276be27e0261438adf3604d24887da64b3a09e6e8e6ecd52c36bee
+size 100000

indian_law_bge_work_1/8babcb0a-54dc-49e1-b6f7-a3f6eb965240/link_lists.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:87c06b25da15c036b7dd394c10acd31c3b812d55f01b3c79816c3adb819c6b08
+size 210976

main.py ADDED Viewed

	@@ -0,0 +1,151 @@

+import gradio as gr
+import chromadb
+import os
+from langchain.prompts import PromptTemplate
+from langchain.chains import LLMChain
+from langchain.llms import OpenAI
+from langchain.schema.output_parser import StrOutputParser
+from langchain.load import dumps, loads
+import openai
+# Open the persistent ChromaDB store located in the "indian_law_bge_work_1"
+# directory (the path is relative to the process working directory).
+client = chromadb.PersistentClient(path="indian_law_bge_work_1")
+# Load the collection of the same name; get_or_create_collection creates it
+# if it does not exist yet.
+# NOTE(review): no embedding_function is passed here — confirm that the
+# default used for query_texts matches the model that built this index.
+collection = client.get_or_create_collection("indian_law_bge_work_1")
+# Vector Search Function
+def vector_search(query, top_k=5):
+    """Return the documents Chroma retrieves for ``query``.
+
+    Args:
+        query: Text to look up in the module-level ``collection``.
+        top_k: Maximum number of documents to request.
+
+    Returns:
+        A flat list of documents in the order Chroma returned them, or a
+        string starting with "Error during vector search:" if the lookup
+        raised.
+    """
+    try:
+        results = collection.query(query_texts=[query], n_results=top_k)
+        # Chroma answers with one inner list per entry in ``query_texts``.
+        # Exactly one query is sent, so unwrap its list; handing back the
+        # outer list would make rank-based callers see the whole hit list
+        # as a single item at rank 0.
+        documents = results['documents']
+        return documents[0] if documents else []
+    except Exception as e:
+        return f"Error during vector search: {e}"
+# Generate Query Function
+def generate_query(query, query_length):
+    """Ask the LLM for reformulated versions of ``query``.
+
+    Args:
+        query: The user's original question.
+        query_length: How many reformulations to request. Numeric values
+            are truncated to a whole number and raised to at least 1.
+
+    Returns:
+        A list with one entry per non-blank line of the model output, or a
+        string starting with "Error during query generation:" if anything
+        raised.
+    """
+    try:
+        # UI sliders can deliver floats (e.g. 3.0 or 3.4); the prompt must
+        # ask for a whole, positive number of queries.
+        query_count = max(1, int(query_length))
+        prompt = PromptTemplate(
+            input_variables=["query", "query_length"],
+            template="""
+            You are a helpful assistant that can answer questions about Indian law.
+            You are given a query: "{query}" and you need to generate {query_length} reformulated queries for vector search.
+            """
+        )
+        llm = OpenAI(temperature=0.7)
+        chain = LLMChain(llm=llm, prompt=prompt, output_parser=StrOutputParser())
+        result = chain.run({"query": query, "query_length": query_count})
+        # One candidate query per line; blank lines are dropped.
+        return [line.strip() for line in result.split("\n") if line.strip()]
+    except Exception as e:
+        return f"Error during query generation: {e}"
+# Reciprocal Rank Fusion Function
+def reciprocal_rank_fusion(results_list, k=60):
+    """Merge several ranked result lists with Reciprocal Rank Fusion.
+
+    Each item earns ``1 / (position + k)`` for every list it appears in,
+    where ``position`` is its 1-based rank in that list; the contributions
+    are summed per item.
+
+    Args:
+        results_list: Iterable of ranked lists, best item first.
+        k: Smoothing constant added to every rank.
+
+    Returns:
+        A list of ``(item, score)`` tuples sorted by descending score, or a
+        string starting with "Error during RRF:" if anything raised.
+    """
+    try:
+        totals = {}
+        for ranking in results_list:
+            for position, item in enumerate(ranking, start=1):
+                # Serialise the item so it can be used as a dictionary key.
+                key = dumps(item)
+                totals[key] = totals.get(key, 0) + 1 / (position + k)
+        ordered = sorted(totals.items(), key=lambda pair: pair[1], reverse=True)
+        return [(loads(key), total) for key, total in ordered]
+    except Exception as e:
+        return f"Error during RRF: {e}"
+# Main Function to Handle the Workflow
+def handle_query(openai_key, query, query_length):
+    """Run the full pipeline: reformulate, retrieve, fuse, then answer.
+
+    Args:
+        openai_key: OpenAI API key supplied by the user.
+        query: The user's question.
+        query_length: Number of reformulated queries to request.
+
+    Returns:
+        ``(answer, fused_results)`` where ``fused_results`` is the list of
+        ``(document, score)`` tuples from rank fusion. On any failure the
+        first element is an error message and the second is ``[]``.
+    """
+    # Reject blank inputs up front instead of spending API calls on them.
+    if not openai_key or not openai_key.strip():
+        return "Error: an OpenAI API key is required.", []
+    if not query or not query.strip():
+        return "Error: please enter a query.", []
+    # Pasted keys often carry stray whitespace or a trailing newline.
+    openai_key = openai_key.strip()
+    openai.api_key = openai_key
+    # NOTE(review): this sets the key process-wide, so concurrent users of
+    # the app share (and overwrite) it — confirm that is acceptable.
+    os.environ["OPENAI_API_KEY"] = openai_key
+    # Generate reformulated queries
+    generated_queries = generate_query(query, query_length)
+    if isinstance(generated_queries, str):  # Error message from the helper
+        return generated_queries, []
+    if not generated_queries:
+        # The model produced nothing usable; search with the original query
+        # rather than building an answer from an empty context.
+        generated_queries = [query]
+    all_results = []
+    for g_query in generated_queries:
+        documents = vector_search(g_query, top_k=5)
+        if isinstance(documents, str):  # Error message from the helper
+            return documents, []
+        all_results.append(documents)
+    # Fuse results using RRF
+    fused_results = reciprocal_rank_fusion(all_results)
+    if isinstance(fused_results, str):  # Error message from the helper
+        return fused_results, []
+    # Prepare fused results for language model input
+    fused_results_str = "\n".join([f"Document: {result}, Score: {score}" for result, score in fused_results])
+    prompt = PromptTemplate(
+        input_variables=["query", "fused_results"],
+        template="""
+        You are a helpful assistant that can answer questions about Indian law.
+        You are given a query: "{query}" and the following fused results from a vector search:
+        {fused_results}
+        These are the results from the vector search. Take the best result and provide a response.
+        """
+    )
+    formatted_prompt = prompt.format(
+        query=query,
+        fused_results=fused_results_str
+    )
+    # Get the OpenAI response. Failures are reported the same way as in the
+    # other pipeline steps instead of propagating to the UI.
+    # NOTE(review): openai.ChatCompletion is the pre-1.0 client API —
+    # confirm the pinned openai version still provides it.
+    try:
+        response = openai.ChatCompletion.create(
+            model="gpt-4",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": formatted_prompt}
+            ],
+            max_tokens=300,
+            temperature=0.7
+        )
+        answer = response.choices[0].message['content']
+    except Exception as e:
+        return f"Error during answer generation: {e}", []
+    return answer, fused_results
+# Gradio Interface
+def app(openai_key, query, query_length):
+    """Gradio callback: run the pipeline and format its output for display.
+
+    Returns:
+        ``(answer, fused_text)`` where ``fused_text`` has one
+        "Document: ..., Score: ..." line per fused result.
+    """
+    answer, fused_results = handle_query(openai_key, query, query_length)
+    lines = []
+    for result, score in fused_results:
+        lines.append(f"Document: {result}, Score: {score}")
+    return answer, "\n".join(lines)
+# Build the UI: three inputs (API key, question, number of reformulations),
+# two read-only outputs, and a button that runs ``app``.
+with gr.Blocks() as demo:
+    gr.Markdown("## Indian Law Assistant")
+    openai_key = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key")
+    query = gr.Textbox(label="Query", placeholder="Enter your query about Indian law")
+    # step=1 keeps the slider on whole numbers; without an explicit step the
+    # slider can yield fractional values, which make no sense as a count.
+    query_length = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of Reformulated Queries")
+    answer_output = gr.Textbox(label="Answer", interactive=False)
+    fused_results_output = gr.Textbox(label="Fused Results", interactive=False)
+    submit_button = gr.Button("Submit")
+    submit_button.click(
+        fn=app,
+        inputs=[openai_key, query, query_length],
+        outputs=[answer_output, fused_results_output]
+    )
+demo.launch()