Spaces:

Pixeltable
/

AI-Chatbot-With-Retrieval-Augmented-Generation

Running

App Files Community

PierreBrunelle committed on Nov 1, 2024

Commit

34bee60

verified ·

1 Parent(s): 737d90e

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -30

app.py CHANGED Viewed

@@ -35,6 +35,21 @@ def create_prompt(top_k_list: list[dict], question: str) -> str:
     QUESTION:
     {question}'''
 def validate_token(token):
     try:
         api = HfApi()
@@ -45,7 +60,6 @@ def validate_token(token):
 def process_files(token, pdf_files, chunk_limit, chunk_separator):
     if not validate_token(token):
         return "Invalid token. Please enter a valid Hugging Face token."
     # Initialize Pixeltable
@@ -54,9 +68,11 @@ def process_files(token, pdf_files, chunk_limit, chunk_separator):
     # Create a table to store the uploaded PDF documents
     t = pxt.create_table(
-    'chatbot_demo.documents',
-    {'document': pxt.DocumentType(nullable=True),
-     'question': pxt.StringType(nullable=True)}
     )
     # Insert the PDF files into the documents table
@@ -86,51 +102,34 @@ def process_files(token, pdf_files, chunk_limit, chunk_separator):
                 .limit(5)
         )
-    # Add computed columns to the table for context retrieval and prompt creation
     t['question_context'] = chunks_t.queries.top_k(t.question)
-    t['prompt'] = create_prompt(
-        t.question_context, t.question
-    )
-    # Prepare messages for the API
-    msgs = [
-        {
-            'role': 'system',
-            'content': 'Answer questions using only the provided context. If the context lacks sufficient information, state this clearly.'
-        },
-        {
-            'role': 'user',
-            'content': t.prompt
-        }
-    ]
-    # Add OpenAI response column
     t['response'] = openai.chat_completions(
         model='gpt-4o-mini-2024-07-18',
-        messages=msgs,
         max_tokens=300,
         top_p=0.9,
         temperature=0.7
     )
-    # Extract the answer text from the API response
     t['gpt4omini'] = t.response.choices[0].message.content
     return "Files processed successfully. You can start the discussion."
 def get_answer(token, msg):
     if not validate_token(token):
         return "Invalid token. Please enter a valid Hugging Face token."
     t = pxt.get_table('chatbot_demo.documents')
-    chunks_t = pxt.get_table('chatbot_demo.chunks')
     # Insert the question into the table
     t.insert([{'question': msg}])
     answer = t.select(t.gpt4omini).where(t.question == msg).collect()['gpt4omini'][0]
     return answer
 def respond(token, message, chat_history):

     QUESTION:
     {question}'''
+# New UDF for creating messages
+@pxt.udf
+def create_messages(prompt: str) -> list[dict]:
+    """Creates a structured message list for the LLM from the prompt"""
+    return [
+        {
+            'role': 'system',
+            'content': 'Answer questions using only the provided context. If the context lacks sufficient information, state this clearly.'
+        },
+        {
+            'role': 'user',
+            'content': prompt
+        }
+    ]
 def validate_token(token):
     try:
         api = HfApi()
 def process_files(token, pdf_files, chunk_limit, chunk_separator):
     if not validate_token(token):
         return "Invalid token. Please enter a valid Hugging Face token."
     # Initialize Pixeltable
     # Create a table to store the uploaded PDF documents
     t = pxt.create_table(
+        'chatbot_demo.documents',
+        {
+            'document': pxt.DocumentType(nullable=True),
+            'question': pxt.StringType(nullable=True)
+        }
     )
     # Insert the PDF files into the documents table
                 .limit(5)
         )
+    # Add computed columns to create the chain of transformations
     t['question_context'] = chunks_t.queries.top_k(t.question)
+    t['prompt'] = create_prompt(t.question_context, t.question)
+    t['messages'] = create_messages(t.prompt)  # New computed column for messages
+    # Add the response column using the messages computed column
     t['response'] = openai.chat_completions(
         model='gpt-4o-mini-2024-07-18',
+        messages=t.messages,  # Use the computed messages column
         max_tokens=300,
         top_p=0.9,
         temperature=0.7
     )
     t['gpt4omini'] = t.response.choices[0].message.content
     return "Files processed successfully. You can start the discussion."
 def get_answer(token, msg):
     if not validate_token(token):
         return "Invalid token. Please enter a valid Hugging Face token."
     t = pxt.get_table('chatbot_demo.documents')
     # Insert the question into the table
     t.insert([{'question': msg}])
+    # The answer will be automatically generated through the chain of computed columns
     answer = t.select(t.gpt4omini).where(t.question == msg).collect()['gpt4omini'][0]
     return answer
 def respond(token, message, chat_history):