Update app.py
Browse files
app.py
CHANGED
|
@@ -18,11 +18,6 @@ import getpass
|
|
| 18 |
if 'OPENAI_API_KEY' not in os.environ:
|
| 19 |
os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key:')
|
| 20 |
|
| 21 |
-
# Set up embedding function
|
| 22 |
-
@pxt.expr_udf
|
| 23 |
-
def e5_embed(text: str) -> np.ndarray:
|
| 24 |
-
return sentence_transformer(text, model_id='intfloat/e5-large-v2')
|
| 25 |
-
|
| 26 |
# Create prompt function
|
| 27 |
@pxt.udf
|
| 28 |
def create_prompt(top_k_list: list[dict], question: str) -> str:
|
|
@@ -70,8 +65,8 @@ def process_files(token, pdf_files, chunk_limit, chunk_separator):
|
|
| 70 |
t = pxt.create_table(
|
| 71 |
'chatbot_demo.documents',
|
| 72 |
{
|
| 73 |
-
'document': pxt.DocumentType(),
|
| 74 |
-
'question': pxt.StringType()
|
| 75 |
}
|
| 76 |
)
|
| 77 |
|
|
@@ -90,8 +85,11 @@ def process_files(token, pdf_files, chunk_limit, chunk_separator):
|
|
| 90 |
)
|
| 91 |
)
|
| 92 |
|
| 93 |
-
# Add
|
| 94 |
-
chunks_t.add_embedding_index(
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
@chunks_t.query
|
| 97 |
def top_k(query_text: str):
|
|
@@ -102,20 +100,20 @@ def process_files(token, pdf_files, chunk_limit, chunk_separator):
|
|
| 102 |
.limit(5)
|
| 103 |
)
|
| 104 |
|
| 105 |
-
# Add computed columns
|
| 106 |
-
t
|
| 107 |
-
t
|
| 108 |
-
t
|
| 109 |
|
| 110 |
# Add the response column using the messages computed column
|
| 111 |
-
t
|
| 112 |
model='gpt-4o-mini-2024-07-18',
|
| 113 |
-
messages=t.messages,
|
| 114 |
max_tokens=300,
|
| 115 |
top_p=0.9,
|
| 116 |
temperature=0.7
|
| 117 |
-
)
|
| 118 |
-
t
|
| 119 |
|
| 120 |
return "Files processed successfully. You can start the discussion."
|
| 121 |
|
|
@@ -142,7 +140,7 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
|
|
| 142 |
gr.Markdown(
|
| 143 |
"""
|
| 144 |
<div>
|
| 145 |
-
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/
|
| 146 |
<h1 style="margin-bottom: 0.5em;">AI Chatbot With Retrieval-Augmented Generation (RAG)</h1>
|
| 147 |
</div>
|
| 148 |
"""
|
|
|
|
| 18 |
if 'OPENAI_API_KEY' not in os.environ:
|
| 19 |
os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key:')
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
# Create prompt function
|
| 22 |
@pxt.udf
|
| 23 |
def create_prompt(top_k_list: list[dict], question: str) -> str:
|
|
|
|
| 65 |
t = pxt.create_table(
|
| 66 |
'chatbot_demo.documents',
|
| 67 |
{
|
| 68 |
+
'document': pxt.Document,
|
| 69 |
+
'question': pxt.String
|
| 70 |
}
|
| 71 |
)
|
| 72 |
|
|
|
|
| 85 |
)
|
| 86 |
)
|
| 87 |
|
| 88 |
+
# Add embedding index with updated pattern
|
| 89 |
+
chunks_t.add_embedding_index(
|
| 90 |
+
'text',
|
| 91 |
+
string_embed=sentence_transformer.using(model_id='intfloat/e5-large-v2')
|
| 92 |
+
)
|
| 93 |
|
| 94 |
@chunks_t.query
|
| 95 |
def top_k(query_text: str):
|
|
|
|
| 100 |
.limit(5)
|
| 101 |
)
|
| 102 |
|
| 103 |
+
# Add computed columns using keyword argument syntax
|
| 104 |
+
t.add_computed_column(question_context=chunks_t.queries.top_k(t.question))
|
| 105 |
+
t.add_computed_column(prompt=create_prompt(t.question_context, t.question))
|
| 106 |
+
t.add_computed_column(messages=create_messages(t.prompt))
|
| 107 |
|
| 108 |
# Add the response column using the messages computed column
|
| 109 |
+
t.add_computed_column(response=openai.chat_completions(
|
| 110 |
model='gpt-4o-mini-2024-07-18',
|
| 111 |
+
messages=t.messages,
|
| 112 |
max_tokens=300,
|
| 113 |
top_p=0.9,
|
| 114 |
temperature=0.7
|
| 115 |
+
))
|
| 116 |
+
t.add_computed_column(gpt4omini=t.response.choices[0].message.content)
|
| 117 |
|
| 118 |
return "Files processed successfully. You can start the discussion."
|
| 119 |
|
|
|
|
| 140 |
gr.Markdown(
|
| 141 |
"""
|
| 142 |
<div>
|
| 143 |
+
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 200px; margin-bottom: 20px;" />
|
| 144 |
<h1 style="margin-bottom: 0.5em;">AI Chatbot With Retrieval-Augmented Generation (RAG)</h1>
|
| 145 |
</div>
|
| 146 |
"""
|