Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import pandas as pd | |
| import tiktoken | |
| import pandas as pd | |
| import time | |
| import spacy | |
| from spacy.lang.en.stop_words import STOP_WORDS | |
| from string import punctuation | |
| from collections import Counter | |
| from heapq import nlargest | |
| import nltk | |
| import numpy as np | |
| from tqdm import tqdm | |
| from sentence_transformers import SentenceTransformer, util | |
| from sentence_transformers import SentenceTransformer, CrossEncoder, util | |
| import gzip | |
| import os | |
| import torch | |
| import re | |
| import openai | |
| from openai.embeddings_utils import get_embedding, cosine_similarity | |
| import os | |
| # from dotenv import load_dotenv | |
| # load_dotenv() | |
| # print(os.getcwd()) | |
| # openai.api_key = os.environ['OPENAI_KEY'] | |
# Load the pickled article DataFrame from the working directory. Code further
# down reads its `content`, `summary_html`, `embedding`, `title`, `url` and
# `keywords` columns.
df = pd.read_pickle('entire_data.pkl')
# Sentence-embedding model used by search() to encode incoming queries.
# NOTE(review): presumably the stored `embedding` column was produced with
# this same model — confirm, otherwise the similarity scores are meaningless.
model = SentenceTransformer('all-mpnet-base-v2')
# Compiled once at import time instead of on every call. DOTALL lets a tag
# whose attributes wrap onto another line still match as a single tag.
_HTML_TAG_RE = re.compile(r'<.*?>', re.DOTALL)


def remove_html_tags(text):
    """Return ``text`` with every ``<...>`` span removed.

    Matching is non-greedy, so each tag is removed individually and the text
    between tags is kept. Only the tags themselves are stripped; character
    entities such as ``&amp;`` are left untouched.

    Args:
        text: The string to clean.

    Returns:
        The input string without its ``<...>`` spans.
    """
    return _HTML_TAG_RE.sub('', text)
# Strip markup from the article body and the summary so that later steps
# work on plain text.
df['content'] = df['content'].apply(remove_html_tags)
df['summary_html'] = df['summary_html'].apply(remove_html_tags)
# System message that new_ask() sends with every chat completion request.
# NOTE(review): the wording contains typos ("use do", "oppinion"); it is left
# unchanged here because it is runtime text sent to the model.
session_prompt = """ A bot that is open to discussions about different cultural, philosophical and political exchanges. I will use do different analysis to the articles provided to me. Stay truthful and if you weren't provided any resources give your oppinion only."""
def new_ask(user_input):
    """Send ``user_input`` to the chat model and return the text of its reply.

    The request pairs the module-level ``session_prompt`` (system role) with
    ``user_input`` (user role) and is issued with temperature 0.

    Args:
        user_input: The text to send as the user message.

    Returns:
        The message content of the first choice in the response.
    """
    conversation = [
        {'role': 'system', 'content': session_prompt},
        {'role': 'user', 'content': user_input},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
def _tier_entry(row):
    """Turn one ranked result row into the record used for prompts and logs."""
    return {
        "title": row.title,
        "url": row.url,
        "score": str(row.similarity),
        "summary": row.summary_html[:200],  # keep each article's share of the prompt short
        "keywords": row.keywords,
    }


def search(query):
    """Answer ``query`` from the most similar articles in the module-level ``df``.

    Steps:
      1. Encode the query with ``model`` and score every row's ``embedding``
         with ``cosine_similarity``.
      2. Keep the 10 best-scoring rows and collapse rows that share
         title/url/keywords/summary into one entry carrying the best score.
      3. Put entries scoring above 0.5 in tier 1 and entries scoring above
         0.4 in tier 2.
      4. Ask the chat model (via ``new_ask``) to answer the query from the
         tier-1 titles and summaries; when tier 2 is non-empty, also ask for
         related questions based on the tier-2 summaries.

    The scores are computed on a local copy of the needed columns, so the
    shared ``df`` is not modified by a request.

    Args:
        query: The user's free-text question.

    Returns:
        The model's answer, followed by a "Related Questions" section when
        tier 2 is non-empty.
    """
    top_n = 10
    tier_1_floor = 0.5
    tier_2_floor = 0.4
    ln = "\n"  # f-string expressions cannot contain a backslash on older Pythons

    # Column vector of whatever width the model produces (no hard-coded size).
    query_column = model.encode(query).reshape(-1, 1)

    candidates = df[["title", "url", "keywords", "summary_html"]].copy()
    # With a column-vector query the similarity comes back as a one-element
    # array; flatten it to a plain float so sorting and max() work on numbers.
    candidates["similarity"] = [
        float(np.ravel(cosine_similarity(embedding, query_column))[0])
        for embedding in df.embedding
    ]

    top_rows = candidates.sort_values("similarity", ascending=False).head(top_n)
    results = (
        top_rows.groupby(["title", "url", "keywords", "summary_html"])
        .similarity.max()
        .reset_index()
        .sort_values("similarity", ascending=False)
    )

    tier_1 = []
    tier_2 = []
    for row in results.itertuples(index=False):
        if row.similarity > tier_1_floor:
            tier_1.append(_tier_entry(row))
        elif row.similarity > tier_2_floor:
            tier_2.append(_tier_entry(row))

    # Diagnostic output on stdout.
    print(tier_1)
    print(tier_2)
    prefix = f"tier 1:\n{ln.join([x['title'] for x in tier_1])}"
    print(prefix)

    articles = ln.join([x['title'] + ': ' + x['summary'] for x in tier_1])
    answer = new_ask(f"Answer the following query by giving arguments from the different arguments provided below. Make sure to quote the article used if the argument corrseponds to the query: Query: {query} Articles {articles}\nUse careful reasoning to explain your answer and give your conclusion about this.")

    if tier_2:
        summaries = ln.join([str(i) + ' ' + x['summary'] for i, x in enumerate(tier_2)])
        related_questions = new_ask(f"Give general questions related the following articles: {summaries}")
        return f"{answer}\n\nRelated Questions:\n{related_questions}"
    return f"{answer}"
def greet(query):
    """Gradio callback: return whatever ``search`` produces for ``query``."""
    return search(query)
# Sample queries offered in the UI; each example is a one-element list
# holding the value for the single text input.
examples = [
    [prompt]
    for prompt in (
        "Climate Change Challenges in Europe",
        "Philosophy in the world of Minimalism",
        "Hate Speech vs Freedom of Speech",
        "The importance of values and reflection",
    )
]
# Single-function Gradio UI: a multi-line text box feeds greet(), and the
# string it returns is shown as plain text.
demo = gr.Interface(
    fn=greet,
    title="cicero-interactive-qa",
    # gr.Textbox replaces gr.inputs.Textbox: the gr.inputs namespace is
    # deprecated and no longer available in newer Gradio releases.
    inputs=gr.Textbox(lines=5, label="what would you like to learn about?"),
    outputs="text",
    examples=examples,
)
# Start the web server, requesting a public share link and debug mode.
demo.launch(share=True, debug=True)