Upload 5 files
- app.py +16 -11
- llm.py +14 -5
- main.py +5 -5
- rag.py +37 -18
- requirements.txt +3 -1
app.py
CHANGED
@@ -7,7 +7,7 @@ import gradio as gr
 import main
 
 # Define two separate functions for each button
-def call_generate_news(subject, sites, min_words, max_words):
+def call_generate_news(subject, sites, min_words, max_words, model):
     if subject == '':
         return 'Erro: Assunto não informado', ''
     if min_words == '':
@@ -24,11 +24,11 @@ def call_generate_news(subject, sites, min_words, max_words):
     for item in list_sites:
         if item:
             filtered_list_sites.append(item)
-    result_news = main.generate_news(subject, min_words, max_words, filtered_list_sites)
+    result_news = main.generate_news(subject, min_words, max_words, filtered_list_sites, model.lower())
     return result_news
 
-def call_invoke_llm(context, prompt):
-    result = main.call_llm(context, prompt)
+def call_invoke_llm(context, prompt, model):
+    result = main.call_llm(context, prompt, model)
     return result
 
 # Create the Gradio interface using Blocks
@@ -41,11 +41,16 @@ with gr.Blocks(title='BotNews') as page:
     with gr.Row():
         input1 = gr.Textbox(label="Assunto:", lines=1)
     with gr.Row():
+        input5 = gr.Dropdown(
+            ["OpenAI", "Mixtral-8x7B"], value='OpenAI', label="Modelo LLM", info="Escolha o modelo para processamento das informações.",
+            scale=1
+        )
         input2 = gr.Textbox(label="Domínios para pesquisa (coloque um domínio por linha):",
-                            lines=
-                            […three more removed lines not recoverable from the rendered diff]
+                            lines=8,
+                            scale=2,
+                            value='https://www.cnnbrasil.com.br/\nhttps://g1.globo.com/\nhttps://www.metropoles.com/\nhttps://www.bbc.com/portuguese/\nhttps://www.instagram.com/')
+        input3 = gr.Textbox(label="Mínimo de palavras:", lines=1, value="300", scale=1)
+        input4 = gr.Textbox(label="Máximo de palavras:", lines=1, value="700", scale=1)
     with gr.Row():
         button1 = gr.Button("Gerar notícia")
     with gr.Row():
@@ -56,14 +61,14 @@ with gr.Blocks(title='BotNews') as page:
     gr.Markdown("<b>Instrução:</b> Preencha abaixo com um comando para ser executado sobre a notícia (Por exemplo: 'Resuma em tópicos' ou 'Adicione um tom sarcástico').")
     gr.Markdown(" ")
     with gr.Row():
-        […one removed line not recoverable from the rendered diff]
+        input6 = gr.Textbox(label="Prompt para a notícia")
     with gr.Row():
         button2 = gr.Button("Gerar resposta")
     with gr.Row():
         output2 = gr.Textbox(label="Resposta gerada por IA", lines=25)
 
-    button1.click(call_generate_news, inputs=[input1, input2, input3, input4], outputs=[output1])
-    button2.click(call_invoke_llm, inputs=[output1, input5], outputs=[output2])
+    button1.click(call_generate_news, inputs=[input1, input2, input3, input4, input5], outputs=[output1])
+    button2.click(call_invoke_llm, inputs=[output1, input6, input5], outputs=[output2])
 
 # Launch the interface
 page.launch() #share=True
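Read on its own, the point of the app.py change is that the value of the new gr.Dropdown rides along with every button click and reaches the backend as an extra argument. A minimal, self-contained sketch of that wiring (the handler name and labels are illustrative, not part of the commit):

import gradio as gr

def handle(subject, model):
    # The dropdown delivers its current value as a plain string ("OpenAI" or "Mixtral-8x7B");
    # app.py lower-cases it before handing it to main.generate_news().
    return f"{subject} -> {model.lower()}"

with gr.Blocks() as demo:
    subject = gr.Textbox(label="Assunto:")
    model = gr.Dropdown(["OpenAI", "Mixtral-8x7B"], value="OpenAI", label="Modelo LLM")
    out = gr.Textbox(label="Saída")
    gr.Button("Gerar").click(handle, inputs=[subject, model], outputs=[out])

demo.launch()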
llm.py
CHANGED
@@ -2,9 +2,11 @@
 # Description:
 # ------------------------------------------------
 from langchain_openai import ChatOpenAI
+from langchain_together import Together
+
 import os
 
-def invoke_llm(context, task):
+def invoke_llm(context, task, model):
     prompt = f"""You are an assistant of a newspaper.
     Execute the task just based on the given context.
     The task is delimited by <> and the context is delimited by <>.
@@ -15,10 +17,17 @@ def invoke_llm(context, task):
     Answer here:
     """
 
-    […four removed lines not recoverable from the rendered diff]
+    if model == 'openai':
+        llm=ChatOpenAI(model_name="gpt-3.5-turbo",
+                       temperature=0.3,
+                       openai_api_key=os.environ['OPENAI_KEY'],
+                       max_tokens=1000)
+    else:
+        llm=ChatOpenAI(model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
+                       temperature=0.3,
+                       together_api_key=os.environ['TOGETHER_KEY'],
+                       max_tokens=1000)
+
     result = llm.invoke(prompt)
     return result.content
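Worth noting: the new `from langchain_together import Together` import is never used in this file; the non-OpenAI branch builds a second ChatOpenAI client and passes it a together_api_key keyword. rag.py, later in the same commit, calls the Together wrapper directly. A sketch of invoke_llm's branching rewritten around that wrapper (an alternative, not what was committed; environment variable names as in the commit):

import os
from langchain_openai import ChatOpenAI
from langchain_together import Together

def build_llm(model):
    # Mirrors the if/else in llm.py, but uses Together for the Mixtral case, as rag.py does.
    if model == 'openai':
        return ChatOpenAI(model_name="gpt-3.5-turbo",
                          temperature=0.3,
                          openai_api_key=os.environ['OPENAI_KEY'],
                          max_tokens=1000)
    return Together(model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                    temperature=0.3,
                    together_api_key=os.environ['TOGETHER_KEY'],
                    max_tokens=1000)

One caveat with this variant: Together is a completion-style LLM, so invoke(prompt) returns a plain string, whereas ChatOpenAI returns a message object whose text is in .content; the calling code would need to handle both.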
main.py
CHANGED
@@ -8,7 +8,7 @@ import constants
 import llm
 
 
-def generate_news(subject, min_words, max_words, sites):
+def generate_news(subject, min_words, max_words, sites, model):
     print('\n\n' + '*' * 50)
     print('\n\nInício do Programa: \n')
 
@@ -31,13 +31,13 @@ def generate_news(subject, min_words, max_words, sites):
         return 'Erro: ' + ret
 
     print('\nGerando embeddings e vectorstore...')
-    vectorstore = rag.generate_embeddings_and_vectorstore(constants.local_base)
+    vectorstore = rag.generate_embeddings_and_vectorstore(constants.local_base, model)
    if type(vectorstore) == str:
         return 'Erro: ' + vectorstore
 
     print('\nGerando a notícia (RAG)...')
     print('  Assunto: ' + subject)
-    obj_rag = rag.Rag(vectorstore, min_words, max_words)
+    obj_rag = rag.Rag(vectorstore, min_words, max_words, model)
     result_news = obj_rag.generate_text(subject)
     if type(result_news) == str:
         if 'maximum context length' in result_news:
@@ -56,8 +56,8 @@ def generate_news(subject, min_words, max_words, sites):
     else:
         return result_news[0]
 
-def call_llm(context, prompt):
+def call_llm(context, prompt, model):
     print('\nChamando o modelo de linguagem...')
-    result = llm.invoke_llm(context, prompt)
+    result = llm.invoke_llm(context, prompt, model)
 
     return result
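With `model` threaded through, the backend can be exercised from a Python shell without the Gradio front end. A small usage sketch (the subject, word limits and site list are placeholders):

import main

# Same call shape app.py now uses; the last argument is the model key.
# Note: llm.invoke_llm compares against the lower-case string 'openai',
# so pass an already lower-cased value here.
news = main.generate_news("eleições municipais", "300", "700",
                          ["https://g1.globo.com/"], "openai")
print(news)

answer = main.call_llm(news, "Resuma em tópicos", "openai")
print(answer)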
rag.py
CHANGED
@@ -3,11 +3,13 @@
 # ------------------------------------------------
 #from langchain.embeddings import OpenAIEmbeddings
 from langchain_openai import OpenAIEmbeddings
+from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain_community.document_loaders import DirectoryLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.prompts import PromptTemplate
 from langchain_openai import ChatOpenAI
+from langchain_together import Together
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 import os
@@ -22,7 +24,7 @@ def read_csv_to_dict(filename):
         data_dict[key] = value
     return data_dict
 
-def generate_embeddings_and_vectorstore(path):
+def generate_embeddings_and_vectorstore(path, model):
     try:
         loader = DirectoryLoader(path=path, glob="**/*.txt")
         corpus = loader.load()
@@ -41,8 +43,10 @@ def generate_embeddings_and_vectorstore(path):
 
         #print('docs')
         #print(docs)
-        […two removed lines not recoverable from the rendered diff]
+        if model == 'openai':
+            fc_embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_KEY'])
+        else:
+            fc_embeddings = HuggingFaceEmbeddings(model_name = 'intfloat/multilingual-e5-large-instruct')
         vectorstore = Chroma.from_documents(docs, fc_embeddings)
         print('total de docs no vectorstore=',len(vectorstore.get()['documents']))
 
@@ -52,7 +56,7 @@ def generate_embeddings_and_vectorstore(path):
         return str(e)
 
 class Rag:
-    def __init__(self, vectorstore, min_words, max_words):
+    def __init__(self, vectorstore, min_words, max_words, model):
         self.text = None
         self.vectorstore = vectorstore
         self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, output_key="answer")
@@ -72,20 +76,35 @@ class Rag:
             input_variables=["context", "question"],
             partial_variables={"min_words": min_words, "max_words": max_words})
 
-        […fourteen removed lines not recoverable from the rendered diff]
+        if model == 'openai':
+            self.qa = ConversationalRetrievalChain.from_llm(
+                llm=ChatOpenAI(model_name="gpt-3.5-turbo-0125", #0125 #1106
+                               temperature=0,
+                               openai_api_key=os.environ['OPENAI_KEY'],
+                               max_tokens=int(int(max_words) + (int(max_words) / 2))), #número máximo de tokens para a resposta
+                memory=self.memory,
+                # retriever=vectorstore.as_retriever(search_type='similarity_score_threshold',
+                #                                    search_kwargs={'k':4, 'score_threshold':0.8}), #search_kwargs={'k': 3}
+                retriever=vectorstore.as_retriever(),
+                combine_docs_chain_kwargs={"prompt": self.prompt},
+                chain_type="stuff", #map_reduce, refine, map_rerank
+                return_source_documents=True,
+            )
+        else:
+            self.qa = ConversationalRetrievalChain.from_llm(
+                llm=Together(model="mistralai/Mixtral-8x7B-Instruct-v0.1", #0125 #1106
+                             temperature=0,
+                             together_api_key=os.environ['TOGETHER_KEY'],
+                             max_tokens=int(int(max_words) + (int(max_words) / 2))), #número máximo de tokens para a resposta
+                memory=self.memory,
+                # retriever=vectorstore.as_retriever(search_type='similarity_score_threshold',
+                #                                    search_kwargs={'k':4, 'score_threshold':0.8}), #search_kwargs={'k': 3}
+                retriever=vectorstore.as_retriever(),
+                combine_docs_chain_kwargs={"prompt": self.prompt},
+                chain_type="stuff", #map_reduce, refine, map_rerank
+                return_source_documents=True,
+            )
+
         # from langchain_together import Together
         # self.qa = ConversationalRetrievalChain.from_llm(
         #     llm=Together(model="mistralai/Mixtral-8x7B-Instruct-v0.1", # 0125 #1106
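The two ConversationalRetrievalChain blocks added to __init__ differ only in the llm argument; memory, retriever, prompt and chain type are duplicated verbatim. A possible refactor, sketched here as an alternative to what the commit does (the build_qa_chain helper is hypothetical), picks the model first and builds the chain once:

import os
from langchain_openai import ChatOpenAI
from langchain_together import Together
from langchain.chains import ConversationalRetrievalChain

def build_qa_chain(vectorstore, memory, prompt, max_words, model):
    # Token budget copied from the commit: max_words plus half again.
    max_tokens = int(int(max_words) + (int(max_words) / 2))
    if model == 'openai':
        llm = ChatOpenAI(model_name="gpt-3.5-turbo-0125",
                         temperature=0,
                         openai_api_key=os.environ['OPENAI_KEY'],
                         max_tokens=max_tokens)
    else:
        llm = Together(model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                       temperature=0,
                       together_api_key=os.environ['TOGETHER_KEY'],
                       max_tokens=max_tokens)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        memory=memory,
        retriever=vectorstore.as_retriever(),
        combine_docs_chain_kwargs={"prompt": prompt},
        chain_type="stuff",  # alternatives: map_reduce, refine, map_rerank
        return_source_documents=True,
    )

Rag.__init__ could then reduce to a single line: self.qa = build_qa_chain(vectorstore, self.memory, self.prompt, max_words, model).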
requirements.txt
CHANGED
@@ -2,8 +2,10 @@ google-api-python-client
 langchain
 langchain-community
 langchain_openai
+langchain-together
 openai
 unstructured
 chromadb
 tiktoken
-gradio
+gradio
+sentence-transformers
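The two new dependencies back the non-OpenAI path: langchain-together for the Mixtral calls and sentence-transformers for the local embedding model that HuggingFaceEmbeddings loads in rag.py. A quick sanity check that the model can be loaded in the Space's environment (a sketch; the first run downloads the model weights):

from langchain_community.embeddings import HuggingFaceEmbeddings

# Same model name as in rag.py; loading it pulls the weights via sentence-transformers.
emb = HuggingFaceEmbeddings(model_name='intfloat/multilingual-e5-large-instruct')
print(len(emb.embed_query("teste")))  # embedding dimensionality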