Spaces:
Sleeping
Sleeping
fix auth bug with a new embedding endpoint
Browse files
- app.py +5 -4
- spinoza_project/source/backend/llm_utils.py +20 -1
app.py
CHANGED
|
@@ -7,6 +7,7 @@ from spinoza_project.source.backend.llm_utils import (
|
|
| 7 |
get_llm,
|
| 8 |
get_llm_api,
|
| 9 |
get_vectorstore,
|
|
|
|
| 10 |
)
|
| 11 |
from spinoza_project.source.backend.document_store import pickle_to_document_store
|
| 12 |
from spinoza_project.source.backend.get_prompts import get_qa_prompts
|
|
@@ -46,6 +47,7 @@ llm = get_llm_api()
|
|
| 46 |
|
| 47 |
## Loading_tools
|
| 48 |
print("Loading Databases")
|
|
|
|
| 49 |
qdrants = {
|
| 50 |
tab: pickle_to_document_store(
|
| 51 |
hf_hub_download(
|
|
@@ -204,7 +206,7 @@ def answer_questions(*questions_sources, config=config):
|
|
| 204 |
]
|
| 205 |
|
| 206 |
|
| 207 |
-
def get_sources(questions, bdd_presse, qdrants=qdrants, config=config):
|
| 208 |
k = config["num_document_retrieved"]
|
| 209 |
min_similarity = config["min_similarity"]
|
| 210 |
formated = []
|
|
@@ -258,9 +260,8 @@ def get_sources(questions, bdd_presse, qdrants=qdrants, config=config):
|
|
| 258 |
return formated, text
|
| 259 |
|
| 260 |
|
| 261 |
-
def retrieve_sources(*questions, qdrants=qdrants, config=config):
|
| 262 |
-
|
| 263 |
-
formated_sources, text_sources = get_sources(questions, bdd_presse, qdrants, config)
|
| 264 |
|
| 265 |
return (formated_sources, *text_sources)
|
| 266 |
|
|
|
|
| 7 |
get_llm,
|
| 8 |
get_llm_api,
|
| 9 |
get_vectorstore,
|
| 10 |
+
get_vectorstore_api,
|
| 11 |
)
|
| 12 |
from spinoza_project.source.backend.document_store import pickle_to_document_store
|
| 13 |
from spinoza_project.source.backend.get_prompts import get_qa_prompts
|
|
|
|
| 47 |
|
| 48 |
## Loading_tools
|
| 49 |
print("Loading Databases")
|
| 50 |
+
bdd_presse = get_vectorstore_api("presse")
|
| 51 |
qdrants = {
|
| 52 |
tab: pickle_to_document_store(
|
| 53 |
hf_hub_download(
|
|
|
|
| 206 |
]
|
| 207 |
|
| 208 |
|
| 209 |
+
def get_sources(questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config):
|
| 210 |
k = config["num_document_retrieved"]
|
| 211 |
min_similarity = config["min_similarity"]
|
| 212 |
formated = []
|
|
|
|
| 260 |
return formated, text
|
| 261 |
|
| 262 |
|
| 263 |
+
def retrieve_sources(*questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config):
|
| 264 |
+
formated_sources, text_sources = get_sources(questions, qdrants, bdd_presse, config)
|
|
|
|
| 265 |
|
| 266 |
return (formated_sources, *text_sources)
|
| 267 |
|
spinoza_project/source/backend/llm_utils.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
from tracemalloc import stop
|
| 2 |
from langchain_openai import AzureChatOpenAI
|
| 3 |
from msal import ConfidentialClientApplication
|
| 4 |
from langchain_openai import AzureOpenAIEmbeddings
|
|
@@ -97,3 +96,23 @@ def get_vectorstore(index_name, model="text-embedding-ada-002"):
|
|
| 97 |
)
|
| 98 |
|
| 99 |
return vector_store
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from langchain_openai import AzureChatOpenAI
|
| 2 |
from msal import ConfidentialClientApplication
|
| 3 |
from langchain_openai import AzureOpenAIEmbeddings
|
|
|
|
| 96 |
)
|
| 97 |
|
| 98 |
return vector_store
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def get_vectorstore_api(index_name):
|
| 102 |
+
aoai_embeddings = AzureOpenAIEmbeddings(
|
| 103 |
+
model="text-embedding-ada-002",
|
| 104 |
+
api_key=os.getenv("OPENAI_API_KEY"),
|
| 105 |
+
azure_endpoint=os.environ["AZURE_ENDPOINT_API"],
|
| 106 |
+
openai_api_version=os.getenv("OPENAI_API_VERSION"),
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
os.environ["AZURE_OPENAI_API_KEY"] = get_token()
|
| 110 |
+
|
| 111 |
+
vector_store: AzureSearch = AzureSearch(
|
| 112 |
+
azure_search_endpoint=os.getenv("VECTOR_STORE_ADDRESS"),
|
| 113 |
+
azure_search_key=os.getenv("VECTOR_STORE_PASSWORD"),
|
| 114 |
+
index_name=index_name,
|
| 115 |
+
embedding_function=aoai_embeddings.embed_query,
|
| 116 |
+
)
|
| 117 |
+
|
| 118 |
+
return vector_store
|