Spaces:
Paused
Paused
Commit
·
8187b01
1
Parent(s):
31f9732
tidy up models file
Browse files
models.py
CHANGED
|
@@ -20,7 +20,9 @@ os.environ["LANGCHAIN_ENDPOINT"] = constants.LANGCHAIN_ENDPOINT
|
|
| 20 |
tracer = LangChainTracer()
|
| 21 |
callback_manager = CallbackManager([tracer])
|
| 22 |
|
| 23 |
-
|
|
|
|
|
|
|
| 24 |
|
| 25 |
opus3 = ChatAnthropic(
|
| 26 |
api_key=constants.ANTRHOPIC_API_KEY,
|
|
@@ -67,12 +69,20 @@ gpt4o_mini = ChatOpenAI(
|
|
| 67 |
callbacks=callback_manager
|
| 68 |
)
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
basic_embeddings = HuggingFaceEmbeddings(model_name="snowflake/snowflake-arctic-embed-l")
|
| 71 |
|
| 72 |
tuned_embeddings = HuggingFaceEmbeddings(model_name="CoExperiences/snowflake-l-marketing-tuned")
|
| 73 |
|
| 74 |
te3_small = OpenAIEmbeddings(api_key=constants.OPENAI_API_KEY, model="text-embedding-3-small")
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
semanticChunker = SemanticChunker(
|
| 77 |
te3_small,
|
| 78 |
breakpoint_threshold_type="percentile"
|
|
@@ -91,14 +101,35 @@ RCTS = RecursiveCharacterTextSplitter(
|
|
| 91 |
length_function=len,
|
| 92 |
)
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
semantic_tuned_Qdrant_vs = QdrantVectorStore(
|
| 95 |
client=qdrant_client,
|
| 96 |
collection_name="docs_from_ripped_urls_semantic_tuned",
|
| 97 |
embedding=tuned_embeddings
|
| 98 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
semantic_tuned_retriever = semantic_tuned_Qdrant_vs.as_retriever(search_kwargs={"k" : 10})
|
| 100 |
|
| 101 |
-
#compression
|
| 102 |
compressor = CohereRerank(model="rerank-english-v3.0")
|
| 103 |
compression_retriever = ContextualCompressionRetriever(
|
| 104 |
base_compressor=compressor, base_retriever=semantic_tuned_retriever
|
|
|
|
| 20 |
tracer = LangChainTracer()
|
| 21 |
callback_manager = CallbackManager([tracer])
|
| 22 |
|
| 23 |
+
########################
|
| 24 |
+
### Chat Models ###
|
| 25 |
+
########################
|
| 26 |
|
| 27 |
opus3 = ChatAnthropic(
|
| 28 |
api_key=constants.ANTRHOPIC_API_KEY,
|
|
|
|
| 69 |
callbacks=callback_manager
|
| 70 |
)
|
| 71 |
|
| 72 |
+
########################
|
| 73 |
+
### Embedding Models ###
|
| 74 |
+
########################
|
| 75 |
+
|
| 76 |
basic_embeddings = HuggingFaceEmbeddings(model_name="snowflake/snowflake-arctic-embed-l")
|
| 77 |
|
| 78 |
tuned_embeddings = HuggingFaceEmbeddings(model_name="CoExperiences/snowflake-l-marketing-tuned")
|
| 79 |
|
| 80 |
te3_small = OpenAIEmbeddings(api_key=constants.OPENAI_API_KEY, model="text-embedding-3-small")
|
| 81 |
|
| 82 |
+
#######################
|
| 83 |
+
### Text Splitters ###
|
| 84 |
+
#######################
|
| 85 |
+
|
| 86 |
semanticChunker = SemanticChunker(
|
| 87 |
te3_small,
|
| 88 |
breakpoint_threshold_type="percentile"
|
|
|
|
| 101 |
length_function=len,
|
| 102 |
)
|
| 103 |
|
| 104 |
+
#######################
|
| 105 |
+
### Vector Stores ###
|
| 106 |
+
#######################
|
| 107 |
+
|
| 108 |
+
qdrant_client = QdrantClient(url=constants.QDRANT_ENDPOINT, api_key=constants.QDRANT_API_KEY)
|
| 109 |
+
|
| 110 |
+
semantic_Qdrant_vs = QdrantVectorStore(
|
| 111 |
+
client=qdrant_client,
|
| 112 |
+
collection_name="docs_from_ripped_urls",
|
| 113 |
+
embedding=te3_small
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
+
rcts_Qdrant_vs = QdrantVectorStore(
|
| 117 |
+
client=qdrant_client,
|
| 118 |
+
collection_name="docs_from_ripped_urls_recursive",
|
| 119 |
+
embedding=te3_small
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
semantic_tuned_Qdrant_vs = QdrantVectorStore(
|
| 123 |
client=qdrant_client,
|
| 124 |
collection_name="docs_from_ripped_urls_semantic_tuned",
|
| 125 |
embedding=tuned_embeddings
|
| 126 |
)
|
| 127 |
+
|
| 128 |
+
#######################
|
| 129 |
+
### Retrievers ###
|
| 130 |
+
#######################
|
| 131 |
semantic_tuned_retriever = semantic_tuned_Qdrant_vs.as_retriever(search_kwargs={"k" : 10})
|
| 132 |
|
|
|
|
| 133 |
compressor = CohereRerank(model="rerank-english-v3.0")
|
| 134 |
compression_retriever = ContextualCompressionRetriever(
|
| 135 |
base_compressor=compressor, base_retriever=semantic_tuned_retriever
|