riteshraut
committed on
Commit
·
be8f70c
1
Parent(s):
76039ca
feat/audio
Browse files
- app.py +48 -83
- query_expansion.py +0 -1
- rag_processor.py +35 -124
- templates/index.html +3 -7
app.py
CHANGED
|
@@ -1,9 +1,6 @@
|
|
| 1 |
-
#!/usr/bin/python
|
| 2 |
-
# -*- coding: utf-8 -*-
|
| 3 |
import os
|
| 4 |
import uuid
|
| 5 |
-
from flask import Flask, request, render_template, session, jsonify,
|
| 6 |
-
Response, stream_with_context # Added stream_with_context
|
| 7 |
from werkzeug.utils import secure_filename
|
| 8 |
from rag_processor import create_rag_chain
|
| 9 |
from typing import Sequence, Any, List
|
|
@@ -12,15 +9,12 @@ import re
|
|
| 12 |
import io
|
| 13 |
from gtts import gTTS
|
| 14 |
from langchain_core.documents import Document
|
| 15 |
-
from langchain_community.document_loaders import TextLoader,
|
| 16 |
-
Docx2txtLoader
|
| 17 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 18 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 19 |
from langchain_community.vectorstores import FAISS
|
| 20 |
-
from langchain.retrievers import EnsembleRetriever,
|
| 21 |
-
|
| 22 |
-
from langchain.retrievers.document_compressors.base import \
|
| 23 |
-
BaseDocumentCompressor
|
| 24 |
from langchain_community.retrievers import BM25Retriever
|
| 25 |
from langchain_community.chat_message_histories import ChatMessageHistory
|
| 26 |
from langchain.storage import InMemoryStore
|
|
@@ -28,8 +22,6 @@ from sentence_transformers.cross_encoder import CrossEncoder
|
|
| 28 |
|
| 29 |
app = Flask(__name__)
|
| 30 |
app.config['SECRET_KEY'] = os.urandom(24)
|
| 31 |
-
|
| 32 |
-
# Maps temperature strings (from the form) to the mode labels
|
| 33 |
TEMPERATURE_LABELS = {
|
| 34 |
'0.2': 'Precise',
|
| 35 |
'0.4': 'Confident',
|
|
@@ -37,8 +29,6 @@ TEMPERATURE_LABELS = {
|
|
| 37 |
'0.8': 'Flexible',
|
| 38 |
'1.0': 'Creative',
|
| 39 |
}
|
| 40 |
-
|
| 41 |
-
|
| 42 |
class LocalReranker(BaseDocumentCompressor):
|
| 43 |
model: Any
|
| 44 |
top_n: int = 5
|
|
@@ -64,7 +54,7 @@ class LocalReranker(BaseDocumentCompressor):
|
|
| 64 |
|
| 65 |
def create_optimized_parent_child_chunks(all_docs):
|
| 66 |
if not all_docs:
|
| 67 |
-
print ('
|
| 68 |
return ([], [], [])
|
| 69 |
|
| 70 |
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=900,
|
|
@@ -89,20 +79,15 @@ def create_optimized_parent_child_chunks(all_docs):
|
|
| 89 |
- 1})
|
| 90 |
if len(children) > 1:
|
| 91 |
if j == 0:
|
| 92 |
-
child.page_content = '[Beginning] '
|
| 93 |
-
+ child.page_content
|
| 94 |
elif j == len(children) - 1:
|
| 95 |
-
child.page_content = '[Continues...] '
|
| 96 |
-
+ child.page_content
|
| 97 |
child_docs.append(child)
|
| 98 |
|
| 99 |
-
print (f"
|
| 100 |
)
|
| 101 |
return (parent_docs, child_docs, doc_ids)
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
def get_context_aware_parents(docs: List[Document],
|
| 105 |
-
store: InMemoryStore) -> List[Document]:
|
| 106 |
if not docs:
|
| 107 |
return []
|
| 108 |
(parent_scores, child_content_by_parent) = ({}, {})
|
|
@@ -124,20 +109,16 @@ def get_context_aware_parents(docs: List[Document],
|
|
| 124 |
parent_id = parent_ids[i]
|
| 125 |
if parent_id in child_content_by_parent:
|
| 126 |
child_excerpts = '\n'.join(child_content_by_parent[parent_id][:3])
|
| 127 |
-
enhanced_content = \
|
| 128 |
-
|
| 129 |
-
enhanced_parent = \
|
| 130 |
-
Document(page_content=enhanced_content,
|
| 131 |
metadata={**parent.metadata,
|
| 132 |
'child_relevance_score': parent_scores[parent_id],
|
| 133 |
'matching_children': len(child_content_by_parent[parent_id])})
|
| 134 |
enhanced_parents.append(enhanced_parent)
|
| 135 |
else:
|
| 136 |
-
print (f"
|
| 137 |
-
)
|
| 138 |
|
| 139 |
-
enhanced_parents.sort(key=lambda p: p.metadata.get('child_relevance_score',
|
| 140 |
-
0), reverse=True)
|
| 141 |
return enhanced_parents
|
| 142 |
|
| 143 |
|
|
@@ -147,35 +128,34 @@ app.config['UPLOAD_FOLDER'] = '/tmp/uploads' if is_hf_spaces else 'uploads'
|
|
| 147 |
|
| 148 |
try:
|
| 149 |
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
|
| 150 |
-
print (f"
|
| 151 |
except Exception as e:
|
| 152 |
-
print (f"
|
| 153 |
-
)
|
| 154 |
app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
|
| 155 |
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
|
| 156 |
|
| 157 |
session_data = {}
|
| 158 |
message_histories = {}
|
| 159 |
|
| 160 |
-
print ('
|
| 161 |
try:
|
| 162 |
EMBEDDING_MODEL = \
|
| 163 |
HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'
|
| 164 |
, model_kwargs={'device': 'cpu'},
|
| 165 |
encode_kwargs={'normalize_embeddings': True})
|
| 166 |
-
print ('
|
| 167 |
except Exception as e:
|
| 168 |
-
print (f"
|
| 169 |
raise e
|
| 170 |
|
| 171 |
-
print ('
|
| 172 |
try:
|
| 173 |
RERANKER_MODEL = \
|
| 174 |
CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2',
|
| 175 |
device='cpu')
|
| 176 |
-
print ('
|
| 177 |
except Exception as e:
|
| 178 |
-
print (f"
|
| 179 |
raise e
|
| 180 |
|
| 181 |
|
|
@@ -190,13 +170,13 @@ def load_pdf_with_fallback(filepath):
|
|
| 190 |
metadata={'source': os.path.basename(filepath),
|
| 191 |
'page': page_num + 1}))
|
| 192 |
if docs:
|
| 193 |
-
print (f"
|
| 194 |
)
|
| 195 |
return docs
|
| 196 |
else:
|
| 197 |
raise ValueError('No text content found in PDF.')
|
| 198 |
except Exception as e:
|
| 199 |
-
print (f"
|
| 200 |
raise
|
| 201 |
|
| 202 |
|
|
@@ -228,14 +208,14 @@ def upload_files():
|
|
| 228 |
temperature = float(temperature_str)
|
| 229 |
model_name = request.form.get('model_name',
|
| 230 |
'moonshotai/kimi-k2-instruct')
|
| 231 |
-
print (f"
|
| 232 |
|
| 233 |
if not files or all(f.filename == '' for f in files):
|
| 234 |
return (jsonify({'status': 'error',
|
| 235 |
'message': 'No selected files.'}), 400)
|
| 236 |
|
| 237 |
(all_docs, processed_files, failed_files) = ([], [], [])
|
| 238 |
-
print (f"
|
| 239 |
for file in files:
|
| 240 |
if file and file.filename:
|
| 241 |
filename = secure_filename(file.filename)
|
|
@@ -261,9 +241,9 @@ def upload_files():
|
|
| 261 |
'message': f"Failed to process all files. Reasons: {', '.join(failed_files)}"
|
| 262 |
}), 400)
|
| 263 |
|
| 264 |
-
print (f"
|
| 265 |
try:
|
| 266 |
-
print ('
|
| 267 |
(parent_docs, child_docs, doc_ids) = \
|
| 268 |
create_optimized_parent_child_chunks(all_docs)
|
| 269 |
if not child_docs:
|
|
@@ -272,7 +252,7 @@ def upload_files():
|
|
| 272 |
vectorstore = FAISS.from_documents(child_docs, EMBEDDING_MODEL)
|
| 273 |
store = InMemoryStore()
|
| 274 |
store.mset(list(zip(doc_ids, parent_docs)))
|
| 275 |
-
print (f"
|
| 276 |
|
| 277 |
bm25_retriever = BM25Retriever.from_documents(child_docs)
|
| 278 |
bm25_retriever.k = 12
|
|
@@ -309,7 +289,7 @@ def upload_files():
|
|
| 309 |
mode_label = TEMPERATURE_LABELS.get(temperature_str,
|
| 310 |
temperature_str)
|
| 311 |
|
| 312 |
-
print (f"
|
| 313 |
|
| 314 |
return jsonify({
|
| 315 |
'status': 'success',
|
|
@@ -324,29 +304,24 @@ def upload_files():
|
|
| 324 |
return (jsonify({'status': 'error',
|
| 325 |
'message': f'RAG setup failed: {e}'}), 500)
|
| 326 |
|
| 327 |
-
# --- CORRECTED: Added 'GET' to methods and handle request args ---
|
| 328 |
@app.route('/chat', methods=['POST', 'GET'])
|
| 329 |
def chat():
|
| 330 |
-
# Handle GET request (used by EventSource)
|
| 331 |
if request.method == 'GET':
|
| 332 |
question = request.args.get('question')
|
| 333 |
session_id = request.args.get('session_id')
|
| 334 |
-
print(f"Received GET request for chat: session={session_id}, question={question[:50]}...")
|
| 335 |
-
# Handle POST request (if you ever need it again)
|
| 336 |
elif request.method == 'POST':
|
| 337 |
data = request.get_json()
|
| 338 |
question = data.get('question')
|
| 339 |
session_id = data.get('session_id') or session.get('session_id')
|
| 340 |
-
print(f"Received POST request for chat: session={session_id}, question={question[:50]}...")
|
| 341 |
else:
|
| 342 |
return (jsonify({'status': 'error', 'message': 'Method not allowed'}), 405)
|
| 343 |
|
| 344 |
-
# --- Validation ---
|
| 345 |
if not question:
|
| 346 |
error_msg = "Error: No question provided."
|
| 347 |
-
print(f"
|
| 348 |
if request.method == 'GET':
|
| 349 |
-
# For SSE, need to yield an error event, not return plain text
|
| 350 |
def error_stream():
|
| 351 |
yield f'data: {{"error": "{error_msg}"}}\n\n'
|
| 352 |
return Response(stream_with_context(error_stream()), mimetype='text/event-stream', status=400)
|
|
@@ -354,14 +329,12 @@ def chat():
|
|
| 354 |
|
| 355 |
if not session_id or session_id not in session_data:
|
| 356 |
error_msg = "Error: Invalid session. Please upload documents first."
|
| 357 |
-
print(f"
|
| 358 |
if request.method == 'GET':
|
| 359 |
def error_stream():
|
| 360 |
yield f'data: {{"error": "{error_msg}"}}\n\n'
|
| 361 |
return Response(stream_with_context(error_stream()), mimetype='text/event-stream', status=400)
|
| 362 |
return jsonify({'status': 'error', 'message': error_msg }), 400
|
| 363 |
-
|
| 364 |
-
# --- Process Request ---
|
| 365 |
try:
|
| 366 |
session_info = session_data[session_id]
|
| 367 |
rag_chain = session_info['chain']
|
|
@@ -370,7 +343,7 @@ def chat():
|
|
| 370 |
temperature_str = str(temperature_float)
|
| 371 |
mode_label = TEMPERATURE_LABELS.get(temperature_str, temperature_str)
|
| 372 |
|
| 373 |
-
print (f"
|
| 374 |
|
| 375 |
def generate_chunks():
|
| 376 |
full_response = ''
|
|
@@ -379,34 +352,28 @@ def chat():
|
|
| 379 |
config={'configurable': {'session_id': session_id}})
|
| 380 |
|
| 381 |
for chunk in stream_iterator:
|
| 382 |
-
if isinstance(chunk, str):
|
| 383 |
full_response += chunk
|
| 384 |
token_escaped = chunk.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
|
| 385 |
model_name_escaped = model_name.replace('"', '\\"')
|
| 386 |
mode_label_escaped = mode_label.replace('"', '\\"')
|
| 387 |
yield f'data: {{"token": "{token_escaped}", "model_name": "{model_name_escaped}", "mode": "{mode_label_escaped}"}}\n\n'
|
| 388 |
else:
|
| 389 |
-
|
| 390 |
-
print(f"⚠️ Received non-string chunk: {type(chunk)}")
|
| 391 |
|
| 392 |
|
| 393 |
-
print ('
|
| 394 |
-
# Optionally update session history or store full response if needed later
|
| 395 |
-
# get_session_history(session_id).add_ai_message(full_response)
|
| 396 |
|
| 397 |
except Exception as e:
|
| 398 |
-
print(f"
|
| 399 |
import traceback
|
| 400 |
traceback.print_exc()
|
| 401 |
error_msg = f"Error during response generation: {str(e)}".replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
|
| 402 |
yield f'data: {{"error": "{error_msg}"}}\n\n'
|
| 403 |
-
|
| 404 |
-
# Return the streaming response
|
| 405 |
return Response(stream_with_context(generate_chunks()), mimetype='text/event-stream')
|
| 406 |
|
| 407 |
except Exception as e:
|
| 408 |
-
|
| 409 |
-
print(f"❌ CHAT Setup Error: {e}")
|
| 410 |
import traceback
|
| 411 |
traceback.print_exc()
|
| 412 |
error_msg = f"Error setting up chat stream: {str(e)}"
|
|
@@ -419,14 +386,13 @@ def chat():
|
|
| 419 |
|
| 420 |
|
| 421 |
def clean_markdown_for_tts(text: str) -> str:
|
| 422 |
-
|
| 423 |
-
text = re.sub(r'
|
| 424 |
-
text = re.sub(r'[
|
| 425 |
-
text = re.sub(r'^\s
|
| 426 |
-
text = re.sub(r'^\s
|
| 427 |
-
text = re.sub(r'
|
| 428 |
-
text = re.sub(r'\
|
| 429 |
-
text = re.sub(r'\s{2,}', ' ', text) # Collapse multiple spaces
|
| 430 |
return text.strip()
|
| 431 |
|
| 432 |
|
|
@@ -448,13 +414,12 @@ def text_to_speech():
|
|
| 448 |
mp3_fp.seek(0)
|
| 449 |
return Response(mp3_fp, mimetype='audio/mpeg')
|
| 450 |
except Exception as e:
|
| 451 |
-
print (f"
|
| 452 |
return (jsonify({'status': 'error',
|
| 453 |
'message': 'Failed to generate audio.'}), 500)
|
| 454 |
|
| 455 |
|
| 456 |
if __name__ == '__main__':
|
| 457 |
port = int(os.environ.get('PORT', 7860))
|
| 458 |
-
print (f"
|
| 459 |
-
# Use threaded=True for better handling of concurrent requests during streaming
|
| 460 |
app.run(host='0.0.0.0', port=port, debug=False, threaded=True)
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import uuid
|
| 3 |
+
from flask import Flask, request, render_template, session, jsonify, Response, stream_with_context
|
|
|
|
| 4 |
from werkzeug.utils import secure_filename
|
| 5 |
from rag_processor import create_rag_chain
|
| 6 |
from typing import Sequence, Any, List
|
|
|
|
| 9 |
import io
|
| 10 |
from gtts import gTTS
|
| 11 |
from langchain_core.documents import Document
|
| 12 |
+
from langchain_community.document_loaders import TextLoader, Docx2txtLoader
|
|
|
|
| 13 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 14 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 15 |
from langchain_community.vectorstores import FAISS
|
| 16 |
+
from langchain.retrievers import EnsembleRetriever, ContextualCompressionRetriever
|
| 17 |
+
from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
|
|
|
|
|
|
|
| 18 |
from langchain_community.retrievers import BM25Retriever
|
| 19 |
from langchain_community.chat_message_histories import ChatMessageHistory
|
| 20 |
from langchain.storage import InMemoryStore
|
|
|
|
| 22 |
|
| 23 |
app = Flask(__name__)
|
| 24 |
app.config['SECRET_KEY'] = os.urandom(24)
|
|
|
|
|
|
|
| 25 |
TEMPERATURE_LABELS = {
|
| 26 |
'0.2': 'Precise',
|
| 27 |
'0.4': 'Confident',
|
|
|
|
| 29 |
'0.8': 'Flexible',
|
| 30 |
'1.0': 'Creative',
|
| 31 |
}
|
|
|
|
|
|
|
| 32 |
class LocalReranker(BaseDocumentCompressor):
|
| 33 |
model: Any
|
| 34 |
top_n: int = 5
|
|
|
|
| 54 |
|
| 55 |
def create_optimized_parent_child_chunks(all_docs):
|
| 56 |
if not all_docs:
|
| 57 |
+
print ('CHUNKING: No input documents provided!')
|
| 58 |
return ([], [], [])
|
| 59 |
|
| 60 |
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=900,
|
|
|
|
| 79 |
- 1})
|
| 80 |
if len(children) > 1:
|
| 81 |
if j == 0:
|
| 82 |
+
child.page_content = '[Beginning] ' + child.page_content
|
|
|
|
| 83 |
elif j == len(children) - 1:
|
| 84 |
+
child.page_content = '[Continues...] ' + child.page_content
|
|
|
|
| 85 |
child_docs.append(child)
|
| 86 |
|
| 87 |
+
print (f"CHUNKING: Created {len(parent_docs)} parent and {len(child_docs)} child chunks."
|
| 88 |
)
|
| 89 |
return (parent_docs, child_docs, doc_ids)
|
| 90 |
+
def get_context_aware_parents(docs: List[Document], store: InMemoryStore) -> List[Document]:
|
|
|
|
|
|
|
|
|
|
| 91 |
if not docs:
|
| 92 |
return []
|
| 93 |
(parent_scores, child_content_by_parent) = ({}, {})
|
|
|
|
| 109 |
parent_id = parent_ids[i]
|
| 110 |
if parent_id in child_content_by_parent:
|
| 111 |
child_excerpts = '\n'.join(child_content_by_parent[parent_id][:3])
|
| 112 |
+
enhanced_content = f"{parent.page_content}\n\nRelevant excerpts:\n{child_excerpts}"
|
| 113 |
+
enhanced_parent =Document(page_content=enhanced_content,
|
|
|
|
|
|
|
| 114 |
metadata={**parent.metadata,
|
| 115 |
'child_relevance_score': parent_scores[parent_id],
|
| 116 |
'matching_children': len(child_content_by_parent[parent_id])})
|
| 117 |
enhanced_parents.append(enhanced_parent)
|
| 118 |
else:
|
| 119 |
+
print (f"PARENT_FETCH: Parent {parent_ids[i]} not found in store!")
|
|
|
|
| 120 |
|
| 121 |
+
enhanced_parents.sort(key=lambda p: p.metadata.get('child_relevance_score', 0), reverse=True)
|
|
|
|
| 122 |
return enhanced_parents
|
| 123 |
|
| 124 |
|
|
|
|
| 128 |
|
| 129 |
try:
|
| 130 |
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
|
| 131 |
+
print (f"Upload folder ready: {app.config['UPLOAD_FOLDER']}")
|
| 132 |
except Exception as e:
|
| 133 |
+
print (f"Failed to create upload folder, falling back to /tmp: {e}")
|
|
|
|
| 134 |
app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
|
| 135 |
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
|
| 136 |
|
| 137 |
session_data = {}
|
| 138 |
message_histories = {}
|
| 139 |
|
| 140 |
+
print ('Loading embedding model...')
|
| 141 |
try:
|
| 142 |
EMBEDDING_MODEL = \
|
| 143 |
HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'
|
| 144 |
, model_kwargs={'device': 'cpu'},
|
| 145 |
encode_kwargs={'normalize_embeddings': True})
|
| 146 |
+
print ('Embedding model loaded.')
|
| 147 |
except Exception as e:
|
| 148 |
+
print (f"FATAL: Could not load embedding model. Error: {e}")
|
| 149 |
raise e
|
| 150 |
|
| 151 |
+
print ('Loading reranker model...')
|
| 152 |
try:
|
| 153 |
RERANKER_MODEL = \
|
| 154 |
CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2',
|
| 155 |
device='cpu')
|
| 156 |
+
print ('Reranker model loaded.')
|
| 157 |
except Exception as e:
|
| 158 |
+
print (f"FATAL: Could not load reranker model. Error: {e}")
|
| 159 |
raise e
|
| 160 |
|
| 161 |
|
|
|
|
| 170 |
metadata={'source': os.path.basename(filepath),
|
| 171 |
'page': page_num + 1}))
|
| 172 |
if docs:
|
| 173 |
+
print (f"Loaded PDF: {os.path.basename(filepath)} - {len(docs)} pages"
|
| 174 |
)
|
| 175 |
return docs
|
| 176 |
else:
|
| 177 |
raise ValueError('No text content found in PDF.')
|
| 178 |
except Exception as e:
|
| 179 |
+
print (f"PyMuPDF failed for {filepath}: {e}")
|
| 180 |
raise
|
| 181 |
|
| 182 |
|
|
|
|
| 208 |
temperature = float(temperature_str)
|
| 209 |
model_name = request.form.get('model_name',
|
| 210 |
'moonshotai/kimi-k2-instruct')
|
| 211 |
+
print (f"UPLOAD: Model: {model_name}, Temp: {temperature}")
|
| 212 |
|
| 213 |
if not files or all(f.filename == '' for f in files):
|
| 214 |
return (jsonify({'status': 'error',
|
| 215 |
'message': 'No selected files.'}), 400)
|
| 216 |
|
| 217 |
(all_docs, processed_files, failed_files) = ([], [], [])
|
| 218 |
+
print (f"Processing {len(files)} file(s)...")
|
| 219 |
for file in files:
|
| 220 |
if file and file.filename:
|
| 221 |
filename = secure_filename(file.filename)
|
|
|
|
| 241 |
'message': f"Failed to process all files. Reasons: {', '.join(failed_files)}"
|
| 242 |
}), 400)
|
| 243 |
|
| 244 |
+
print (f"UPLOAD: Processed {len(processed_files)} files.")
|
| 245 |
try:
|
| 246 |
+
print ('Starting RAG pipeline setup...')
|
| 247 |
(parent_docs, child_docs, doc_ids) = \
|
| 248 |
create_optimized_parent_child_chunks(all_docs)
|
| 249 |
if not child_docs:
|
|
|
|
| 252 |
vectorstore = FAISS.from_documents(child_docs, EMBEDDING_MODEL)
|
| 253 |
store = InMemoryStore()
|
| 254 |
store.mset(list(zip(doc_ids, parent_docs)))
|
| 255 |
+
print (f"Indexed {len(child_docs)} document chunks.")
|
| 256 |
|
| 257 |
bm25_retriever = BM25Retriever.from_documents(child_docs)
|
| 258 |
bm25_retriever.k = 12
|
|
|
|
| 289 |
mode_label = TEMPERATURE_LABELS.get(temperature_str,
|
| 290 |
temperature_str)
|
| 291 |
|
| 292 |
+
print (f"UPLOAD COMPLETE: Session {session_id} is ready.")
|
| 293 |
|
| 294 |
return jsonify({
|
| 295 |
'status': 'success',
|
|
|
|
| 304 |
return (jsonify({'status': 'error',
|
| 305 |
'message': f'RAG setup failed: {e}'}), 500)
|
| 306 |
|
|
|
|
| 307 |
@app.route('/chat', methods=['POST', 'GET'])
|
| 308 |
def chat():
|
|
|
|
| 309 |
if request.method == 'GET':
|
| 310 |
question = request.args.get('question')
|
| 311 |
session_id = request.args.get('session_id')
|
| 312 |
+
print(f"Received GET request for chat: session={session_id}, question={question[:50]}...")
|
|
|
|
| 313 |
elif request.method == 'POST':
|
| 314 |
data = request.get_json()
|
| 315 |
question = data.get('question')
|
| 316 |
session_id = data.get('session_id') or session.get('session_id')
|
| 317 |
+
print(f"Received POST request for chat: session={session_id}, question={question[:50]}...")
|
| 318 |
else:
|
| 319 |
return (jsonify({'status': 'error', 'message': 'Method not allowed'}), 405)
|
| 320 |
|
|
|
|
| 321 |
if not question:
|
| 322 |
error_msg = "Error: No question provided."
|
| 323 |
+
print(f"CHAT Validation Error: {error_msg}")
|
| 324 |
if request.method == 'GET':
|
|
|
|
| 325 |
def error_stream():
|
| 326 |
yield f'data: {{"error": "{error_msg}"}}\n\n'
|
| 327 |
return Response(stream_with_context(error_stream()), mimetype='text/event-stream', status=400)
|
|
|
|
| 329 |
|
| 330 |
if not session_id or session_id not in session_data:
|
| 331 |
error_msg = "Error: Invalid session. Please upload documents first."
|
| 332 |
+
print(f"CHAT Validation Error: Invalid session {session_id}.")
|
| 333 |
if request.method == 'GET':
|
| 334 |
def error_stream():
|
| 335 |
yield f'data: {{"error": "{error_msg}"}}\n\n'
|
| 336 |
return Response(stream_with_context(error_stream()), mimetype='text/event-stream', status=400)
|
| 337 |
return jsonify({'status': 'error', 'message': error_msg }), 400
|
|
|
|
|
|
|
| 338 |
try:
|
| 339 |
session_info = session_data[session_id]
|
| 340 |
rag_chain = session_info['chain']
|
|
|
|
| 343 |
temperature_str = str(temperature_float)
|
| 344 |
mode_label = TEMPERATURE_LABELS.get(temperature_str, temperature_str)
|
| 345 |
|
| 346 |
+
print (f"CHAT: Streaming response for session {session_id} (Model: {model_name}, Temp: {temperature_float})...")
|
| 347 |
|
| 348 |
def generate_chunks():
|
| 349 |
full_response = ''
|
|
|
|
| 352 |
config={'configurable': {'session_id': session_id}})
|
| 353 |
|
| 354 |
for chunk in stream_iterator:
|
| 355 |
+
if isinstance(chunk, str):
|
| 356 |
full_response += chunk
|
| 357 |
token_escaped = chunk.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
|
| 358 |
model_name_escaped = model_name.replace('"', '\\"')
|
| 359 |
mode_label_escaped = mode_label.replace('"', '\\"')
|
| 360 |
yield f'data: {{"token": "{token_escaped}", "model_name": "{model_name_escaped}", "mode": "{mode_label_escaped}"}}\n\n'
|
| 361 |
else:
|
| 362 |
+
print(f"Received non-string chunk: {type(chunk)}")
|
|
|
|
| 363 |
|
| 364 |
|
| 365 |
+
print ('CHAT: Streaming finished successfully.')
|
|
|
|
|
|
|
| 366 |
|
| 367 |
except Exception as e:
|
| 368 |
+
print(f"CHAT Error during streaming generation: {e}")
|
| 369 |
import traceback
|
| 370 |
traceback.print_exc()
|
| 371 |
error_msg = f"Error during response generation: {str(e)}".replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
|
| 372 |
yield f'data: {{"error": "{error_msg}"}}\n\n'
|
|
|
|
|
|
|
| 373 |
return Response(stream_with_context(generate_chunks()), mimetype='text/event-stream')
|
| 374 |
|
| 375 |
except Exception as e:
|
| 376 |
+
print(f"CHAT Setup Error: {e}")
|
|
|
|
| 377 |
import traceback
|
| 378 |
traceback.print_exc()
|
| 379 |
error_msg = f"Error setting up chat stream: {str(e)}"
|
|
|
|
| 386 |
|
| 387 |
|
| 388 |
def clean_markdown_for_tts(text: str) -> str:
|
| 389 |
+
text = re.sub(r'\[.*?\]\(.*?\)', '', text)
|
| 390 |
+
text = re.sub(r'[`*_#]', '', text)
|
| 391 |
+
text = re.sub(r'^\s*[\-\*\+]\s+', '', text, flags=re.MULTILINE)
|
| 392 |
+
text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
|
| 393 |
+
text = re.sub(r'^\s*>\s?', '', text, flags=re.MULTILINE)
|
| 394 |
+
text = re.sub(r'\n+', ' ', text)
|
| 395 |
+
text = re.sub(r'\s{2,}', ' ', text)
|
|
|
|
| 396 |
return text.strip()
|
| 397 |
|
| 398 |
|
|
|
|
| 414 |
mp3_fp.seek(0)
|
| 415 |
return Response(mp3_fp, mimetype='audio/mpeg')
|
| 416 |
except Exception as e:
|
| 417 |
+
print (f"TTS Error: {e}")
|
| 418 |
return (jsonify({'status': 'error',
|
| 419 |
'message': 'Failed to generate audio.'}), 500)
|
| 420 |
|
| 421 |
|
| 422 |
if __name__ == '__main__':
|
| 423 |
port = int(os.environ.get('PORT', 7860))
|
| 424 |
+
print (f"Starting Flask app on port {port}")
|
|
|
|
| 425 |
app.run(host='0.0.0.0', port=port, debug=False, threaded=True)
|
query_expansion.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# utils/query_expansion.py
|
| 2 |
|
| 3 |
"""
|
| 4 |
Query Expansion System for CogniChat RAG Application
|
|
|
|
|
|
|
| 1 |
|
| 2 |
"""
|
| 3 |
Query Expansion System for CogniChat RAG Application
|
rag_processor.py
CHANGED
|
@@ -12,15 +12,7 @@ from typing import List, Optional
|
|
| 12 |
import time
|
| 13 |
|
| 14 |
class GroqAPIKeyManager:
|
| 15 |
-
"""Manages multiple Groq API keys with automatic rotation and fallback."""
|
| 16 |
-
|
| 17 |
def __init__(self, api_keys: List[str]):
|
| 18 |
-
"""
|
| 19 |
-
Initialize with a list of API keys.
|
| 20 |
-
|
| 21 |
-
Args:
|
| 22 |
-
api_keys: List of Groq API keys to use
|
| 23 |
-
"""
|
| 24 |
self.api_keys = [key for key in api_keys if key and key != "your_groq_api_key_here"]
|
| 25 |
if not self.api_keys:
|
| 26 |
raise ValueError("No valid API keys provided!")
|
|
@@ -30,34 +22,24 @@ class GroqAPIKeyManager:
|
|
| 30 |
self.success_count = {key: 0 for key in self.api_keys}
|
| 31 |
self.failure_count = {key: 0 for key in self.api_keys}
|
| 32 |
|
| 33 |
-
print(f"
|
| 34 |
|
| 35 |
def get_current_key(self) -> str:
|
| 36 |
-
"""Get the current API key."""
|
| 37 |
return self.api_keys[self.current_index]
|
| 38 |
|
| 39 |
def mark_success(self, api_key: str):
|
| 40 |
-
"""Mark an API key as successful."""
|
| 41 |
if api_key in self.success_count:
|
| 42 |
self.success_count[api_key] += 1
|
| 43 |
-
# Remove from failed keys if it was there
|
| 44 |
if api_key in self.failed_keys:
|
| 45 |
self.failed_keys.remove(api_key)
|
| 46 |
-
print(f"
|
| 47 |
|
| 48 |
def mark_failure(self, api_key: str):
|
| 49 |
-
"""Mark an API key as failed."""
|
| 50 |
if api_key in self.failure_count:
|
| 51 |
self.failure_count[api_key] += 1
|
| 52 |
self.failed_keys.add(api_key)
|
| 53 |
|
| 54 |
def rotate_to_next_key(self) -> bool:
|
| 55 |
-
"""
|
| 56 |
-
Rotate to the next available API key.
|
| 57 |
-
|
| 58 |
-
Returns:
|
| 59 |
-
True if a new key is available, False if all keys failed
|
| 60 |
-
"""
|
| 61 |
initial_index = self.current_index
|
| 62 |
attempts = 0
|
| 63 |
|
|
@@ -66,63 +48,43 @@ class GroqAPIKeyManager:
|
|
| 66 |
attempts += 1
|
| 67 |
|
| 68 |
current_key = self.api_keys[self.current_index]
|
| 69 |
-
|
| 70 |
-
# If we've tried all keys, allow retry even failed ones
|
| 71 |
if attempts >= len(self.api_keys):
|
| 72 |
-
print(f"
|
| 73 |
return True
|
| 74 |
-
|
| 75 |
-
# Skip recently failed keys unless it's been a while
|
| 76 |
if current_key not in self.failed_keys:
|
| 77 |
-
print(f"
|
| 78 |
return True
|
| 79 |
|
| 80 |
return False
|
| 81 |
|
| 82 |
def get_statistics(self) -> str:
|
| 83 |
-
"""Get statistics about API key usage."""
|
| 84 |
stats = []
|
| 85 |
for i, key in enumerate(self.api_keys):
|
| 86 |
success = self.success_count[key]
|
| 87 |
failure = self.failure_count[key]
|
| 88 |
-
status = "
|
| 89 |
masked_key = key[:8] + "..." + key[-4:] if len(key) > 12 else "***"
|
| 90 |
stats.append(f" Key #{i+1} ({masked_key}): {success} success, {failure} failures [{status}]")
|
| 91 |
return "\n".join(stats)
|
| 92 |
|
| 93 |
|
| 94 |
def load_api_keys_from_hf_secrets() -> List[str]:
|
| 95 |
-
"""
|
| 96 |
-
Load API keys from Hugging Face Spaces Secrets.
|
| 97 |
-
|
| 98 |
-
In your Hugging Face Space settings, add these secrets:
|
| 99 |
-
- GROQ_API_KEY_1
|
| 100 |
-
- GROQ_API_KEY_2
|
| 101 |
-
- GROQ_API_KEY_3
|
| 102 |
-
- GROQ_API_KEY_4
|
| 103 |
-
|
| 104 |
-
Returns:
|
| 105 |
-
List of API keys retrieved from HF secrets
|
| 106 |
-
"""
|
| 107 |
api_keys = []
|
| 108 |
secret_names = ['GROQ_API_KEY_1', 'GROQ_API_KEY_2', 'GROQ_API_KEY_3', 'GROQ_API_KEY_4']
|
| 109 |
|
| 110 |
-
print("
|
| 111 |
|
| 112 |
for secret_name in secret_names:
|
| 113 |
try:
|
| 114 |
-
# HF Spaces secrets are available as environment variables
|
| 115 |
api_key = os.getenv(secret_name)
|
| 116 |
|
| 117 |
if api_key and api_key.strip() and api_key != "your_groq_api_key_here":
|
| 118 |
api_keys.append(api_key.strip())
|
| 119 |
-
print(f"
|
| 120 |
else:
|
| 121 |
-
print(f"
|
| 122 |
except Exception as e:
|
| 123 |
-
print(f"
|
| 124 |
-
|
| 125 |
-
# ADD THIS RETURN STATEMENT - this was missing!
|
| 126 |
return api_keys
|
| 127 |
|
| 128 |
|
|
@@ -132,18 +94,6 @@ def create_llm_with_fallback(
|
|
| 132 |
temperature: float,
|
| 133 |
max_retries: int = 3
|
| 134 |
) -> ChatGroq:
|
| 135 |
-
"""
|
| 136 |
-
Create a ChatGroq LLM with automatic API key fallback.
|
| 137 |
-
|
| 138 |
-
Args:
|
| 139 |
-
api_key_manager: Manager handling multiple API keys
|
| 140 |
-
model_name: Name of the model to use
|
| 141 |
-
temperature: Temperature setting
|
| 142 |
-
max_retries: Maximum number of retry attempts
|
| 143 |
-
|
| 144 |
-
Returns:
|
| 145 |
-
ChatGroq instance
|
| 146 |
-
"""
|
| 147 |
for attempt in range(max_retries):
|
| 148 |
current_key = api_key_manager.get_current_key()
|
| 149 |
|
|
@@ -153,7 +103,6 @@ def create_llm_with_fallback(
|
|
| 153 |
api_key=current_key,
|
| 154 |
temperature=temperature
|
| 155 |
)
|
| 156 |
-
# Test the connection with a simple call
|
| 157 |
test_result = llm.invoke("test")
|
| 158 |
api_key_manager.mark_success(current_key)
|
| 159 |
return llm
|
|
@@ -161,20 +110,17 @@ def create_llm_with_fallback(
|
|
| 161 |
except Exception as e:
|
| 162 |
error_msg = str(e).lower()
|
| 163 |
api_key_manager.mark_failure(current_key)
|
| 164 |
-
|
| 165 |
-
# Check if it's a rate limit or auth error
|
| 166 |
if "rate" in error_msg or "limit" in error_msg:
|
| 167 |
-
print(f"
|
| 168 |
elif "auth" in error_msg or "api" in error_msg:
|
| 169 |
-
print(f"
|
| 170 |
else:
|
| 171 |
-
print(f"
|
| 172 |
|
| 173 |
-
# Try next key if available
|
| 174 |
if attempt < max_retries - 1:
|
| 175 |
if api_key_manager.rotate_to_next_key():
|
| 176 |
-
print(f"
|
| 177 |
-
time.sleep(1)
|
| 178 |
else:
|
| 179 |
raise ValueError("All API keys failed!")
|
| 180 |
else:
|
|
@@ -184,9 +130,7 @@ def create_llm_with_fallback(
|
|
| 184 |
|
| 185 |
|
| 186 |
def create_multi_query_retriever(base_retriever, llm, strategy: str = "balanced"):
|
| 187 |
-
"""Wraps a base retriever with query expansion capabilities."""
|
| 188 |
def multi_query_retrieve(query: str) -> List[Document]:
|
| 189 |
-
"""Retrieves documents using expanded query variations."""
|
| 190 |
query_variations = expand_query_simple(query, strategy=strategy, llm=llm)
|
| 191 |
all_docs = []
|
| 192 |
seen_content = set()
|
|
@@ -199,25 +143,15 @@ def create_multi_query_retriever(base_retriever, llm, strategy: str = "balanced"
|
|
| 199 |
seen_content.add(content_hash)
|
| 200 |
all_docs.append(doc)
|
| 201 |
except Exception as e:
|
| 202 |
-
print(f"
|
| 203 |
continue
|
| 204 |
-
print(f"
|
| 205 |
return all_docs
|
| 206 |
return multi_query_retrieve
|
| 207 |
|
| 208 |
|
| 209 |
def get_system_prompt(temperature: float) -> str:
|
| 210 |
-
"""
|
| 211 |
-
Returns a system prompt dynamically based on temperature setting.
|
| 212 |
-
|
| 213 |
-
Temperature ranges:
|
| 214 |
-
- 0.0-0.4: Highly factual, structured, conservative
|
| 215 |
-
- 0.4-0.8: Balanced approach with moderate creativity
|
| 216 |
-
- 0.8-1.0: Creative, engaging, storytelling mode
|
| 217 |
-
"""
|
| 218 |
-
|
| 219 |
if temperature <= 0.4:
|
| 220 |
-
# Conservative, structured prompt
|
| 221 |
return """You are CogniChat, an expert document analysis assistant specializing in comprehensive and well-structured answers.
|
| 222 |
|
| 223 |
RESPONSE GUIDELINES:
|
|
@@ -256,7 +190,6 @@ RESPONSE GUIDELINES:
|
|
| 256 |
Now answer the following question comprehensively using the context above:"""
|
| 257 |
|
| 258 |
elif temperature <= 0.8:
|
| 259 |
-
# Balanced prompt
|
| 260 |
return """You are CogniChat, an intelligent document analysis assistant that combines accuracy with engaging communication.
|
| 261 |
|
| 262 |
RESPONSE GUIDELINES:
|
|
@@ -294,7 +227,7 @@ Now answer the following question in an engaging yet accurate way:"""
|
|
| 294 |
# Creative BUT CLEAR prompt - REVISED VERSION
|
| 295 |
return """You are CogniChat, a creative document analyst who makes complex information clear, memorable, and engaging.
|
| 296 |
|
| 297 |
-
|
| 298 |
|
| 299 |
Make information easier to understand, not harder. Your creativity should illuminate, not obscure.
|
| 300 |
|
|
@@ -333,20 +266,20 @@ Make information easier to understand, not harder. Your creativity should illumi
|
|
| 333 |
- Numbered lists for sequences, bullets for related items
|
| 334 |
|
| 335 |
**WHAT TO AVOID:**
|
| 336 |
-
-
|
| 337 |
-
-
|
| 338 |
-
-
|
| 339 |
-
-
|
| 340 |
-
-
|
| 341 |
-
-
|
| 342 |
|
| 343 |
**ACCURACY BOUNDARIES:**
|
| 344 |
-
-
|
| 345 |
-
-
|
| 346 |
-
-
|
| 347 |
-
-
|
| 348 |
-
-
|
| 349 |
-
-
|
| 350 |
|
| 351 |
**Source Attribution:**
|
| 352 |
- End with: [Source: filename, Page: X]
|
|
@@ -371,20 +304,6 @@ def create_rag_chain(
|
|
| 371 |
temperature: float = 0.2,
|
| 372 |
api_keys: Optional[List[str]] = None
|
| 373 |
):
|
| 374 |
-
"""
|
| 375 |
-
Creates an advanced RAG chain with temperature-adaptive prompting and API key rotation.
|
| 376 |
-
|
| 377 |
-
Args:
|
| 378 |
-
retriever: Document retriever
|
| 379 |
-
get_session_history_func: Function to get session history
|
| 380 |
-
enable_query_expansion: Whether to enable query expansion
|
| 381 |
-
expansion_strategy: Strategy for query expansion
|
| 382 |
-
model_name: Name of the LLM model
|
| 383 |
-
temperature: Temperature setting (0.0-1.0)
|
| 384 |
-
api_keys: Optional list of API keys. If None, loads from environment
|
| 385 |
-
"""
|
| 386 |
-
|
| 387 |
-
# Load API keys from HF Secrets
|
| 388 |
if api_keys is None:
|
| 389 |
api_keys = load_api_keys_from_hf_secrets()
|
| 390 |
|
|
@@ -394,26 +313,23 @@ def create_rag_chain(
|
|
| 394 |
"GROQ_API_KEY_2, GROQ_API_KEY_3, GROQ_API_KEY_4 in your .env file"
|
| 395 |
)
|
| 396 |
|
| 397 |
-
# Initialize API key manager
|
| 398 |
api_key_manager = GroqAPIKeyManager(api_keys)
|
| 399 |
|
| 400 |
-
print(f"
|
| 401 |
|
| 402 |
-
# Display creativity mode based on temperature
|
| 403 |
if temperature <= 0.4:
|
| 404 |
creativity_mode = "FACTUAL & STRUCTURED"
|
| 405 |
elif temperature <= 0.8:
|
| 406 |
creativity_mode = "BALANCED & ENGAGING"
|
| 407 |
else:
|
| 408 |
creativity_mode = "CREATIVE & STORYTELLING"
|
| 409 |
-
print(f"
|
| 410 |
|
| 411 |
-
# Create LLM with fallback
|
| 412 |
llm = create_llm_with_fallback(api_key_manager, model_name, temperature)
|
| 413 |
-
print(f"
|
| 414 |
|
| 415 |
if enable_query_expansion:
|
| 416 |
-
print(f"
|
| 417 |
enhanced_retriever = create_multi_query_retriever(
|
| 418 |
base_retriever=retriever,
|
| 419 |
llm=llm,
|
|
@@ -445,7 +361,6 @@ Optimized Standalone Question:"""
|
|
| 445 |
query_rewriter = rewrite_prompt | llm | StrOutputParser()
|
| 446 |
|
| 447 |
def format_docs(docs):
|
| 448 |
-
"""Format retrieved documents with clear structure and metadata."""
|
| 449 |
if not docs:
|
| 450 |
return "No relevant documents found in the knowledge base."
|
| 451 |
|
|
@@ -469,7 +384,6 @@ Optimized Standalone Question:"""
|
|
| 469 |
)
|
| 470 |
return f"RETRIEVED CONTEXT ({len(docs)} documents):\n\n" + "\n".join(formatted_parts)
|
| 471 |
|
| 472 |
-
# Get temperature-adaptive system prompt
|
| 473 |
rag_template = get_system_prompt(temperature)
|
| 474 |
|
| 475 |
rag_prompt = ChatPromptTemplate.from_messages([
|
|
@@ -478,16 +392,13 @@ Optimized Standalone Question:"""
|
|
| 478 |
("human", "{question}"),
|
| 479 |
])
|
| 480 |
|
| 481 |
-
# Rewriter input construction
|
| 482 |
rewriter_input = RunnableParallel({
|
| 483 |
"question": itemgetter("question"),
|
| 484 |
"chat_history": itemgetter("chat_history"),
|
| 485 |
})
|
| 486 |
|
| 487 |
-
# Main retrieval pipeline
|
| 488 |
retrieval_chain = rewriter_input | query_rewriter | enhanced_retriever | format_docs
|
| 489 |
|
| 490 |
-
# Final conversational RAG chain
|
| 491 |
conversational_rag_chain = RunnableParallel({
|
| 492 |
"context": retrieval_chain,
|
| 493 |
"question": itemgetter("question"),
|
|
@@ -501,7 +412,7 @@ Optimized Standalone Question:"""
|
|
| 501 |
history_messages_key="chat_history",
|
| 502 |
)
|
| 503 |
|
| 504 |
-
print("
|
| 505 |
print("\n" + api_key_manager.get_statistics())
|
| 506 |
|
| 507 |
-
return chain_with_memory, api_key_manager
|
|
|
|
| 12 |
import time
|
| 13 |
|
| 14 |
class GroqAPIKeyManager:
|
|
|
|
|
|
|
| 15 |
def __init__(self, api_keys: List[str]):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
self.api_keys = [key for key in api_keys if key and key != "your_groq_api_key_here"]
|
| 17 |
if not self.api_keys:
|
| 18 |
raise ValueError("No valid API keys provided!")
|
|
|
|
| 22 |
self.success_count = {key: 0 for key in self.api_keys}
|
| 23 |
self.failure_count = {key: 0 for key in self.api_keys}
|
| 24 |
|
| 25 |
+
print(f"API Key Manager: Loaded {len(self.api_keys)} API keys")
|
| 26 |
|
| 27 |
def get_current_key(self) -> str:
|
|
|
|
| 28 |
return self.api_keys[self.current_index]
|
| 29 |
|
| 30 |
def mark_success(self, api_key: str):
|
|
|
|
| 31 |
if api_key in self.success_count:
|
| 32 |
self.success_count[api_key] += 1
|
|
|
|
| 33 |
if api_key in self.failed_keys:
|
| 34 |
self.failed_keys.remove(api_key)
|
| 35 |
+
print(f"API Key #{self.api_keys.index(api_key) + 1} recovered!")
|
| 36 |
|
| 37 |
def mark_failure(self, api_key: str):
|
|
|
|
| 38 |
if api_key in self.failure_count:
|
| 39 |
self.failure_count[api_key] += 1
|
| 40 |
self.failed_keys.add(api_key)
|
| 41 |
|
| 42 |
def rotate_to_next_key(self) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
initial_index = self.current_index
|
| 44 |
attempts = 0
|
| 45 |
|
|
|
|
| 48 |
attempts += 1
|
| 49 |
|
| 50 |
current_key = self.api_keys[self.current_index]
|
|
|
|
|
|
|
| 51 |
if attempts >= len(self.api_keys):
|
| 52 |
+
print(f"All keys attempted, retrying with key #{self.current_index + 1}")
|
| 53 |
return True
|
|
|
|
|
|
|
| 54 |
if current_key not in self.failed_keys:
|
| 55 |
+
print(f"Switching to API Key #{self.current_index + 1}")
|
| 56 |
return True
|
| 57 |
|
| 58 |
return False
|
| 59 |
|
| 60 |
def get_statistics(self) -> str:
|
|
|
|
| 61 |
stats = []
|
| 62 |
for i, key in enumerate(self.api_keys):
|
| 63 |
success = self.success_count[key]
|
| 64 |
failure = self.failure_count[key]
|
| 65 |
+
status = "FAILED" if key in self.failed_keys else "ACTIVE"
|
| 66 |
masked_key = key[:8] + "..." + key[-4:] if len(key) > 12 else "***"
|
| 67 |
stats.append(f" Key #{i+1} ({masked_key}): {success} success, {failure} failures [{status}]")
|
| 68 |
return "\n".join(stats)
|
| 69 |
|
| 70 |
|
| 71 |
def load_api_keys_from_hf_secrets() -> List[str]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
api_keys = []
|
| 73 |
secret_names = ['GROQ_API_KEY_1', 'GROQ_API_KEY_2', 'GROQ_API_KEY_3', 'GROQ_API_KEY_4']
|
| 74 |
|
| 75 |
+
print("Loading API keys from Hugging Face Secrets...")
|
| 76 |
|
| 77 |
for secret_name in secret_names:
|
| 78 |
try:
|
|
|
|
| 79 |
api_key = os.getenv(secret_name)
|
| 80 |
|
| 81 |
if api_key and api_key.strip() and api_key != "your_groq_api_key_here":
|
| 82 |
api_keys.append(api_key.strip())
|
| 83 |
+
print(f" Loaded: {secret_name}")
|
| 84 |
else:
|
| 85 |
+
print(f" Not found or empty: {secret_name}")
|
| 86 |
except Exception as e:
|
| 87 |
+
print(f" Error loading {secret_name}: {str(e)}")
|
|
|
|
|
|
|
| 88 |
return api_keys
|
| 89 |
|
| 90 |
|
|
|
|
| 94 |
temperature: float,
|
| 95 |
max_retries: int = 3
|
| 96 |
) -> ChatGroq:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
for attempt in range(max_retries):
|
| 98 |
current_key = api_key_manager.get_current_key()
|
| 99 |
|
|
|
|
| 103 |
api_key=current_key,
|
| 104 |
temperature=temperature
|
| 105 |
)
|
|
|
|
| 106 |
test_result = llm.invoke("test")
|
| 107 |
api_key_manager.mark_success(current_key)
|
| 108 |
return llm
|
|
|
|
| 110 |
except Exception as e:
|
| 111 |
error_msg = str(e).lower()
|
| 112 |
api_key_manager.mark_failure(current_key)
|
|
|
|
|
|
|
| 113 |
if "rate" in error_msg or "limit" in error_msg:
|
| 114 |
+
print(f" Rate limit hit on API Key #{api_key_manager.current_index + 1}")
|
| 115 |
elif "auth" in error_msg or "api" in error_msg:
|
| 116 |
+
print(f" Authentication failed on API Key #{api_key_manager.current_index + 1}")
|
| 117 |
else:
|
| 118 |
+
print(f" Error with API Key #{api_key_manager.current_index + 1}: {str(e)[:50]}")
|
| 119 |
|
|
|
|
| 120 |
if attempt < max_retries - 1:
|
| 121 |
if api_key_manager.rotate_to_next_key():
|
| 122 |
+
print(f" Retrying with next API key (Attempt {attempt + 2}/{max_retries})...")
|
| 123 |
+
time.sleep(1)
|
| 124 |
else:
|
| 125 |
raise ValueError("All API keys failed!")
|
| 126 |
else:
|
|
|
|
| 130 |
|
| 131 |
|
| 132 |
def create_multi_query_retriever(base_retriever, llm, strategy: str = "balanced"):
|
|
|
|
| 133 |
def multi_query_retrieve(query: str) -> List[Document]:
|
|
|
|
| 134 |
query_variations = expand_query_simple(query, strategy=strategy, llm=llm)
|
| 135 |
all_docs = []
|
| 136 |
seen_content = set()
|
|
|
|
| 143 |
seen_content.add(content_hash)
|
| 144 |
all_docs.append(doc)
|
| 145 |
except Exception as e:
|
| 146 |
+
print(f" Query Expansion Error (Query {i+1}): {str(e)[:50]}")
|
| 147 |
continue
|
| 148 |
+
print(f" Query Expansion: Retrieved {len(all_docs)} unique documents.")
|
| 149 |
return all_docs
|
| 150 |
return multi_query_retrieve
|
| 151 |
|
| 152 |
|
| 153 |
def get_system_prompt(temperature: float) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
if temperature <= 0.4:
|
|
|
|
| 155 |
return """You are CogniChat, an expert document analysis assistant specializing in comprehensive and well-structured answers.
|
| 156 |
|
| 157 |
RESPONSE GUIDELINES:
|
|
|
|
| 190 |
Now answer the following question comprehensively using the context above:"""
|
| 191 |
|
| 192 |
elif temperature <= 0.8:
|
|
|
|
| 193 |
return """You are CogniChat, an intelligent document analysis assistant that combines accuracy with engaging communication.
|
| 194 |
|
| 195 |
RESPONSE GUIDELINES:
|
|
|
|
| 227 |
# Creative BUT CLEAR prompt - REVISED VERSION
|
| 228 |
return """You are CogniChat, a creative document analyst who makes complex information clear, memorable, and engaging.
|
| 229 |
|
| 230 |
+
YOUR CORE MISSION: **CLARITY FIRST, CREATIVITY SECOND**
|
| 231 |
|
| 232 |
Make information easier to understand, not harder. Your creativity should illuminate, not obscure.
|
| 233 |
|
|
|
|
| 266 |
- Numbered lists for sequences, bullets for related items
|
| 267 |
|
| 268 |
**WHAT TO AVOID:**
|
| 269 |
+
- Flowery or overly descriptive language
|
| 270 |
+
- Complex metaphors that need their own explanation
|
| 271 |
+
- Long narrative storytelling that buries the facts
|
| 272 |
+
- Multiple rhetorical questions in a row
|
| 273 |
+
- Overuse of emojis or exclamation points
|
| 274 |
+
- Making simple things sound complicated
|
| 275 |
|
| 276 |
**ACCURACY BOUNDARIES:**
|
| 277 |
+
- Creative explanation and presentation of facts
|
| 278 |
+
- Simple, helpful examples from common knowledge
|
| 279 |
+
- Reorganizing information for better understanding
|
| 280 |
+
- Never invent facts not in the documents
|
| 281 |
+
- Don't contradict source material
|
| 282 |
+
- If info is missing, say so clearly and briefly
|
| 283 |
|
| 284 |
**Source Attribution:**
|
| 285 |
- End with: [Source: filename, Page: X]
|
|
|
|
| 304 |
temperature: float = 0.2,
|
| 305 |
api_keys: Optional[List[str]] = None
|
| 306 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
if api_keys is None:
|
| 308 |
api_keys = load_api_keys_from_hf_secrets()
|
| 309 |
|
|
|
|
| 313 |
"GROQ_API_KEY_2, GROQ_API_KEY_3, GROQ_API_KEY_4 in your .env file"
|
| 314 |
)
|
| 315 |
|
|
|
|
| 316 |
api_key_manager = GroqAPIKeyManager(api_keys)
|
| 317 |
|
| 318 |
+
print(f" RAG: Initializing LLM - Model: {model_name}, Temp: {temperature}")
|
| 319 |
|
|
|
|
| 320 |
if temperature <= 0.4:
|
| 321 |
creativity_mode = "FACTUAL & STRUCTURED"
|
| 322 |
elif temperature <= 0.8:
|
| 323 |
creativity_mode = "BALANCED & ENGAGING"
|
| 324 |
else:
|
| 325 |
creativity_mode = "CREATIVE & STORYTELLING"
|
| 326 |
+
print(f"Creativity Mode: {creativity_mode}")
|
| 327 |
|
|
|
|
| 328 |
llm = create_llm_with_fallback(api_key_manager, model_name, temperature)
|
| 329 |
+
print(f"LLM initialized with API Key #{api_key_manager.current_index + 1}")
|
| 330 |
|
| 331 |
if enable_query_expansion:
|
| 332 |
+
print(f"RAG: Query Expansion ENABLED (Strategy: {expansion_strategy})")
|
| 333 |
enhanced_retriever = create_multi_query_retriever(
|
| 334 |
base_retriever=retriever,
|
| 335 |
llm=llm,
|
|
|
|
| 361 |
query_rewriter = rewrite_prompt | llm | StrOutputParser()
|
| 362 |
|
| 363 |
def format_docs(docs):
|
|
|
|
| 364 |
if not docs:
|
| 365 |
return "No relevant documents found in the knowledge base."
|
| 366 |
|
|
|
|
| 384 |
)
|
| 385 |
return f"RETRIEVED CONTEXT ({len(docs)} documents):\n\n" + "\n".join(formatted_parts)
|
| 386 |
|
|
|
|
| 387 |
rag_template = get_system_prompt(temperature)
|
| 388 |
|
| 389 |
rag_prompt = ChatPromptTemplate.from_messages([
|
|
|
|
| 392 |
("human", "{question}"),
|
| 393 |
])
|
| 394 |
|
|
|
|
| 395 |
rewriter_input = RunnableParallel({
|
| 396 |
"question": itemgetter("question"),
|
| 397 |
"chat_history": itemgetter("chat_history"),
|
| 398 |
})
|
| 399 |
|
|
|
|
| 400 |
retrieval_chain = rewriter_input | query_rewriter | enhanced_retriever | format_docs
|
| 401 |
|
|
|
|
| 402 |
conversational_rag_chain = RunnableParallel({
|
| 403 |
"context": retrieval_chain,
|
| 404 |
"question": itemgetter("question"),
|
|
|
|
| 412 |
history_messages_key="chat_history",
|
| 413 |
)
|
| 414 |
|
| 415 |
+
print("RAG: Chain created successfully.")
|
| 416 |
print("\n" + api_key_manager.get_statistics())
|
| 417 |
|
| 418 |
+
return chain_with_memory, api_key_manager
|
templates/index.html
CHANGED
|
@@ -348,7 +348,6 @@
|
|
| 348 |
|
| 349 |
<script>
|
| 350 |
document.addEventListener('DOMContentLoaded', () => {
|
| 351 |
-
// ... (keep existing element variables)
|
| 352 |
const uploadContainer = document.getElementById('upload-container');
|
| 353 |
const chatContainer = document.getElementById('chat-container');
|
| 354 |
const dropZone = document.getElementById('drop-zone');
|
|
@@ -364,20 +363,17 @@
|
|
| 364 |
const chatContent = document.getElementById('chat-content');
|
| 365 |
const modelSelect = document.getElementById('model-select');
|
| 366 |
const temperatureSelect = document.getElementById('temperature-select');
|
| 367 |
-
// Speed select variable removed
|
| 368 |
const chatFilename = document.getElementById('chat-filename');
|
| 369 |
const chatSessionInfo = document.getElementById('chat-session-info');
|
| 370 |
|
| 371 |
let sessionId = sessionStorage.getItem('cognichat_session_id');
|
| 372 |
-
let currentModelInfo = JSON.parse(sessionStorage.getItem('cognichat_model_info'));
|
| 373 |
|
| 374 |
-
// --- Initialize Marked.js options ---
|
| 375 |
marked.setOptions({
|
| 376 |
-
breaks: true,
|
| 377 |
-
gfm: true,
|
| 378 |
});
|
| 379 |
|
| 380 |
-
// --- Restore Chat State if Session Exists ---
|
| 381 |
if (sessionId && currentModelInfo) {
|
| 382 |
console.log("Restoring session:", sessionId);
|
| 383 |
uploadContainer.classList.add('hidden');
|
|
|
|
| 348 |
|
| 349 |
<script>
|
| 350 |
document.addEventListener('DOMContentLoaded', () => {
|
|
|
|
| 351 |
const uploadContainer = document.getElementById('upload-container');
|
| 352 |
const chatContainer = document.getElementById('chat-container');
|
| 353 |
const dropZone = document.getElementById('drop-zone');
|
|
|
|
| 363 |
const chatContent = document.getElementById('chat-content');
|
| 364 |
const modelSelect = document.getElementById('model-select');
|
| 365 |
const temperatureSelect = document.getElementById('temperature-select');
|
|
|
|
| 366 |
const chatFilename = document.getElementById('chat-filename');
|
| 367 |
const chatSessionInfo = document.getElementById('chat-session-info');
|
| 368 |
|
| 369 |
let sessionId = sessionStorage.getItem('cognichat_session_id');
|
| 370 |
+
let currentModelInfo = JSON.parse(sessionStorage.getItem('cognichat_model_info'));
|
| 371 |
|
|
|
|
| 372 |
marked.setOptions({
|
| 373 |
+
breaks: true,
|
| 374 |
+
gfm: true,
|
| 375 |
});
|
| 376 |
|
|
|
|
| 377 |
if (sessionId && currentModelInfo) {
|
| 378 |
console.log("Restoring session:", sessionId);
|
| 379 |
uploadContainer.classList.add('hidden');
|