Spaces:

Zeri00
/

Cogni-Chat-document-reader-v2

Running

App Files Community

riteshraut committed 24 days ago

Commit

be8f70c

1 Parent(s): 76039ca

feat/audio

Browse files

Files changed (4) hide show

app.py +48 -83
query_expansion.py +0 -1
rag_processor.py +35 -124
templates/index.html +3 -7

app.py CHANGED Viewed

@@ -1,9 +1,6 @@
-#!/usr/bin/python
- # -*- coding: utf-8 -*-
 import os
 import uuid
-from flask import Flask, request, render_template, session, jsonify, \
-    Response, stream_with_context # Added stream_with_context
 from werkzeug.utils import secure_filename
 from rag_processor import create_rag_chain
 from typing import Sequence, Any, List
@@ -12,15 +9,12 @@ import re
 import io
 from gtts import gTTS
 from langchain_core.documents import Document
-from langchain_community.document_loaders import TextLoader, \
-    Docx2txtLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
-from langchain.retrievers import EnsembleRetriever, \
-    ContextualCompressionRetriever
-from langchain.retrievers.document_compressors.base import \
-    BaseDocumentCompressor
 from langchain_community.retrievers import BM25Retriever
 from langchain_community.chat_message_histories import ChatMessageHistory
 from langchain.storage import InMemoryStore
@@ -28,8 +22,6 @@ from sentence_transformers.cross_encoder import CrossEncoder
 app = Flask(__name__)
 app.config['SECRET_KEY'] = os.urandom(24)
-# Maps temperature strings (from the form) to the mode labels
 TEMPERATURE_LABELS = {
     '0.2': 'Precise',
     '0.4': 'Confident',
@@ -37,8 +29,6 @@ TEMPERATURE_LABELS = {
     '0.8': 'Flexible',
     '1.0': 'Creative',
     }
 class LocalReranker(BaseDocumentCompressor):
     model: Any
     top_n: int = 5
@@ -64,7 +54,7 @@ class LocalReranker(BaseDocumentCompressor):
 def create_optimized_parent_child_chunks(all_docs):
     if not all_docs:
-        print ('❌ CHUNKING: No input documents provided!')
         return ([], [], [])
     parent_splitter = RecursiveCharacterTextSplitter(chunk_size=900,
@@ -89,20 +79,15 @@ def create_optimized_parent_child_chunks(all_docs):
                                   - 1})
             if len(children) > 1:
                 if j == 0:
-                    child.page_content = '[Beginning] ' \
-                        + child.page_content
                 elif j == len(children) - 1:
-                    child.page_content = '[Continues...] ' \
-                        + child.page_content
             child_docs.append(child)
-    print (f"✅ CHUNKING: Created {len(parent_docs)} parent and {len(child_docs)} child chunks."
            )
     return (parent_docs, child_docs, doc_ids)
-def get_context_aware_parents(docs: List[Document],
-                              store: InMemoryStore) -> List[Document]:
     if not docs:
         return []
     (parent_scores, child_content_by_parent) = ({}, {})
@@ -124,20 +109,16 @@ def get_context_aware_parents(docs: List[Document],
             parent_id = parent_ids[i]
             if parent_id in child_content_by_parent:
                 child_excerpts = '\n'.join(child_content_by_parent[parent_id][:3])
-                enhanced_content = \
-                    f"{parent.page_content}\n\nRelevant excerpts:\n{child_excerpts}"
-                enhanced_parent = \
-                    Document(page_content=enhanced_content,
                              metadata={**parent.metadata,
                              'child_relevance_score': parent_scores[parent_id],
                              'matching_children': len(child_content_by_parent[parent_id])})
                 enhanced_parents.append(enhanced_parent)
         else:
-            print (f"❌ PARENT_FETCH: Parent {parent_ids[i]} not found in store!"
-                   )
-    enhanced_parents.sort(key=lambda p: p.metadata.get('child_relevance_score',
-                         0), reverse=True)
     return enhanced_parents
@@ -147,35 +128,34 @@ app.config['UPLOAD_FOLDER'] = '/tmp/uploads' if is_hf_spaces else 'uploads'
 try:
     os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
-    print (f"📁 Upload folder ready: {app.config['UPLOAD_FOLDER']}")
 except Exception as e:
-    print (f"❌ Failed to create upload folder, falling back to /tmp: {e}"
-           )
     app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
     os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
 session_data = {}
 message_histories = {}
-print ('🔄 Loading embedding model...')
 try:
     EMBEDDING_MODEL = \
         HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'
                               , model_kwargs={'device': 'cpu'},
                               encode_kwargs={'normalize_embeddings': True})
-    print ('✅ Embedding model loaded.')
 except Exception as e:
-    print (f"❌ FATAL: Could not load embedding model. Error: {e}")
     raise e
-print ('🔄 Loading reranker model...')
 try:
     RERANKER_MODEL = \
         CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2',
                      device='cpu')
-    print ('✅ Reranker model loaded.')
 except Exception as e:
-    print (f"❌ FATAL: Could not load reranker model. Error: {e}")
     raise e
@@ -190,13 +170,13 @@ def load_pdf_with_fallback(filepath):
                                 metadata={'source': os.path.basename(filepath),
                                 'page': page_num + 1}))
         if docs:
-            print (f"✅ Loaded PDF: {os.path.basename(filepath)} - {len(docs)} pages"
                    )
             return docs
         else:
             raise ValueError('No text content found in PDF.')
     except Exception as e:
-        print (f"❌ PyMuPDF failed for {filepath}: {e}")
         raise
@@ -228,14 +208,14 @@ def upload_files():
     temperature = float(temperature_str)
     model_name = request.form.get('model_name',
                                   'moonshotai/kimi-k2-instruct')
-    print (f"⚙️ UPLOAD: Model: {model_name}, Temp: {temperature}")
     if not files or all(f.filename == '' for f in files):
         return (jsonify({'status': 'error',
                 'message': 'No selected files.'}), 400)
     (all_docs, processed_files, failed_files) = ([], [], [])
-    print (f"📁 Processing {len(files)} file(s)...")
     for file in files:
         if file and file.filename:
             filename = secure_filename(file.filename)
@@ -261,9 +241,9 @@ def upload_files():
                 'message': f"Failed to process all files. Reasons: {', '.join(failed_files)}"
                 }), 400)
-    print (f"✅ UPLOAD: Processed {len(processed_files)} files.")
     try:
-        print ('🔄 Starting RAG pipeline setup...')
         (parent_docs, child_docs, doc_ids) = \
             create_optimized_parent_child_chunks(all_docs)
         if not child_docs:
@@ -272,7 +252,7 @@ def upload_files():
         vectorstore = FAISS.from_documents(child_docs, EMBEDDING_MODEL)
         store = InMemoryStore()
         store.mset(list(zip(doc_ids, parent_docs)))
-        print (f"✅ Indexed {len(child_docs)} document chunks.")
         bm25_retriever = BM25Retriever.from_documents(child_docs)
         bm25_retriever.k = 12
@@ -309,7 +289,7 @@ def upload_files():
         mode_label = TEMPERATURE_LABELS.get(temperature_str,
                 temperature_str)
-        print (f"✅ UPLOAD COMPLETE: Session {session_id} is ready.")
         return jsonify({
             'status': 'success',
@@ -324,29 +304,24 @@ def upload_files():
         return (jsonify({'status': 'error',
                 'message': f'RAG setup failed: {e}'}), 500)
-# --- CORRECTED: Added 'GET' to methods and handle request args ---
 @app.route('/chat', methods=['POST', 'GET'])
 def chat():
-    # Handle GET request (used by EventSource)
     if request.method == 'GET':
         question = request.args.get('question')
         session_id = request.args.get('session_id')
-        print(f"Received GET request for chat: session={session_id}, question={question[:50]}...") # Log GET request
-    # Handle POST request (if you ever need it again)
     elif request.method == 'POST':
         data = request.get_json()
         question = data.get('question')
         session_id = data.get('session_id') or session.get('session_id')
-        print(f"Received POST request for chat: session={session_id}, question={question[:50]}...") # Log POST request
     else:
         return (jsonify({'status': 'error', 'message': 'Method not allowed'}), 405)
-    # --- Validation ---
     if not question:
         error_msg = "Error: No question provided."
-        print(f"❌ CHAT Validation Error: {error_msg}")
         if request.method == 'GET':
-            # For SSE, need to yield an error event, not return plain text
             def error_stream():
                 yield f'data: {{"error": "{error_msg}"}}\n\n'
             return Response(stream_with_context(error_stream()), mimetype='text/event-stream', status=400)
@@ -354,14 +329,12 @@ def chat():
     if not session_id or session_id not in session_data:
         error_msg = "Error: Invalid session. Please upload documents first."
-        print(f"❌ CHAT Validation Error: Invalid session {session_id}.")
         if request.method == 'GET':
             def error_stream():
                 yield f'data: {{"error": "{error_msg}"}}\n\n'
             return Response(stream_with_context(error_stream()), mimetype='text/event-stream', status=400)
         return jsonify({'status': 'error', 'message': error_msg }), 400
-    # --- Process Request ---
     try:
         session_info = session_data[session_id]
         rag_chain = session_info['chain']
@@ -370,7 +343,7 @@ def chat():
         temperature_str = str(temperature_float)
         mode_label = TEMPERATURE_LABELS.get(temperature_str, temperature_str)
-        print (f"💬 CHAT: Streaming response for session {session_id} (Model: {model_name}, Temp: {temperature_float})...")
         def generate_chunks():
             full_response = ''
@@ -379,34 +352,28 @@ def chat():
                         config={'configurable': {'session_id': session_id}})
                 for chunk in stream_iterator:
-                    if isinstance(chunk, str): # Ensure it's a string chunk
                         full_response += chunk
                         token_escaped = chunk.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
                         model_name_escaped = model_name.replace('"', '\\"')
                         mode_label_escaped = mode_label.replace('"', '\\"')
                         yield f'data: {{"token": "{token_escaped}", "model_name": "{model_name_escaped}", "mode": "{mode_label_escaped}"}}\n\n'
                     else:
-                        # Handle potential other types if stream yields non-strings
-                        print(f"⚠️ Received non-string chunk: {type(chunk)}")
-                print ('✅ CHAT: Streaming finished successfully.')
-                # Optionally update session history or store full response if needed later
-                # get_session_history(session_id).add_ai_message(full_response)
             except Exception as e:
-                print(f"❌ CHAT Error during streaming generation: {e}")
                 import traceback
                 traceback.print_exc()
                 error_msg = f"Error during response generation: {str(e)}".replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
                 yield f'data: {{"error": "{error_msg}"}}\n\n'
-        # Return the streaming response
         return Response(stream_with_context(generate_chunks()), mimetype='text/event-stream')
     except Exception as e:
-        # Catch errors during setup before streaming starts
-        print(f"❌ CHAT Setup Error: {e}")
         import traceback
         traceback.print_exc()
         error_msg = f"Error setting up chat stream: {str(e)}"
@@ -419,14 +386,13 @@ def chat():
 def clean_markdown_for_tts(text: str) -> str:
-    # --- Simplified cleaning for TTS for new one ---
-    text = re.sub(r'\[.*?\]\(.*?\)', '', text) # Remove links
-    text = re.sub(r'[`*_#]', '', text) # Remove formatting chars
-    text = re.sub(r'^\s*[\-\*\+]\s+', '', text, flags=re.MULTILINE) # Remove list markers
-    text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE) # Remove numbered list markers
-    text = re.sub(r'^\s*>\s?', '', text, flags=re.MULTILINE) # Remove blockquote markers
-    text = re.sub(r'\n+', ' ', text) # Replace newlines with spaces
-    text = re.sub(r'\s{2,}', ' ', text) # Collapse multiple spaces
     return text.strip()
@@ -448,13 +414,12 @@ def text_to_speech():
         mp3_fp.seek(0)
         return Response(mp3_fp, mimetype='audio/mpeg')
     except Exception as e:
-        print (f"❌ TTS Error: {e}")
         return (jsonify({'status': 'error',
                 'message': 'Failed to generate audio.'}), 500)
 if __name__ == '__main__':
     port = int(os.environ.get('PORT', 7860))
-    print (f"🚀 Starting Flask app on port {port}")
-    # Use threaded=True for better handling of concurrent requests during streaming
     app.run(host='0.0.0.0', port=port, debug=False, threaded=True)

 import os
 import uuid
+from flask import Flask, request, render_template, session, jsonify, Response, stream_with_context
 from werkzeug.utils import secure_filename
 from rag_processor import create_rag_chain
 from typing import Sequence, Any, List
 import io
 from gtts import gTTS
 from langchain_core.documents import Document
+from langchain_community.document_loaders import TextLoader, Docx2txtLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
+from langchain.retrievers import EnsembleRetriever, ContextualCompressionRetriever
+from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
 from langchain_community.retrievers import BM25Retriever
 from langchain_community.chat_message_histories import ChatMessageHistory
 from langchain.storage import InMemoryStore
 app = Flask(__name__)
 app.config['SECRET_KEY'] = os.urandom(24)
 TEMPERATURE_LABELS = {
     '0.2': 'Precise',
     '0.4': 'Confident',
     '0.8': 'Flexible',
     '1.0': 'Creative',
     }
 class LocalReranker(BaseDocumentCompressor):
     model: Any
     top_n: int = 5
 def create_optimized_parent_child_chunks(all_docs):
     if not all_docs:
+        print ('CHUNKING: No input documents provided!')
         return ([], [], [])
     parent_splitter = RecursiveCharacterTextSplitter(chunk_size=900,
                                   - 1})
             if len(children) > 1:
                 if j == 0:
+                    child.page_content = '[Beginning] ' + child.page_content
                 elif j == len(children) - 1:
+                    child.page_content = '[Continues...] '  + child.page_content
             child_docs.append(child)
+    print (f"CHUNKING: Created {len(parent_docs)} parent and {len(child_docs)} child chunks."
            )
     return (parent_docs, child_docs, doc_ids)
+def get_context_aware_parents(docs: List[Document], store: InMemoryStore) -> List[Document]:
     if not docs:
         return []
     (parent_scores, child_content_by_parent) = ({}, {})
             parent_id = parent_ids[i]
             if parent_id in child_content_by_parent:
                 child_excerpts = '\n'.join(child_content_by_parent[parent_id][:3])
+                enhanced_content = f"{parent.page_content}\n\nRelevant excerpts:\n{child_excerpts}"
+                enhanced_parent =Document(page_content=enhanced_content,
                              metadata={**parent.metadata,
                              'child_relevance_score': parent_scores[parent_id],
                              'matching_children': len(child_content_by_parent[parent_id])})
                 enhanced_parents.append(enhanced_parent)
         else:
+            print (f"PARENT_FETCH: Parent {parent_ids[i]} not found in store!")
+    enhanced_parents.sort(key=lambda p: p.metadata.get('child_relevance_score', 0), reverse=True)
     return enhanced_parents
 try:
     os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+    print (f"Upload folder ready: {app.config['UPLOAD_FOLDER']}")
 except Exception as e:
+    print (f"Failed to create upload folder, falling back to /tmp: {e}")
     app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
     os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
 session_data = {}
 message_histories = {}
+print ('Loading embedding model...')
 try:
     EMBEDDING_MODEL = \
         HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'
                               , model_kwargs={'device': 'cpu'},
                               encode_kwargs={'normalize_embeddings': True})
+    print ('Embedding model loaded.')
 except Exception as e:
+    print (f"FATAL: Could not load embedding model. Error: {e}")
     raise e
+print ('Loading reranker model...')
 try:
     RERANKER_MODEL = \
         CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2',
                      device='cpu')
+    print ('Reranker model loaded.')
 except Exception as e:
+    print (f"FATAL: Could not load reranker model. Error: {e}")
     raise e
                                 metadata={'source': os.path.basename(filepath),
                                 'page': page_num + 1}))
         if docs:
+            print (f"Loaded PDF: {os.path.basename(filepath)} - {len(docs)} pages"
                    )
             return docs
         else:
             raise ValueError('No text content found in PDF.')
     except Exception as e:
+        print (f"PyMuPDF failed for {filepath}: {e}")
         raise
     temperature = float(temperature_str)
     model_name = request.form.get('model_name',
                                   'moonshotai/kimi-k2-instruct')
+    print (f"UPLOAD: Model: {model_name}, Temp: {temperature}")
     if not files or all(f.filename == '' for f in files):
         return (jsonify({'status': 'error',
                 'message': 'No selected files.'}), 400)
     (all_docs, processed_files, failed_files) = ([], [], [])
+    print (f"Processing {len(files)} file(s)...")
     for file in files:
         if file and file.filename:
             filename = secure_filename(file.filename)
                 'message': f"Failed to process all files. Reasons: {', '.join(failed_files)}"
                 }), 400)
+    print (f"UPLOAD: Processed {len(processed_files)} files.")
     try:
+        print ('Starting RAG pipeline setup...')
         (parent_docs, child_docs, doc_ids) = \
             create_optimized_parent_child_chunks(all_docs)
         if not child_docs:
         vectorstore = FAISS.from_documents(child_docs, EMBEDDING_MODEL)
         store = InMemoryStore()
         store.mset(list(zip(doc_ids, parent_docs)))
+        print (f"Indexed {len(child_docs)} document chunks.")
         bm25_retriever = BM25Retriever.from_documents(child_docs)
         bm25_retriever.k = 12
         mode_label = TEMPERATURE_LABELS.get(temperature_str,
                 temperature_str)
+        print (f"UPLOAD COMPLETE: Session {session_id} is ready.")
         return jsonify({
             'status': 'success',
         return (jsonify({'status': 'error',
                 'message': f'RAG setup failed: {e}'}), 500)
 @app.route('/chat', methods=['POST', 'GET'])
 def chat():
     if request.method == 'GET':
         question = request.args.get('question')
         session_id = request.args.get('session_id')
+        print(f"Received GET request for chat: session={session_id}, question={question[:50]}...")
     elif request.method == 'POST':
         data = request.get_json()
         question = data.get('question')
         session_id = data.get('session_id') or session.get('session_id')
+        print(f"Received POST request for chat: session={session_id}, question={question[:50]}...")
     else:
         return (jsonify({'status': 'error', 'message': 'Method not allowed'}), 405)
     if not question:
         error_msg = "Error: No question provided."
+        print(f"CHAT Validation Error: {error_msg}")
         if request.method == 'GET':
             def error_stream():
                 yield f'data: {{"error": "{error_msg}"}}\n\n'
             return Response(stream_with_context(error_stream()), mimetype='text/event-stream', status=400)
     if not session_id or session_id not in session_data:
         error_msg = "Error: Invalid session. Please upload documents first."
+        print(f"CHAT Validation Error: Invalid session {session_id}.")
         if request.method == 'GET':
             def error_stream():
                 yield f'data: {{"error": "{error_msg}"}}\n\n'
             return Response(stream_with_context(error_stream()), mimetype='text/event-stream', status=400)
         return jsonify({'status': 'error', 'message': error_msg }), 400
     try:
         session_info = session_data[session_id]
         rag_chain = session_info['chain']
         temperature_str = str(temperature_float)
         mode_label = TEMPERATURE_LABELS.get(temperature_str, temperature_str)
+        print (f"CHAT: Streaming response for session {session_id} (Model: {model_name}, Temp: {temperature_float})...")
         def generate_chunks():
             full_response = ''
                         config={'configurable': {'session_id': session_id}})
                 for chunk in stream_iterator:
+                    if isinstance(chunk, str):
                         full_response += chunk
                         token_escaped = chunk.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
                         model_name_escaped = model_name.replace('"', '\\"')
                         mode_label_escaped = mode_label.replace('"', '\\"')
                         yield f'data: {{"token": "{token_escaped}", "model_name": "{model_name_escaped}", "mode": "{mode_label_escaped}"}}\n\n'
                     else:
+                        print(f"Received non-string chunk: {type(chunk)}")
+                print ('CHAT: Streaming finished successfully.')
             except Exception as e:
+                print(f"CHAT Error during streaming generation: {e}")
                 import traceback
                 traceback.print_exc()
                 error_msg = f"Error during response generation: {str(e)}".replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
                 yield f'data: {{"error": "{error_msg}"}}\n\n'
         return Response(stream_with_context(generate_chunks()), mimetype='text/event-stream')
     except Exception as e:
+        print(f"CHAT Setup Error: {e}")
         import traceback
         traceback.print_exc()
         error_msg = f"Error setting up chat stream: {str(e)}"
 def clean_markdown_for_tts(text: str) -> str:
+    text = re.sub(r'\[.*?\]\(.*?\)', '', text)
+    text = re.sub(r'[`*_#]', '', text)
+    text = re.sub(r'^\s*[\-\*\+]\s+', '', text, flags=re.MULTILINE)
+    text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
+    text = re.sub(r'^\s*>\s?', '', text, flags=re.MULTILINE)
+    text = re.sub(r'\n+', ' ', text)
+    text = re.sub(r'\s{2,}', ' ', text)
     return text.strip()
         mp3_fp.seek(0)
         return Response(mp3_fp, mimetype='audio/mpeg')
     except Exception as e:
+        print (f"TTS Error: {e}")
         return (jsonify({'status': 'error',
                 'message': 'Failed to generate audio.'}), 500)
 if __name__ == '__main__':
     port = int(os.environ.get('PORT', 7860))
+    print (f"Starting Flask app on port {port}")
     app.run(host='0.0.0.0', port=port, debug=False, threaded=True)

query_expansion.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# utils/query_expansion.py
 """
 Query Expansion System for CogniChat RAG Application



1
2	"""
3	Query Expansion System for CogniChat RAG Application

rag_processor.py CHANGED Viewed

@@ -12,15 +12,7 @@ from typing import List, Optional
 import time
 class GroqAPIKeyManager:
-    """Manages multiple Groq API keys with automatic rotation and fallback."""
     def __init__(self, api_keys: List[str]):
-        """
-        Initialize with a list of API keys.
-        Args:
-            api_keys: List of Groq API keys to use
-        """
         self.api_keys = [key for key in api_keys if key and key != "your_groq_api_key_here"]
         if not self.api_keys:
             raise ValueError("No valid API keys provided!")
@@ -30,34 +22,24 @@ class GroqAPIKeyManager:
         self.success_count = {key: 0 for key in self.api_keys}
         self.failure_count = {key: 0 for key in self.api_keys}
-        print(f"🔑 API Key Manager: Loaded {len(self.api_keys)} API keys")
     def get_current_key(self) -> str:
-        """Get the current API key."""
         return self.api_keys[self.current_index]
     def mark_success(self, api_key: str):
-        """Mark an API key as successful."""
         if api_key in self.success_count:
             self.success_count[api_key] += 1
-            # Remove from failed keys if it was there
             if api_key in self.failed_keys:
                 self.failed_keys.remove(api_key)
-                print(f"   ✅ API Key #{self.api_keys.index(api_key) + 1} recovered!")
     def mark_failure(self, api_key: str):
-        """Mark an API key as failed."""
         if api_key in self.failure_count:
             self.failure_count[api_key] += 1
             self.failed_keys.add(api_key)
     def rotate_to_next_key(self) -> bool:
-        """
-        Rotate to the next available API key.
-        Returns:
-            True if a new key is available, False if all keys failed
-        """
         initial_index = self.current_index
         attempts = 0
@@ -66,63 +48,43 @@ class GroqAPIKeyManager:
             attempts += 1
             current_key = self.api_keys[self.current_index]
-            # If we've tried all keys, allow retry even failed ones
             if attempts >= len(self.api_keys):
-                print(f"   ⚠️ All keys attempted, retrying with key #{self.current_index + 1}")
                 return True
-            # Skip recently failed keys unless it's been a while
             if current_key not in self.failed_keys:
-                print(f"   🔄 Switching to API Key #{self.current_index + 1}")
                 return True
         return False
     def get_statistics(self) -> str:
-        """Get statistics about API key usage."""
         stats = []
         for i, key in enumerate(self.api_keys):
             success = self.success_count[key]
             failure = self.failure_count[key]
-            status = "❌ FAILED" if key in self.failed_keys else "✅ ACTIVE"
             masked_key = key[:8] + "..." + key[-4:] if len(key) > 12 else "***"
             stats.append(f"   Key #{i+1} ({masked_key}): {success} success, {failure} failures [{status}]")
         return "\n".join(stats)
 def load_api_keys_from_hf_secrets() -> List[str]:
-    """
-    Load API keys from Hugging Face Spaces Secrets.
-    In your Hugging Face Space settings, add these secrets:
-    - GROQ_API_KEY_1
-    - GROQ_API_KEY_2
-    - GROQ_API_KEY_3
-    - GROQ_API_KEY_4
-    Returns:
-        List of API keys retrieved from HF secrets
-    """
     api_keys = []
     secret_names = ['GROQ_API_KEY_1', 'GROQ_API_KEY_2', 'GROQ_API_KEY_3', 'GROQ_API_KEY_4']
-    print("🔐 Loading API keys from Hugging Face Secrets...")
     for secret_name in secret_names:
         try:
-            # HF Spaces secrets are available as environment variables
             api_key = os.getenv(secret_name)
             if api_key and api_key.strip() and api_key != "your_groq_api_key_here":
                 api_keys.append(api_key.strip())
-                print(f"   ✅ Loaded: {secret_name}")
             else:
-                print(f"   ⚠️ Not found or empty: {secret_name}")
         except Exception as e:
-            print(f"   ❌ Error loading {secret_name}: {str(e)}")
-    # ADD THIS RETURN STATEMENT - this was missing!
     return api_keys
@@ -132,18 +94,6 @@ def create_llm_with_fallback(
     temperature: float,
     max_retries: int = 3
 ) -> ChatGroq:
-    """
-    Create a ChatGroq LLM with automatic API key fallback.
-    Args:
-        api_key_manager: Manager handling multiple API keys
-        model_name: Name of the model to use
-        temperature: Temperature setting
-        max_retries: Maximum number of retry attempts
-    Returns:
-        ChatGroq instance
-    """
     for attempt in range(max_retries):
         current_key = api_key_manager.get_current_key()
@@ -153,7 +103,6 @@ def create_llm_with_fallback(
                 api_key=current_key,
                 temperature=temperature
             )
-            # Test the connection with a simple call
             test_result = llm.invoke("test")
             api_key_manager.mark_success(current_key)
             return llm
@@ -161,20 +110,17 @@ def create_llm_with_fallback(
         except Exception as e:
             error_msg = str(e).lower()
             api_key_manager.mark_failure(current_key)
-            # Check if it's a rate limit or auth error
             if "rate" in error_msg or "limit" in error_msg:
-                print(f"   ⚠️ Rate limit hit on API Key #{api_key_manager.current_index + 1}")
             elif "auth" in error_msg or "api" in error_msg:
-                print(f"   ❌ Authentication failed on API Key #{api_key_manager.current_index + 1}")
             else:
-                print(f"   ❌ Error with API Key #{api_key_manager.current_index + 1}: {str(e)[:50]}")
-            # Try next key if available
             if attempt < max_retries - 1:
                 if api_key_manager.rotate_to_next_key():
-                    print(f"   🔄 Retrying with next API key (Attempt {attempt + 2}/{max_retries})...")
-                    time.sleep(1)  # Brief pause before retry
                 else:
                     raise ValueError("All API keys failed!")
             else:
@@ -184,9 +130,7 @@ def create_llm_with_fallback(
 def create_multi_query_retriever(base_retriever, llm, strategy: str = "balanced"):
-    """Wraps a base retriever with query expansion capabilities."""
     def multi_query_retrieve(query: str) -> List[Document]:
-        """Retrieves documents using expanded query variations."""
         query_variations = expand_query_simple(query, strategy=strategy, llm=llm)
         all_docs = []
         seen_content = set()
@@ -199,25 +143,15 @@ def create_multi_query_retriever(base_retriever, llm, strategy: str = "balanced"
                         seen_content.add(content_hash)
                         all_docs.append(doc)
             except Exception as e:
-                print(f"   ✗ Query Expansion Error (Query {i+1}): {str(e)[:50]}")
                 continue
-        print(f"   📊 Query Expansion: Retrieved {len(all_docs)} unique documents.")
         return all_docs
     return multi_query_retrieve
 def get_system_prompt(temperature: float) -> str:
-    """
-    Returns a system prompt dynamically based on temperature setting.
-    Temperature ranges:
-    - 0.0-0.4: Highly factual, structured, conservative
-    - 0.4-0.8: Balanced approach with moderate creativity
-    - 0.8-1.0: Creative, engaging, storytelling mode
-    """
     if temperature <= 0.4:
-        # Conservative, structured prompt
         return """You are CogniChat, an expert document analysis assistant specializing in comprehensive and well-structured answers.
 RESPONSE GUIDELINES:
@@ -256,7 +190,6 @@ RESPONSE GUIDELINES:
 Now answer the following question comprehensively using the context above:"""
     elif temperature <= 0.8:
-        # Balanced prompt
         return """You are CogniChat, an intelligent document analysis assistant that combines accuracy with engaging communication.
 RESPONSE GUIDELINES:
@@ -294,7 +227,7 @@ Now answer the following question in an engaging yet accurate way:"""
         # Creative BUT CLEAR prompt - REVISED VERSION
         return """You are CogniChat, a creative document analyst who makes complex information clear, memorable, and engaging.
-🎯 YOUR CORE MISSION: **CLARITY FIRST, CREATIVITY SECOND**
 Make information easier to understand, not harder. Your creativity should illuminate, not obscure.
@@ -333,20 +266,20 @@ Make information easier to understand, not harder. Your creativity should illumi
    - Numbered lists for sequences, bullets for related items
 **WHAT TO AVOID:**
-- ❌ Flowery or overly descriptive language
-- ❌ Complex metaphors that need their own explanation
-- ❌ Long narrative storytelling that buries the facts
-- ❌ Multiple rhetorical questions in a row
-- ❌ Overuse of emojis or exclamation points
-- ❌ Making simple things sound complicated
 **ACCURACY BOUNDARIES:**
-- ✅ Creative explanation and presentation of facts
-- ✅ Simple, helpful examples from common knowledge
-- ✅ Reorganizing information for better understanding
-- ❌ Never invent facts not in the documents
-- ❌ Don't contradict source material
-- ❌ If info is missing, say so clearly and briefly
 **Source Attribution:**
 - End with: [Source: filename, Page: X]
@@ -371,20 +304,6 @@ def create_rag_chain(
     temperature: float = 0.2,
     api_keys: Optional[List[str]] = None
 ):
-    """
-    Creates an advanced RAG chain with temperature-adaptive prompting and API key rotation.
-    Args:
-        retriever: Document retriever
-        get_session_history_func: Function to get session history
-        enable_query_expansion: Whether to enable query expansion
-        expansion_strategy: Strategy for query expansion
-        model_name: Name of the LLM model
-        temperature: Temperature setting (0.0-1.0)
-        api_keys: Optional list of API keys. If None, loads from environment
-    """
-    # Load API keys from HF Secrets
     if api_keys is None:
         api_keys = load_api_keys_from_hf_secrets()
@@ -394,26 +313,23 @@ def create_rag_chain(
             "GROQ_API_KEY_2, GROQ_API_KEY_3, GROQ_API_KEY_4 in your .env file"
         )
-    # Initialize API key manager
     api_key_manager = GroqAPIKeyManager(api_keys)
-    print(f"⚙️ RAG: Initializing LLM - Model: {model_name}, Temp: {temperature}")
-    # Display creativity mode based on temperature
     if temperature <= 0.4:
         creativity_mode = "FACTUAL & STRUCTURED"
     elif temperature <= 0.8:
         creativity_mode = "BALANCED & ENGAGING"
     else:
         creativity_mode = "CREATIVE & STORYTELLING"
-    print(f"🎭 Creativity Mode: {creativity_mode}")
-    # Create LLM with fallback
     llm = create_llm_with_fallback(api_key_manager, model_name, temperature)
-    print(f"✅ LLM initialized with API Key #{api_key_manager.current_index + 1}")
     if enable_query_expansion:
-        print(f"✨ RAG: Query Expansion ENABLED (Strategy: {expansion_strategy})")
         enhanced_retriever = create_multi_query_retriever(
             base_retriever=retriever,
             llm=llm,
@@ -445,7 +361,6 @@ Optimized Standalone Question:"""
     query_rewriter = rewrite_prompt | llm | StrOutputParser()
     def format_docs(docs):
-        """Format retrieved documents with clear structure and metadata."""
         if not docs:
             return "No relevant documents found in the knowledge base."
@@ -469,7 +384,6 @@ Optimized Standalone Question:"""
             )
         return f"RETRIEVED CONTEXT ({len(docs)} documents):\n\n" + "\n".join(formatted_parts)
-    # Get temperature-adaptive system prompt
     rag_template = get_system_prompt(temperature)
     rag_prompt = ChatPromptTemplate.from_messages([
@@ -478,16 +392,13 @@ Optimized Standalone Question:"""
         ("human", "{question}"),
     ])
-    # Rewriter input construction
     rewriter_input = RunnableParallel({
         "question": itemgetter("question"),
         "chat_history": itemgetter("chat_history"),
     })
-    # Main retrieval pipeline
     retrieval_chain = rewriter_input | query_rewriter | enhanced_retriever | format_docs
-    # Final conversational RAG chain
     conversational_rag_chain = RunnableParallel({
         "context": retrieval_chain,
         "question": itemgetter("question"),
@@ -501,7 +412,7 @@ Optimized Standalone Question:"""
         history_messages_key="chat_history",
     )
-    print("✅ RAG: Chain created successfully.")
     print("\n" + api_key_manager.get_statistics())
-    return chain_with_memory, api_key_manager  # Return manager for statistics

 import time
 class GroqAPIKeyManager:
     def __init__(self, api_keys: List[str]):
         self.api_keys = [key for key in api_keys if key and key != "your_groq_api_key_here"]
         if not self.api_keys:
             raise ValueError("No valid API keys provided!")
         self.success_count = {key: 0 for key in self.api_keys}
         self.failure_count = {key: 0 for key in self.api_keys}
+        print(f"API Key Manager: Loaded {len(self.api_keys)} API keys")
     def get_current_key(self) -> str:
         return self.api_keys[self.current_index]
     def mark_success(self, api_key: str):
         if api_key in self.success_count:
             self.success_count[api_key] += 1
             if api_key in self.failed_keys:
                 self.failed_keys.remove(api_key)
+                print(f"API Key #{self.api_keys.index(api_key) + 1} recovered!")
     def mark_failure(self, api_key: str):
         if api_key in self.failure_count:
             self.failure_count[api_key] += 1
             self.failed_keys.add(api_key)
     def rotate_to_next_key(self) -> bool:
         initial_index = self.current_index
         attempts = 0
             attempts += 1
             current_key = self.api_keys[self.current_index]
             if attempts >= len(self.api_keys):
+                print(f"All keys attempted, retrying with key #{self.current_index + 1}")
                 return True
             if current_key not in self.failed_keys:
+                print(f"Switching to API Key #{self.current_index + 1}")
                 return True
         return False
     def get_statistics(self) -> str:
         stats = []
         for i, key in enumerate(self.api_keys):
             success = self.success_count[key]
             failure = self.failure_count[key]
+            status = "FAILED" if key in self.failed_keys else "ACTIVE"
             masked_key = key[:8] + "..." + key[-4:] if len(key) > 12 else "***"
             stats.append(f"   Key #{i+1} ({masked_key}): {success} success, {failure} failures [{status}]")
         return "\n".join(stats)
 def load_api_keys_from_hf_secrets() -> List[str]:
     api_keys = []
     secret_names = ['GROQ_API_KEY_1', 'GROQ_API_KEY_2', 'GROQ_API_KEY_3', 'GROQ_API_KEY_4']
+    print("Loading API keys from Hugging Face Secrets...")
     for secret_name in secret_names:
         try:
             api_key = os.getenv(secret_name)
             if api_key and api_key.strip() and api_key != "your_groq_api_key_here":
                 api_keys.append(api_key.strip())
+                print(f" Loaded: {secret_name}")
             else:
+                print(f" Not found or empty: {secret_name}")
         except Exception as e:
+            print(f" Error loading {secret_name}: {str(e)}")
     return api_keys
     temperature: float,
     max_retries: int = 3
 ) -> ChatGroq:
     for attempt in range(max_retries):
         current_key = api_key_manager.get_current_key()
                 api_key=current_key,
                 temperature=temperature
             )
             test_result = llm.invoke("test")
             api_key_manager.mark_success(current_key)
             return llm
         except Exception as e:
             error_msg = str(e).lower()
             api_key_manager.mark_failure(current_key)
             if "rate" in error_msg or "limit" in error_msg:
+                print(f"  Rate limit hit on API Key #{api_key_manager.current_index + 1}")
             elif "auth" in error_msg or "api" in error_msg:
+                print(f"  Authentication failed on API Key #{api_key_manager.current_index + 1}")
             else:
+                print(f"  Error with API Key #{api_key_manager.current_index + 1}: {str(e)[:50]}")
             if attempt < max_retries - 1:
                 if api_key_manager.rotate_to_next_key():
+                    print(f" Retrying with next API key (Attempt {attempt + 2}/{max_retries})...")
+                    time.sleep(1)
                 else:
                     raise ValueError("All API keys failed!")
             else:
 def create_multi_query_retriever(base_retriever, llm, strategy: str = "balanced"):
     def multi_query_retrieve(query: str) -> List[Document]:
         query_variations = expand_query_simple(query, strategy=strategy, llm=llm)
         all_docs = []
         seen_content = set()
                         seen_content.add(content_hash)
                         all_docs.append(doc)
             except Exception as e:
+                print(f" Query Expansion Error (Query {i+1}): {str(e)[:50]}")
                 continue
+        print(f" Query Expansion: Retrieved {len(all_docs)} unique documents.")
         return all_docs
     return multi_query_retrieve
 def get_system_prompt(temperature: float) -> str:
     if temperature <= 0.4:
         return """You are CogniChat, an expert document analysis assistant specializing in comprehensive and well-structured answers.
 RESPONSE GUIDELINES:
 Now answer the following question comprehensively using the context above:"""
     elif temperature <= 0.8:
         return """You are CogniChat, an intelligent document analysis assistant that combines accuracy with engaging communication.
 RESPONSE GUIDELINES:
         # Creative BUT CLEAR prompt - REVISED VERSION
         return """You are CogniChat, a creative document analyst who makes complex information clear, memorable, and engaging.
+ YOUR CORE MISSION: **CLARITY FIRST, CREATIVITY SECOND**
 Make information easier to understand, not harder. Your creativity should illuminate, not obscure.
    - Numbered lists for sequences, bullets for related items
 **WHAT TO AVOID:**
+-  Flowery or overly descriptive language
+-  Complex metaphors that need their own explanation
+-  Long narrative storytelling that buries the facts
+-  Multiple rhetorical questions in a row
+-  Overuse of emojis or exclamation points
+-  Making simple things sound complicated
 **ACCURACY BOUNDARIES:**
+-  Creative explanation and presentation of facts
+-  Simple, helpful examples from common knowledge
+-  Reorganizing information for better understanding
+-  Never invent facts not in the documents
+-  Don't contradict source material
+-  If info is missing, say so clearly and briefly
 **Source Attribution:**
 - End with: [Source: filename, Page: X]
     temperature: float = 0.2,
     api_keys: Optional[List[str]] = None
 ):
     if api_keys is None:
         api_keys = load_api_keys_from_hf_secrets()
             "GROQ_API_KEY_2, GROQ_API_KEY_3, GROQ_API_KEY_4 in your .env file"
         )
     api_key_manager = GroqAPIKeyManager(api_keys)
+    print(f" RAG: Initializing LLM - Model: {model_name}, Temp: {temperature}")
     if temperature <= 0.4:
         creativity_mode = "FACTUAL & STRUCTURED"
     elif temperature <= 0.8:
         creativity_mode = "BALANCED & ENGAGING"
     else:
         creativity_mode = "CREATIVE & STORYTELLING"
+    print(f"Creativity Mode: {creativity_mode}")
     llm = create_llm_with_fallback(api_key_manager, model_name, temperature)
+    print(f"LLM initialized with API Key #{api_key_manager.current_index + 1}")
     if enable_query_expansion:
+        print(f"RAG: Query Expansion ENABLED (Strategy: {expansion_strategy})")
         enhanced_retriever = create_multi_query_retriever(
             base_retriever=retriever,
             llm=llm,
     query_rewriter = rewrite_prompt | llm | StrOutputParser()
     def format_docs(docs):
         if not docs:
             return "No relevant documents found in the knowledge base."
             )
         return f"RETRIEVED CONTEXT ({len(docs)} documents):\n\n" + "\n".join(formatted_parts)
     rag_template = get_system_prompt(temperature)
     rag_prompt = ChatPromptTemplate.from_messages([
         ("human", "{question}"),
     ])
     rewriter_input = RunnableParallel({
         "question": itemgetter("question"),
         "chat_history": itemgetter("chat_history"),
     })
     retrieval_chain = rewriter_input | query_rewriter | enhanced_retriever | format_docs
     conversational_rag_chain = RunnableParallel({
         "context": retrieval_chain,
         "question": itemgetter("question"),
         history_messages_key="chat_history",
     )
+    print("RAG: Chain created successfully.")
     print("\n" + api_key_manager.get_statistics())
+    return chain_with_memory, api_key_manager

templates/index.html CHANGED Viewed

@@ -348,7 +348,6 @@
      <script>
          document.addEventListener('DOMContentLoaded', () => {
-             // ... (keep existing element variables)
              const uploadContainer = document.getElementById('upload-container');
              const chatContainer = document.getElementById('chat-container');
              const dropZone = document.getElementById('drop-zone');
@@ -364,20 +363,17 @@
              const chatContent = document.getElementById('chat-content');
              const modelSelect = document.getElementById('model-select');
              const temperatureSelect = document.getElementById('temperature-select');
-             // Speed select variable removed
              const chatFilename = document.getElementById('chat-filename');
              const chatSessionInfo = document.getElementById('chat-session-info');
              let sessionId = sessionStorage.getItem('cognichat_session_id');
-             let currentModelInfo = JSON.parse(sessionStorage.getItem('cognichat_model_info')); // Load model info
-             // --- Initialize Marked.js options ---
              marked.setOptions({
-                 breaks: true, // Convert single line breaks to <br>
-                 gfm: true,    // Enable GitHub Flavored Markdown
              });
-             // --- Restore Chat State if Session Exists ---
              if (sessionId && currentModelInfo) {
                  console.log("Restoring session:", sessionId);
                  uploadContainer.classList.add('hidden');

      <script>
          document.addEventListener('DOMContentLoaded', () => {
              const uploadContainer = document.getElementById('upload-container');
              const chatContainer = document.getElementById('chat-container');
              const dropZone = document.getElementById('drop-zone');
              const chatContent = document.getElementById('chat-content');
              const modelSelect = document.getElementById('model-select');
              const temperatureSelect = document.getElementById('temperature-select');
              const chatFilename = document.getElementById('chat-filename');
              const chatSessionInfo = document.getElementById('chat-session-info');
              let sessionId = sessionStorage.getItem('cognichat_session_id');
+             let currentModelInfo = JSON.parse(sessionStorage.getItem('cognichat_model_info'));
              marked.setOptions({
+                 breaks: true,
+                 gfm: true,
              });
              if (sessionId && currentModelInfo) {
                  console.log("Restoring session:", sessionId);
                  uploadContainer.classList.add('hidden');