| """ | |
| RAG (Retrieval Augmented Generation) service. | |
| This module provides the RAG implementation with tool creation and agent management. | |
| """ | |
| import traceback | |
| from typing import List, Dict, Any, Optional, Tuple | |
| from langchain.tools import tool | |
| from langchain.agents import AgentExecutor, create_tool_calling_agent | |
| from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder | |
| from langchain.memory import ConversationBufferMemory | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| from configs.config import Config | |
| from utils import ( | |
| retrieve_similar_chunks, | |
| filter_relevant_chunks, | |
| prepare_context_from_chunks | |
| ) | |
| from services.llm_service import create_tavily_search_tool | |


def create_vector_search_tool(
    faiss_index: faiss.IndexHNSWFlat,
    document_chunks_with_metadata: List[Dict[str, Any]],
    embedding_model: SentenceTransformer,
    k: Optional[int] = None,
    max_chunk_length: Optional[int] = None
):
    """
    Create a vector search tool for document retrieval.

    Args:
        faiss_index: FAISS index for similarity search
        document_chunks_with_metadata: List of document chunks
        embedding_model: SentenceTransformer model
        k: Number of chunks to retrieve
        max_chunk_length: Maximum chunk length

    Returns:
        LangChain tool for vector search
    """
    if k is None:
        k = Config.DEFAULT_K_CHUNKS // 3  # Use fewer chunks for the tool
    if max_chunk_length is None:
        max_chunk_length = Config.DEFAULT_CHUNK_SIZE

    @tool
    def vector_database_search(query: str) -> str:
        """Search the uploaded PDF document for information related to the query.

        Args:
            query: The search query string to find relevant information in the document.

        Returns:
            A string containing relevant information found in the document.
        """
        # Handle very short or empty queries
        if not query or len(query.strip()) < 3:
            return "Please provide a more specific search query with at least 3 characters."
        try:
            # Retrieve similar chunks using the provided session-specific components
            similar_chunks_data = retrieve_similar_chunks(
                query,
                faiss_index,
                document_chunks_with_metadata,
                embedding_model,
                k=k,
                max_chunk_length=max_chunk_length
            )

            # Format the response
            if not similar_chunks_data:
                return "No relevant information found in the document for that query. Please try rephrasing your question or using different keywords."

            # Filter out chunks with very high distance (low similarity)
            filtered_chunks = filter_relevant_chunks(similar_chunks_data)
            if not filtered_chunks:
                return "No sufficiently relevant information found in the document for that query. Please try rephrasing your question or using different keywords."

            context = "\n\n---\n\n".join(chunk_text for chunk_text, _, _ in filtered_chunks)
            return f"The following information was found in the document regarding '{query}':\n{context}"
        except Exception as e:
            print(f"Error in vector search tool: {e}")
            return f"Error searching the document: {str(e)}"

    return vector_database_search
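
# A minimal usage sketch (illustrative; assumes a FAISS index, chunk list, and
# embedding model were already built during document upload; the names below
# are hypothetical):
#
#   search_tool = create_vector_search_tool(index, chunks, embedder, k=5)
#   search_tool.name                      # -> "vector_database_search"
#   print(search_tool.run("What does the report say about pricing?"))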


class RAGService:
    """Service for RAG operations."""

    def __init__(self):
        """Initialize RAG service."""
        self.tavily_tool = create_tavily_search_tool()

    def create_agent_tools(
        self,
        faiss_index: faiss.IndexHNSWFlat,
        document_chunks: List[Dict[str, Any]],
        embedding_model: SentenceTransformer,
        use_web_search: bool = False
    ) -> List:
        """
        Create tools for the RAG agent.

        Args:
            faiss_index: FAISS index
            document_chunks: Document chunks
            embedding_model: Embedding model
            use_web_search: Whether to include web search tool

        Returns:
            List of tools for the agent
        """
        tools = []

        # Add vector search tool
        vector_tool = create_vector_search_tool(
            faiss_index=faiss_index,
            document_chunks_with_metadata=document_chunks,
            embedding_model=embedding_model,
            max_chunk_length=Config.DEFAULT_CHUNK_SIZE,
            k=10
        )
        tools.append(vector_tool)

        # Add web search tool if requested and available
        if use_web_search and self.tavily_tool:
            tools.append(self.tavily_tool)

        return tools

    def create_agent_prompt(self, has_document_search: bool, has_web_search: bool) -> ChatPromptTemplate:
        """
        Create prompt template for the agent.

        Args:
            has_document_search: Whether document search is available
            has_web_search: Whether web search is available

        Returns:
            ChatPromptTemplate for the agent
        """
        # Build tool instructions dynamically
        tool_instructions = ""
        if has_document_search:
            tool_instructions += "Use vector_database_search to find information in the uploaded document. "
        if has_web_search:
            tool_instructions += "Use tavily_search_results_json for web searches when document search is insufficient. "
        if not tool_instructions:
            tool_instructions = "Answer based on the provided context only. "

        return ChatPromptTemplate.from_messages([
            ("system", f"""You are a helpful AI assistant that answers questions about documents.

Context: {{context}}

Tools available: {tool_instructions}

Instructions:
- Use the provided context first
- If context is insufficient, use available tools to search for more information
- Provide clear, helpful answers
- If you cannot find an answer, say so clearly"""),
            # Chat history goes before the current input so the model sees the
            # conversation in order; the agent scratchpad always comes last.
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ])
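
    # Quick sanity check of the assembled template (illustrative):
    #
    #   prompt = RAGService().create_agent_prompt(True, False)
    #   sorted(prompt.input_variables)
    #   # -> ['agent_scratchpad', 'chat_history', 'context', 'input']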

    def execute_agent(
        self,
        llm,
        tools: List,
        query: str,
        context: str,
        memory: ConversationBufferMemory
    ) -> Dict[str, Any]:
        """
        Execute the RAG agent with given tools and context.

        Args:
            llm: Language model
            tools: List of tools
            query: User query
            context: Context string
            memory: Conversation memory

        Returns:
            Agent response
        """
        try:
            # Validate tools (the loop variable is named to avoid shadowing the
            # imported `tool` decorator)
            for agent_tool in tools:
                if not hasattr(agent_tool, 'name') or not hasattr(agent_tool, 'description'):
                    raise ValueError(f"Tool {agent_tool} is missing required attributes")

            # Create prompt
            has_document_search = any(t.name == "vector_database_search" for t in tools)
            has_web_search = any(t.name == "tavily_search_results_json" for t in tools)
            prompt = self.create_agent_prompt(has_document_search, has_web_search)

            # Create agent
            agent = create_tool_calling_agent(llm, tools, prompt)
            agent_executor = AgentExecutor(
                agent=agent,
                tools=tools,
                memory=memory,
                verbose=Config.AGENT_VERBOSE,
                handle_parsing_errors=True,
                max_iterations=Config.AGENT_MAX_ITERATIONS,
                return_intermediate_steps=False,
                # Runnable tool-calling agents only support "force" here;
                # "generate" raises a ValueError when max_iterations is hit
                early_stopping_method="force"
            )

            # Execute agent
            agent_input = {
                "input": query,
                "context": context,
            }
            response_payload = agent_executor.invoke(agent_input)

            # Validate response
            agent_output = response_payload.get("output", "") if response_payload else ""
            if not agent_output or len(agent_output.strip()) < 10:
                raise ValueError("Insufficient response from agent")

            # Reject responses that consist of nothing but a lead-in phrase
            problematic_prefixes = [
                "Based on the Document,",
                "According to a web search,",
                "Based on the available information,",
                "I need to",
                "Let me"
            ]
            stripped_output = agent_output.strip()
            if any(stripped_output == prefix.strip() or
                   stripped_output == prefix.strip() + "."
                   for prefix in problematic_prefixes):
                raise ValueError("Agent returned incomplete response")

            return response_payload
        except Exception:
            # Log the full stack trace, then let the caller fall back to a
            # non-agent response
            traceback.print_exc()
            raise
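
    # Illustrative call, assuming an initialized chat model `llm` and tools from
    # create_agent_tools (all names hypothetical):
    #
    #   memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    #   result = service.execute_agent(llm, tools, "Summarize section 2", context, memory)
    #   print(result["output"])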

    def fallback_response(
        self,
        llm,
        tools: List,
        query: str,
        context: str,
        use_tavily: bool = False
    ) -> Dict[str, Any]:
        """
        Generate fallback response using direct tool usage or LLM.

        Args:
            llm: Language model
            tools: List of available tools
            query: User query
            context: Context string
            use_tavily: Whether to use web search

        Returns:
            Fallback response
        """
        try:
            tool_results = []

            # Try vector search first if available
            vector_tool = next((t for t in tools if t.name == "vector_database_search"), None)
            if vector_tool:
                try:
                    search_result = vector_tool.run(query)
                    # The tool signals a miss with "No relevant information found ..."
                    # or "No sufficiently relevant information found ..."; skip both
                    if search_result and "relevant information found" not in search_result:
                        tool_results.append(f"Document Search: {search_result}")
                except Exception:
                    pass  # Fall through to web search / direct LLM

            # Try web search if needed and available
            if use_tavily:
                web_tool = next((t for t in tools if t.name == "tavily_search_results_json"), None)
                if web_tool:
                    try:
                        web_result = web_tool.run(query)
                        if web_result:
                            tool_results.append(f"Web Search: {web_result}")
                    except Exception:
                        pass  # Web search is best-effort

            # Combine tool results with context
            enhanced_context = context
            if tool_results:
                enhanced_context += "\n\nAdditional Information:\n" + "\n\n".join(tool_results)

            # Use a direct LLM call with the enhanced context
            direct_prompt = ChatPromptTemplate.from_messages([
                ("system", "You are a helpful assistant. Use the provided context and information to answer the user's question clearly and completely."),
                ("human", "Context and Information: {context}\n\nQuestion: {input}")
            ])
            formatted_prompt = direct_prompt.format_prompt(
                context=enhanced_context,
                input=query
            ).to_messages()
            response = llm.invoke(formatted_prompt)
            direct_output = response.content if hasattr(response, 'content') else str(response)
            return {"output": direct_output}
        except Exception:
            # Final fallback: a plain LLM response over the original context
            fallback_prompt = ChatPromptTemplate.from_messages([
                ("system", """You are a helpful assistant that answers questions about documents.
Use the provided context to answer the user's question.
If the context contains relevant information, start your answer with "Based on the document, ..."
If the context is insufficient, clearly state what you don't know."""),
                ("human", "Context: {context}\n\nQuestion: {input}")
            ])
            formatted_fallback = fallback_prompt.format_prompt(
                context=context,
                input=query
            ).to_messages()
            response = llm.invoke(formatted_fallback)
            fallback_output = response.content if hasattr(response, 'content') else str(response)
            return {"output": fallback_output}

    def generate_response(
        self,
        llm,
        query: str,
        context_chunks: List[Tuple],
        faiss_index: faiss.IndexHNSWFlat,
        document_chunks: List[Dict[str, Any]],
        embedding_model: SentenceTransformer,
        memory: ConversationBufferMemory,
        use_tavily: bool = False
    ) -> Dict[str, Any]:
        """
        Generate RAG response using agent or fallback methods.

        Args:
            llm: Language model
            query: User query
            context_chunks: Initial context chunks
            faiss_index: FAISS index
            document_chunks: Document chunks
            embedding_model: Embedding model
            memory: Conversation memory
            use_tavily: Whether to use web search

        Returns:
            Generated response
        """
        # Validate inputs
        if not query or not query.strip():
            return {"output": "Please provide a valid question."}

        # Prepare context from the initially retrieved chunks
        context = prepare_context_from_chunks(context_chunks)

        # Create tools
        tools = self.create_agent_tools(
            faiss_index, document_chunks, embedding_model, use_tavily
        )

        # Without tools, answer directly from the prepared context
        if not tools:
            fallback_prompt = ChatPromptTemplate.from_messages([
                ("system", "You are a helpful assistant that answers questions about documents. Use the provided context to answer the user's question."),
                ("human", "Context: {context}\n\nQuestion: {input}")
            ])
            try:
                formatted_prompt = fallback_prompt.format_prompt(
                    context=context or "No context available",
                    input=query
                ).to_messages()
                response = llm.invoke(formatted_prompt)
                return {"output": response.content if hasattr(response, 'content') else str(response)}
            except Exception:
                return {"output": "I'm sorry, I encountered an error processing your request."}

        # Try agent execution, falling back to direct tool/LLM usage on failure
        try:
            return self.execute_agent(llm, tools, query, context, memory)
        except Exception:
            try:
                return self.fallback_response(llm, tools, query, context, use_tavily)
            except Exception:
                return {"output": "I'm sorry, I encountered an error processing your request. Please try again."}


# Global RAG service instance
rag_service = RAGService()
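
# End-to-end sketch of a typical call (all names below are hypothetical
# stand-ins for objects created when a document is uploaded):
#
#   memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
#   chunks = retrieve_similar_chunks(question, index, doc_chunks, embedder)
#   answer = rag_service.generate_response(
#       llm, question, chunks, index, doc_chunks, embedder, memory, use_tavily=False
#   )
#   print(answer["output"])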