import os
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain_community.vectorstores import FAISS
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from transformers import pipeline
from huggingface_hub import login
# Authenticate with the Hugging Face Hub (expects HF_TOKEN in the environment)
login(os.environ["HF_TOKEN"])
# -----------------------------
# Prompt (forces concise output)
# -----------------------------
QA_PROMPT = PromptTemplate(
    template="""Answer the following question concisely (maximum 20 sentences),
using only the information from the context below.
If you don't know the answer, just say "I can't assist you".

Context:
{context}

Question:
{question}

Concise Answer:""",
    input_variables=["context", "question"],
)
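# For illustration, PromptTemplate.format fills the placeholders above; the
# context and question strings here are made-up examples, not app data:
# print(QA_PROMPT.format(context="Paris is the capital of France.",
#                        question="What is the capital of France?"))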
# -----------------------------
# Load and process documents
# -----------------------------
pdfs = ["ejemplo2.pdf"]  # PDF files to index
docs = []
for pdf in pdfs:
    docs.extend(PyPDFLoader(pdf).load())
# Split documents into overlapping chunks, embed them, and index them in FAISS
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")
vectorstore = FAISS.from_documents(splits, embeddings)
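# Optional: persist the index so it is not rebuilt on every start. A minimal
# sketch using FAISS's save_local/load_local; the "faiss_index" path is an
# arbitrary choice, not part of the original app.
# vectorstore.save_local("faiss_index")
# vectorstore = FAISS.load_local(
#     "faiss_index", embeddings, allow_dangerous_deserialization=True
# )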
# Fetch 10 candidates, then rerank with a cross-encoder and keep the top 5
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
cross_encoder = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
compressor = CrossEncoderReranker(model=cross_encoder, top_n=5)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever, base_compressor=compressor
)
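# Quick sanity check (hypothetical query; uncomment to inspect what the
# reranked retriever returns before wiring it into the chain):
# for doc in compression_retriever.invoke("What is this document about?"):
#     print(doc.metadata.get("source"), doc.page_content[:80])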
# -----------------------------
# Configure the FLAN-T5 generator
# -----------------------------
generator = pipeline(
    "text2text-generation",  # the pipeline task for T5-style seq2seq models
    model="google/flan-t5-base",
    max_new_tokens=512,  # upper bound on answer length
    do_sample=False,  # deterministic greedy decoding suits extractive QA
    repetition_penalty=1.1,
)
llm = HuggingFacePipeline(pipeline=generator)
# -----------------------------
# RetrievalQA
# -----------------------------
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=compression_retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_PROMPT},
)
# -----------------------------
# FastAPI app
# -----------------------------
app = FastAPI(title="PDF QA API", description="Query PDFs with RAG + HuggingFace")
class QueryRequest(BaseModel):
    query: str

class QueryResponse(BaseModel):
    answer: str
    sources: List[str]
@app.post("/ask", response_model=QueryResponse)
def ask_question(request: QueryRequest):
    result = qa_chain.invoke({"query": request.query})
    return QueryResponse(
        answer=result["result"],
        sources=[doc.metadata.get("source", "unknown") for doc in result["source_documents"]],
    )
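# A minimal sketch for running the API locally, assuming uvicorn is installed
# (the host/port values are arbitrary choices, not part of the original app):
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request against the running server (hypothetical query):
# curl -X POST http://localhost:8000/ask \
#   -H "Content-Type: application/json" \
#   -d '{"query": "What is the document about?"}'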