import os

from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain_community.vectorstores import FAISS
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from transformers import pipeline
from huggingface_hub import login

login(os.environ["HF_TOKEN"])
# -----------------------------
# Prompt (forces concise output)
# -----------------------------
QA_PROMPT = PromptTemplate(
    template="""Answer the following question in a short and concise way
(maximum 20 sentences), using only the information from the context below.
If you don't know the answer, just say "I can't assist you".

Context:
{context}

Question:
{question}

Concise Answer:""",
    input_variables=["context", "question"],
)
# -----------------------------
# Load and process documents
# -----------------------------
pdfs = ["ejemplo2.pdf"]
docs = []
for pdf in pdfs:
    docs.extend(PyPDFLoader(pdf).load())

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")
vectorstore = FAISS.from_documents(splits, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

# Rerank the 10 FAISS candidates with a cross-encoder and keep the best 5
cross_encoder = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
compressor = CrossEncoderReranker(model=cross_encoder, top_n=5)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever, base_compressor=compressor
)
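
# Optional sanity check (a sketch; the query string below is illustrative):
# the retriever pulls k=10 candidates from FAISS, then the cross-encoder
# reranks them and returns only the top_n=5.
# for d in compression_retriever.invoke("What is this document about?"):
#     print(d.metadata.get("source"), d.page_content[:80])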
# -----------------------------
# Configure FLAN-T5 (text2text-generation is the right task for T5)
# -----------------------------
generator = pipeline(
    "text2text-generation",  # 👈 use this task for T5-family models
    model="google/flan-t5-base",
    max_new_tokens=512,       # hard cap on answer length
    do_sample=True,
    temperature=0.5,          # mild sampling; keeps answers focused
    repetition_penalty=1.1,
)
llm = HuggingFacePipeline(pipeline=generator)
# -----------------------------
# RetrievalQA
# -----------------------------
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=compression_retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_PROMPT},
)
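
# Direct invocation example (illustrative; bypasses the HTTP layer):
# out = qa_chain.invoke({"query": "Summarize the document in two sentences."})
# print(out["result"])
# print([d.metadata.get("source") for d in out["source_documents"]])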
# -----------------------------
# FastAPI app
# -----------------------------
app = FastAPI(title="PDF QA API", description="Query PDFs with RAG + HuggingFace")

class QueryRequest(BaseModel):
    query: str

class QueryResponse(BaseModel):
    answer: str
    sources: List[str]

@app.post("/ask", response_model=QueryResponse)  # route path "/ask" is an assumed name
def ask_question(request: QueryRequest):
    result = qa_chain.invoke({"query": request.query})
    return QueryResponse(
        answer=result["result"],
        sources=[doc.metadata.get("source", "unknown") for doc in result["source_documents"]],
    )
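
# Run the API locally (a minimal sketch: assumes uvicorn is installed and that
# port 7860, the Hugging Face Spaces default, is free; on Spaces the platform
# usually launches the server itself):
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request (illustrative):
#   curl -X POST http://localhost:7860/ask \
#        -H "Content-Type: application/json" \
#        -d '{"query": "What is the main topic of the PDF?"}'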