Add initial implementation of BestRAG library with Streamlit app and README updates
Browse files- README.md +36 -1
- app.py +83 -0
- requirements.txt +2 -0
README.md
CHANGED
|
@@ -11,4 +11,39 @@ license: mit
|
|
| 11 |
short_description: 'BestRAG: Hybrid Retrieval-Augmented Generation library'
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
short_description: 'BestRAG: Hybrid Retrieval-Augmented Generation library'
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# BestRAG - Hybrid Retrieval-Augmented Generation (RAG)
|
| 15 |
+
|
| 16 |
+
**BestRAG** is a Python library that leverages a hybrid Retrieval-Augmented Generation (RAG) approach to efficiently store and retrieve embeddings. By combining dense, sparse, and late interaction embeddings, BestRAG offers a robust solution for managing large datasets.
|
| 17 |
+
|
| 18 |
+
## Features
|
| 19 |
+
|
| 20 |
+
- **Hybrid RAG**: Utilizes dense, sparse, and late interaction embeddings for enhanced performance.
|
| 21 |
+
- **Easy Integration**: Simple API for storing and searching embeddings.
|
| 22 |
+
- **PDF Support**: Directly store embeddings from PDF documents.
|
| 23 |
+
|
| 24 |
+
## How to Use
|
| 25 |
+
|
| 26 |
+
1. **Initialize BestRAG**: Enter your Qdrant URL, API Key, and Collection Name, then click "Initialize BestRAG".
|
| 27 |
+
2. **Create Embeddings**: Upload a PDF file and click "Create Embeddings" to store embeddings.
|
| 28 |
+
3. **Search Embeddings**: Enter a search query and set the limit, then click "Search" to retrieve results.
|
| 29 |
+
|
| 30 |
+
## Installation
|
| 31 |
+
|
| 32 |
+
You can use BestRAG freely by installing it with:
|
| 33 |
+
|
| 34 |
+
```bash
|
| 35 |
+
pip install bestrag
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
For more details, visit the [GitHub repository](https://github.com/samadpls/BestRAG).
|
| 39 |
+
|
| 40 |
+
[GitHub](https://github.com/samadpls/BestRAG)
|
| 41 |
+
[PyPI](https://pypi.org/project/bestrag/)
|
| 42 |
+
|
| 43 |
+
> **Note**: Qdrant offers a free tier with 4GB of storage. To generate your API key and endpoint, visit [Qdrant](https://qdrant.tech/).
|
| 44 |
+
|
| 45 |
+
Made with ❤️ by [samadpls](https://github.com/samadpls)
|
| 46 |
+
|
| 47 |
+
---
|
| 48 |
+
|
| 49 |
+
Please like this project on [GitHub](https://github.com/samadpls/BestRAG) if you find it useful!
|
app.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
import os
import tempfile

import streamlit as st
from bestrag import BestRAG
|
| 5 |
+
|
| 6 |
+
# Streamlit front end for BestRAG.
#
# The script collects Qdrant connection details, builds a BestRAG instance on
# demand, and then exposes two tabs: one that stores embeddings for an
# uploaded PDF and one that runs a search against the collection.

# Page header: logo in a narrow column, title in a wide one.
col1, col2 = st.columns([1, 5])
with col1:
    st.image(
        "https://github.com/user-attachments/assets/e23d11d5-2d7b-44e2-aa11-59ddcb66bebc",
        width=140,
    )
with col2:
    st.title("BestRAG - Hybrid Retrieval-Augmented Generation (RAG)")

# Project links and usage notes. The two links carry visible text so they
# render as clickable links (an empty "[]" label renders nothing).
st.markdown("""
[GitHub](https://github.com/samadpls/BestRAG)
[PyPI](https://pypi.org/project/bestrag/)

> **Note**: Qdrant offers a free tier with 4GB of storage. To generate your API key and endpoint, visit [Qdrant](https://qdrant.tech/).

You can use BestRAG freely by installing it with `pip install bestrag`. For more details, visit the [GitHub repository](https://github.com/samadpls/BestRAG).

Made with ❤️ by [samadpls](https://github.com/samadpls)
""")

# Input fields for BestRAG initialization.
url = st.text_input("Qdrant URL", "https://YOUR_QDRANT_URL")
# The API key is a secret: mask it so it is not shown in clear text on screen.
api_key = st.text_input("Qdrant API Key", "YOUR_API_KEY", type="password")
collection_name = st.text_input("Collection Name", "YOUR_COLLECTION_NAME")

# Initialize BestRAG only when the user clicks the button. The instance is
# kept in session state so it survives Streamlit's script re-runs.
if st.button("Initialize BestRAG"):
    try:
        st.session_state['rag'] = BestRAG(
            url=url, api_key=api_key, collection_name=collection_name
        )
    except Exception as exc:  # UI boundary: report the failure, keep the page usable
        st.error(f"Failed to initialize BestRAG: {exc}")
    else:
        st.success("BestRAG initialized successfully!")

# Everything below needs an initialized BestRAG instance.
if 'rag' in st.session_state:
    rag = st.session_state['rag']

    # Tabs for the two supported operations.
    tab1, tab2 = st.tabs(["Create Embeddings", "Search Embeddings"])

    with tab1:
        st.header("Create Embeddings")

        # File uploader for PDF.
        pdf_file = st.file_uploader("Upload PDF", type=["pdf"])

        if st.button("Create Embeddings"):
            if pdf_file is not None:
                # The uploaded name is used only as a label, never as a path:
                # it is client-controlled, so joining it onto a directory
                # would allow writing outside the temp directory and would
                # collide when two sessions upload files with the same name.
                pdf_name = pdf_file.name

                temp_pdf_path = None
                try:
                    # delete=False so the file can be re-opened by path after
                    # this handle is closed; it is removed explicitly below.
                    with tempfile.NamedTemporaryFile(
                        suffix=".pdf", delete=False
                    ) as tmp:
                        temp_pdf_path = tmp.name
                        tmp.write(pdf_file.getbuffer())

                    # Store PDF embeddings from the on-disk copy.
                    rag.store_pdf_embeddings(temp_pdf_path, pdf_name)
                finally:
                    # Always clean up the temporary copy, even on failure.
                    if temp_pdf_path is not None and os.path.exists(temp_pdf_path):
                        os.remove(temp_pdf_path)

                st.success(f"Embeddings created for {pdf_name}")
            else:
                st.error("Please upload a PDF file.")

    with tab2:
        st.header("Search Embeddings")

        # Input fields for search.
        query = st.text_input("Search Query", "example query")
        limit = st.number_input("Limit", min_value=1, max_value=20, value=5)

        if st.button("Search"):
            # Perform search.
            results = rag.search(query, limit)

            # Display each returned point as a JSON object.
            st.subheader("Search Results")
            for result in results.points:
                st.json({
                    "id": result.id,
                    "score": result.score,
                    "payload": result.payload,
                })
else:
    st.warning("Please initialize BestRAG first.")
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
bestrag
|