Spaces:
Runtime error
Runtime error
| import sys | |
| import boto3 | |
| from pathlib import Path | |
| from llama_index.core import Settings | |
| from llama_index.core.storage.docstore import SimpleDocumentStore | |
| from llama_index.retrievers.bm25 import BM25Retriever | |
| from llama_index.core.retrievers import QueryFusionRetriever | |
| from config import aws_access_key_id, aws_secret_access_key | |
class AppState:
    """Singleton container for state shared across the application.

    Every call to ``AppState()`` returns the same object, so attributes set
    through one reference are visible through all others.
    """

    # The single shared instance; created on the first instantiation.
    _instance = None
    # Retriever slot; stays ``None`` until something assigns it.
    retriever_bm25 = None

    def __new__(cls):
        """Return the shared instance, creating it on first use."""
        existing = cls._instance
        if existing is not None:
            return existing
        cls._instance = super().__new__(cls)
        return cls._instance
# S3 parameters
# Bucket name (presumably the one holding the persisted index; confirm
# against callers).
BUCKET_NAME = "legal-position"
# Key prefix inside the bucket (presumably where the retriever files are
# stored; confirm against callers).
PREFIX_RETRIEVER = "Save_Index/"
# Local directory; same path that initialize_components() reads from.
LOCAL_DIR = Path("Save_Index_Local")
# Create the global state instance
app_state = AppState()
def initialize_s3_client():
    """Build a boto3 S3 client for the ``eu-north-1`` region.

    The credentials are the ``aws_access_key_id`` and
    ``aws_secret_access_key`` values imported from ``config``.

    Returns:
        The object returned by ``boto3.client("s3", ...)``.
    """
    client_options = {
        "aws_access_key_id": aws_access_key_id,
        "aws_secret_access_key": aws_secret_access_key,
        "region_name": "eu-north-1",
    }
    return boto3.client("s3", **client_options)
def download_s3_file(s3_client, bucket_name, s3_key, local_path):
    """Download one S3 object to ``local_path``.

    Args:
        s3_client: Object exposing ``download_file(bucket, key, filename)``,
            e.g. a boto3 S3 client.
        bucket_name: Name of the bucket that holds the object.
        s3_key: Key of the object to download.
        local_path: Destination file path (``str`` or ``Path``).
    """
    destination = Path(local_path)
    # Make sure the target directory exists, as download_s3_folder() does;
    # otherwise the download fails when the folder has not been created yet.
    destination.parent.mkdir(parents=True, exist_ok=True)
    s3_client.download_file(bucket_name, s3_key, str(local_path))
    print(f"Завантажено: {s3_key} -> {local_path}")
def download_s3_folder(s3_client, bucket_name, prefix, local_dir):
    """Download every object under ``prefix`` into ``local_dir``.

    The key layout below ``prefix`` is reproduced below ``local_dir``;
    keys ending in ``/`` (folder placeholders) are skipped.

    Args:
        s3_client: S3 client exposing ``get_paginator`` and ``download_file``
            (e.g. a boto3 S3 client).
        bucket_name: Name of the bucket to read from.
        prefix: Key prefix to list; each key must be located under it.
        local_dir: Destination directory (``str`` or ``Path``).

    Raises:
        ValueError: If a listed key is not located under ``prefix`` as a
            path (raised by ``Path.relative_to``).
    """
    target_root = Path(local_dir)
    # A single list_objects_v2 call returns at most 1000 keys, so iterate
    # over all result pages instead of reading only the first response.
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        # Pages without matching objects carry no 'Contents' entry.
        for obj in page.get("Contents", []):
            s3_key = obj["Key"]
            if s3_key.endswith("/"):
                continue
            local_file_path = target_root / Path(s3_key).relative_to(prefix)
            local_file_path.parent.mkdir(parents=True, exist_ok=True)
            s3_client.download_file(bucket_name, s3_key, str(local_file_path))
            print(f"Завантажено: {s3_key} -> {local_file_path}")
def initialize_components():
    """Load the persisted BM25 retriever and publish it on ``app_state``.

    Expects ``LOCAL_DIR`` to contain ``docstore_es_filter.json`` and
    ``bm25_retriever_es``. On success ``app_state.retriever_bm25`` is set to
    a ``QueryFusionRetriever`` wrapping the loaded BM25 retriever.

    Returns:
        ``True`` when initialization succeeded; ``False`` when any exception
        occurred (the error message is printed to stderr).
    """
    try:
        # Use the module-level constant instead of repeating the path literal.
        persist_path = LOCAL_DIR
        if not persist_path.exists():
            raise FileNotFoundError(f"Directory not found: {persist_path}")

        required_files = ['docstore_es_filter.json', 'bm25_retriever_es']
        missing_files = [
            name for name in required_files if not (persist_path / name).exists()
        ]
        if missing_files:
            raise FileNotFoundError(
                f"Missing required files: {', '.join(missing_files)}"
            )

        # The docstore is not passed to the retriever below; the load is kept
        # so that an unreadable docstore file still fails initialization.
        # NOTE(review): confirm whether this load is still needed.
        SimpleDocumentStore.from_persist_path(
            str(persist_path / "docstore_es_filter.json")
        )
        bm25_retriever = BM25Retriever.from_persist_dir(
            str(persist_path / "bm25_retriever_es")
        )

        # Store retriever_bm25 in the global state.
        app_state.retriever_bm25 = QueryFusionRetriever(
            [bm25_retriever],
            similarity_top_k=Settings.similarity_top_k,
            num_queries=1,
            use_async=True,
        )
        return True
    except Exception as e:
        # Top-level boundary: report the failure and signal it via the
        # return value instead of propagating.
        print(f"Error initializing components: {e}", file=sys.stderr)
        return False