Langchained_PGPS_RAG

Runtime error

App Files Files Community

SergeyO7 commited on Mar 28

Commit

d3faf56

verified ·

1 Parent(s): b221ab4

Update app.py

Browse files

Files changed (1) hide show

app.py +222 -15

app.py CHANGED Viewed

@@ -3,17 +3,19 @@ from langchain_community.document_loaders import UnstructuredMarkdownLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_core.documents import Document
 from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS  # Обновленный импорт
 from langchain_community.llms import HuggingFaceHub
 from langchain.prompts import ChatPromptTemplate
 from dotenv import load_dotenv
 import os
-# Загрузка переменных окружения
 load_dotenv()
-DATA_PATH = ""  # Укажите путь к вашему файлу
 PROMPT_TEMPLATE = """
 Ответь на вопрос, используя только следующий контекст:
 {context}
@@ -21,10 +23,41 @@ PROMPT_TEMPLATE = """
 Ответь на вопрос на основе приведенного контекста: {question}
 """
-# Глобальная переменная для статуса
 status_message = "Инициализация..."
 def initialize_vectorstore():
     global status_message
     try:
         status_message = "Загрузка и обработка документов..."
@@ -36,12 +69,12 @@ def initialize_vectorstore():
         status_message = "База данных готова к использованию."
         return vectorstore
     except Exception as e:
         status_message = f"Ошибка инициализации: {str(e)}"
         raise
 def load_documents():
     file_path = os.path.join(DATA_PATH, "pl250320252.md")
     if not os.path.exists(file_path):
         raise FileNotFoundError(f"Файл {file_path} не найден")
@@ -49,6 +82,7 @@ def load_documents():
     return loader.load()
 def split_text(documents: list[Document]):
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=900,
         chunk_overlap=300,
@@ -58,6 +92,7 @@ def split_text(documents: list[Document]):
     return text_splitter.split_documents(documents)
 def save_to_faiss(chunks: list[Document]):
     embeddings = HuggingFaceEmbeddings(
         model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
         model_kwargs={'device': 'cpu'},
@@ -66,6 +101,7 @@ def save_to_faiss(chunks: list[Document]):
     return FAISS.from_documents(chunks, embeddings)
 def process_query(query_text: str, vectorstore):
     if vectorstore is None:
         return "База данных не инициализирована", []
@@ -87,34 +123,205 @@ def process_query(query_text: str, vectorstore):
         model = HuggingFaceHub(
             repo_id="https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud/",
-            task="text2text-generation",  # Указываем задачу
             model_kwargs={"temperature": 0.5, "max_length": 512}
         )
         response_text = model.predict(prompt)
         sources = list(set([doc.metadata.get("source", "") for doc, _ in results]))
         return response_text, sources
     except Exception as e:
         return f"Ошибка обработки запроса: {str(e)}", []
 def chat_interface(query_text):
     global status_message
     try:
         vectorstore = initialize_vectorstore()
-        response, sources = process_query(query_text, vectorstore)
-        full_response = f"{status_message}\n\nОтвет: {response}\n\nИсточники: {', '.join(sources) if sources else 'Нет источников'}"
-        return full_response
     except Exception as e:
-        return f"Критическая ошибка: {str(e)}"
-# Интерфейс Gradio
 interface = gr.Interface(
     fn=chat_interface,
     inputs=gr.Textbox(lines=2, placeholder="Введите ваш вопрос здесь..."),
-    outputs="text",
     title="Чат с документами",
-    description="Задайте вопрос, и я отвечу на основе загруженных документов."
 )
 if __name__ == "__main__":

 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_core.documents import Document
 from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
 from langchain_community.llms import HuggingFaceHub
 from langchain.prompts import ChatPromptTemplate
 from dotenv import load_dotenv
 import os
+from datetime import datetime
+from skyfield.api import load
+import matplotlib.pyplot as plt
+# Load environment variables
 load_dotenv()
+DATA_PATH = ""  # Specify the path to your file
 PROMPT_TEMPLATE = """
 Ответь на вопрос, используя только следующий контекст:
 {context}
 Ответь на вопрос на основе приведенного контекста: {question}
 """
+# Global variable for status
 status_message = "Инициализация..."
+# Translation dictionaries
+classification_ru = {
+    'Swallowed': 'проглоченная',
+    'Tiny': 'сверхмалая',
+    'Small': 'малая',
+    'Normal': 'нормальная',
+    'Ideal': 'идеальная',
+    'Big': 'большая'
+}
+planet_ru = {
+    'Sun': 'Солнце',
+    'Moon': 'Луна',
+    'Mercury': 'Меркурий',
+    'Venus': 'Венера',
+    'Mars': 'Марс',
+    'Jupiter': 'Юпитер',
+    'Saturn': 'Сатурн'
+}
+planet_symbols = {
+    'Sun': '☉',
+    'Moon': '☾',
+    'Mercury': '☿',
+    'Venus': '♀',
+    'Mars': '♂',
+    'Jupiter': '♃',
+    'Saturn': '♄'
+}
 def initialize_vectorstore():
+    """Initialize the FAISS vector store for document retrieval."""
     global status_message
     try:
         status_message = "Загрузка и обработка документов..."
         status_message = "База данных готова к использованию."
         return vectorstore
     except Exception as e:
         status_message = f"Ошибка инициализации: {str(e)}"
         raise
 def load_documents():
+    """Load documents from the specified file path."""
     file_path = os.path.join(DATA_PATH, "pl250320252.md")
     if not os.path.exists(file_path):
         raise FileNotFoundError(f"Файл {file_path} не найден")
     return loader.load()
 def split_text(documents: list[Document]):
+    """Split documents into chunks for vectorization."""
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=900,
         chunk_overlap=300,
     return text_splitter.split_documents(documents)
 def save_to_faiss(chunks: list[Document]):
+    """Save document chunks to a FAISS vector store."""
     embeddings = HuggingFaceEmbeddings(
         model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
         model_kwargs={'device': 'cpu'},
     return FAISS.from_documents(chunks, embeddings)
 def process_query(query_text: str, vectorstore):
+    """Process a query using the RAG system."""
     if vectorstore is None:
         return "База данных не инициализирована", []
         model = HuggingFaceHub(
             repo_id="https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud/",
+            task="text2text-generation",
             model_kwargs={"temperature": 0.5, "max_length": 512}
         )
         response_text = model.predict(prompt)
         sources = list(set([doc.metadata.get("source", "") for doc, _ in results]))
         return response_text, sources
     except Exception as e:
         return f"Ошибка обработки запроса: {str(e)}", []
+def PLadder_ZSizes(date_time_iso: str):
+    """
+    Calculate the planetary ladder and zone sizes for a given date and time.
+    Args:
+        date_time_iso (str): Date and time in ISO format (e.g., '2023-10-10T12:00:00')
+    Returns:
+        dict: Contains 'PLadder' (list of planets) and 'ZSizes' (list of zone sizes with classifications)
+              or an error message if unsuccessful
+    """
+    try:
+        dt = datetime.fromisoformat(date_time_iso)
+        if dt.year < 1900 or dt.year > 2050:
+            return {"error": "Дата вне диапазона. Должна быть между 1900 и 2050 годами."}
+        # Load ephemeris
+        planets = load('de421.bsp')
+        earth = planets['earth']
+        # Define planet objects
+        planet_objects = {
+            'Sun': planets['sun'],
+            'Moon': planets['moon'],
+            'Mercury': planets['mercury'],
+            'Venus': planets['venus'],
+            'Mars': planets['mars'],
+            'Jupiter': planets['jupiter barycenter'],
+            'Saturn': planets['saturn barycenter']
+        }
+        # Create time object
+        ts = load.timescale()
+        t = ts.utc(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
+        # Compute ecliptic longitudes
+        longitudes = {}
+        for planet in planet_objects:
+            apparent = earth.at(t).observe(planet_objects[planet]).apparent()
+            _, lon, _ = apparent.ecliptic_latlon()
+            longitudes[planet] = lon.degrees
+        # Sort planets by longitude to form PLadder
+        sorted_planets = sorted(longitudes.items(), key=lambda x: x[1])
+        PLadder = [p for p, _ in sorted_planets]
+        sorted_lons = [lon for _, lon in sorted_planets]
+        # Calculate zone sizes
+        zone_sizes = [sorted_lons[0]] + [sorted_lons[i+1] - sorted_lons[i] for i in range(6)] + [360 - sorted_lons[6]]
+        # Determine bordering planets for classification
+        bordering = [[PLadder[0]]] + [[PLadder[i-1], PLadder[i]] for i in range(1, 7)] + [[PLadder[6]]]
+        # Classify each zone
+        ZSizes = []
+        for i, size in enumerate(zone_sizes):
+            bord = bordering[i]
+            if any(p in ['Sun', 'Moon'] for p in bord):
+                X = 7
+            elif any(p in ['Mercury', 'Venus', 'Mars'] for p in bord):
+                X = 6
+            else:
+                X = 5
+            if size <= 1:
+                classification = 'Swallowed'
+            elif size <= X:
+                classification = 'Tiny'
+            elif size <= 40:
+                classification = 'Small'
+            elif size < 60:
+                if 50 <= size <= 52:
+                    classification = 'Ideal'
+                else:
+                    classification = 'Normal'
+            else:
+                classification = 'Big'
+            # Convert size to degrees and minutes
+            d = int(size)
+            m = int((size - d) * 60)
+            size_str = f"{d}°{m}'"
+            ZSizes.append((size_str, classification))
+        return {'PLadder': PLadder, 'ZSizes': ZSizes}
+    except ValueError:
+        return {"error": "Неверный формат даты и времени. Используйте ISO формат, например, '2023-10-10T12:00:00'"}
+    except Exception as e:
+        return {"error": f"Ошибка при вычислении: {str(e)}"}
+def plot_pladder(PLadder):
+    """
+    Plot the planetary ladder as a right triangle with planet symbols.
+    Args:
+        PLadder (list): List of planet names in order
+    Returns:
+        matplotlib.figure.Figure: The generated plot
+    """
+    fig, ax = plt.subplots()
+    # Draw triangle with vertices (0,0), (0,3), (3,0)
+    ax.plot([0, 0, 3, 0], [0, 3, 0, 0], 'k-')
+    # Draw horizontal lines dividing height into three equal parts
+    ax.plot([0, 3], [1, 1], 'k--')
+    ax.plot([0, 3], [2, 2], 'k--')
+    # Define positions for planets 1 to 7
+    positions = [(0, 0), (0, 1), (0, 2), (0, 3), (1, 2), (2, 1), (3, 0)]
+    for i, pos in enumerate(positions):
+        symbol = planet_symbols[PLadder[i]]
+        ax.text(pos[0], pos[1], symbol, ha='center', va='center', fontsize=12)
+    ax.set_xlim(-0.5, 3.5)
+    ax.set_ylim(-0.5, 3.5)
+    ax.set_aspect('equal')
+    ax.axis('off')
+    return fig
 def chat_interface(query_text):
+    """
+    Handle user queries, either for planetary ladder or general RAG questions.
+    Args:
+        query_text (str): User's input query
+    Returns:
+        tuple: (text response, plot figure or None)
+    """
     global status_message
     try:
         vectorstore = initialize_vectorstore()
+        if query_text.startswith("PLadder "):
+            # Extract date and time from query
+            date_time_iso = query_text.split(" ", 1)[1]
+            result = PLadder_ZSizes(date_time_iso)
+            if "error" in result:
+                return result["error"], None
+            PLadder = result["PLadder"]
+            ZSizes = result["ZSizes"]
+            # Translate to Russian
+            PLadder_ru = [planet_ru[p] for p in PLadder]
+            ZSizes_ru = [(size_str, classification_ru[classification]) for size_str, classification in ZSizes]
+            # Prepare queries and get responses
+            responses = []
+            for i in range(7):
+                planet = PLadder_ru[i]
+                size_str, class_ru = ZSizes_ru[i]
+                query = f"Что значит {planet} на {i+1}-й ступени и {size_str} {class_ru} {i+1}-я зона?"
+                response, _ = process_query(query, vectorstore)
+                responses.append(f"Интерпретация для {i+1}-й ступени и {i+1}-й зоны: {response}")
+            # Query for 8th zone
+            size_str, class_ru = ZSizes_ru[7]
+            query = f"Что значит {size_str} {class_ru} восьмая зона?"
+            response, _ = process_query(query, vectorstore)
+            responses.append(f"Интерпретация для 8-й зоны: {response}")
+            # Generate plot
+            fig = plot_pladder(PLadder)
+            # Compile response text
+            text = "Планетарная лестница: " + ", ".join(PLadder_ru) + "\n"
+            text += "Размеры зон:\n" + "\n".join([f"Зона {i+1}: {size_str} {class_ru}"
+                                                  for i, (size_str, class_ru) in enumerate(ZSizes_ru)]) + "\n\n"
+            text += "\n".join(responses)
+            return text, fig
+        else:
+            # Handle regular RAG query
+            response, sources = process_query(query_text, vectorstore)
+            full_response = f"{status_message}\n\nОтвет: {response}\n\nИсточники: {', '.join(sources) if sources else 'Нет источников'}"
+            return full_response, None
     except Exception as e:
+        return f"Критическая ошибка: {str(e)}", None
+# Define Gradio Interface
 interface = gr.Interface(
     fn=chat_interface,
     inputs=gr.Textbox(lines=2, placeholder="Введите ваш вопрос здесь..."),
+    outputs=[gr.Textbox(), gr.Image()],
     title="Чат с документами",
+    description="Задайте вопрос, и я отвечу на основе загруженных документов. "
+                "Для запроса планетарной лестницы используйте формат: PLadder YYYY-MM-DDTHH:MM:SS"
 )
 if __name__ == "__main__":