SergeyO7 committed on
Commit
d3faf56
·
verified ·
1 Parent(s): b221ab4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +222 -15
app.py CHANGED
@@ -3,17 +3,19 @@ from langchain_community.document_loaders import UnstructuredMarkdownLoader
3
  from langchain.text_splitter import RecursiveCharacterTextSplitter
4
  from langchain_core.documents import Document
5
  from langchain_huggingface import HuggingFaceEmbeddings
6
- from langchain_community.vectorstores import FAISS # Обновленный импорт
7
-
8
  from langchain_community.llms import HuggingFaceHub
9
  from langchain.prompts import ChatPromptTemplate
10
  from dotenv import load_dotenv
11
  import os
 
 
 
12
 
13
- # Загрузка переменных окружения
14
  load_dotenv()
15
 
16
- DATA_PATH = "" # Укажите путь к вашему файлу
17
  PROMPT_TEMPLATE = """
18
  Ответь на вопрос, используя только следующий контекст:
19
  {context}
@@ -21,10 +23,41 @@ PROMPT_TEMPLATE = """
21
  Ответь на вопрос на основе приведенного контекста: {question}
22
  """
23
 
24
- # Глобальная переменная для статуса
25
  status_message = "Инициализация..."
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def initialize_vectorstore():
 
28
  global status_message
29
  try:
30
  status_message = "Загрузка и обработка документов..."
@@ -36,12 +69,12 @@ def initialize_vectorstore():
36
 
37
  status_message = "База данных готова к использованию."
38
  return vectorstore
39
-
40
  except Exception as e:
41
  status_message = f"Ошибка инициализации: {str(e)}"
42
  raise
43
 
44
  def load_documents():
 
45
  file_path = os.path.join(DATA_PATH, "pl250320252.md")
46
  if not os.path.exists(file_path):
47
  raise FileNotFoundError(f"Файл {file_path} не найден")
@@ -49,6 +82,7 @@ def load_documents():
49
  return loader.load()
50
 
51
  def split_text(documents: list[Document]):
 
52
  text_splitter = RecursiveCharacterTextSplitter(
53
  chunk_size=900,
54
  chunk_overlap=300,
@@ -58,6 +92,7 @@ def split_text(documents: list[Document]):
58
  return text_splitter.split_documents(documents)
59
 
60
  def save_to_faiss(chunks: list[Document]):
 
61
  embeddings = HuggingFaceEmbeddings(
62
  model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
63
  model_kwargs={'device': 'cpu'},
@@ -66,6 +101,7 @@ def save_to_faiss(chunks: list[Document]):
66
  return FAISS.from_documents(chunks, embeddings)
67
 
68
  def process_query(query_text: str, vectorstore):
 
69
  if vectorstore is None:
70
  return "База данных не инициализирована", []
71
 
@@ -87,34 +123,205 @@ def process_query(query_text: str, vectorstore):
87
 
88
  model = HuggingFaceHub(
89
  repo_id="https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud/",
90
- task="text2text-generation", # Указываем задачу
91
  model_kwargs={"temperature": 0.5, "max_length": 512}
92
  )
93
  response_text = model.predict(prompt)
94
 
95
  sources = list(set([doc.metadata.get("source", "") for doc, _ in results]))
96
  return response_text, sources
97
-
98
  except Exception as e:
99
  return f"Ошибка обработки запроса: {str(e)}", []
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  def chat_interface(query_text):
 
 
 
 
 
 
 
 
 
102
  global status_message
103
  try:
104
  vectorstore = initialize_vectorstore()
105
- response, sources = process_query(query_text, vectorstore)
106
- full_response = f"{status_message}\n\nОтвет: {response}\n\nИсточники: {', '.join(sources) if sources else 'Нет источников'}"
107
- return full_response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  except Exception as e:
109
- return f"Критическая ошибка: {str(e)}"
110
 
111
- # Интерфейс Gradio
112
  interface = gr.Interface(
113
  fn=chat_interface,
114
  inputs=gr.Textbox(lines=2, placeholder="Введите ваш вопрос здесь..."),
115
- outputs="text",
116
  title="Чат с документами",
117
- description="Задайте вопрос, и я отвечу на основе загруженных документов."
 
118
  )
119
 
120
  if __name__ == "__main__":
 
3
  from langchain.text_splitter import RecursiveCharacterTextSplitter
4
  from langchain_core.documents import Document
5
  from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain_community.vectorstores import FAISS
 
7
  from langchain_community.llms import HuggingFaceHub
8
  from langchain.prompts import ChatPromptTemplate
9
  from dotenv import load_dotenv
10
  import os
11
+ from datetime import datetime
12
+ from skyfield.api import load
13
+ import matplotlib.pyplot as plt
14
 
15
+ # Load environment variables
16
  load_dotenv()
17
 
18
+ DATA_PATH = "" # Specify the path to your file
19
  PROMPT_TEMPLATE = """
20
  Ответь на вопрос, используя только следующий контекст:
21
  {context}
 
23
  Ответь на вопрос на основе приведенного контекста: {question}
24
  """
25
 
26
+ # Global variable for status
27
  status_message = "Инициализация..."
28
 
29
# Lookup tables keyed by the English names used inside the calculation code.

# Zone size class -> Russian adjective shown to the user.
classification_ru = dict(
    Swallowed='проглоченная',
    Tiny='сверхмалая',
    Small='малая',
    Normal='нормальная',
    Ideal='идеальная',
    Big='большая',
)

# Celestial body -> Russian name.
planet_ru = dict(
    Sun='Солнце',
    Moon='Луна',
    Mercury='Меркурий',
    Venus='Венера',
    Mars='Марс',
    Jupiter='Юпитер',
    Saturn='Сатурн',
)

# Celestial body -> glyph drawn on the ladder plot.
planet_symbols = dict(
    Sun='☉',
    Moon='☾',
    Mercury='☿',
    Venus='♀',
    Mars='♂',
    Jupiter='♃',
    Saturn='♄',
)
58
+
59
  def initialize_vectorstore():
60
+ """Initialize the FAISS vector store for document retrieval."""
61
  global status_message
62
  try:
63
  status_message = "Загрузка и обработка документов..."
 
69
 
70
  status_message = "База данных готова к использованию."
71
  return vectorstore
 
72
  except Exception as e:
73
  status_message = f"Ошибка инициализации: {str(e)}"
74
  raise
75
 
76
  def load_documents():
77
+ """Load documents from the specified file path."""
78
  file_path = os.path.join(DATA_PATH, "pl250320252.md")
79
  if not os.path.exists(file_path):
80
  raise FileNotFoundError(f"Файл {file_path} не найден")
 
82
  return loader.load()
83
 
84
  def split_text(documents: list[Document]):
85
+ """Split documents into chunks for vectorization."""
86
  text_splitter = RecursiveCharacterTextSplitter(
87
  chunk_size=900,
88
  chunk_overlap=300,
 
92
  return text_splitter.split_documents(documents)
93
 
94
  def save_to_faiss(chunks: list[Document]):
95
+ """Save document chunks to a FAISS vector store."""
96
  embeddings = HuggingFaceEmbeddings(
97
  model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
98
  model_kwargs={'device': 'cpu'},
 
101
  return FAISS.from_documents(chunks, embeddings)
102
 
103
  def process_query(query_text: str, vectorstore):
104
+ """Process a query using the RAG system."""
105
  if vectorstore is None:
106
  return "База данных не инициализирована", []
107
 
 
123
 
124
  model = HuggingFaceHub(
125
  repo_id="https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud/",
126
+ task="text2text-generation",
127
  model_kwargs={"temperature": 0.5, "max_length": 512}
128
  )
129
  response_text = model.predict(prompt)
130
 
131
  sources = list(set([doc.metadata.get("source", "") for doc, _ in results]))
132
  return response_text, sources
 
133
  except Exception as e:
134
  return f"Ошибка обработки запроса: {str(e)}", []
135
 
136
def PLadder_ZSizes(date_time_iso: str):
    """
    Calculate the planetary ladder and zone sizes for a given date and time.

    The seven bodies (Sun, Moon, Mercury, Venus, Mars, Jupiter, Saturn) are
    ordered by their apparent geocentric ecliptic longitude. The zones are the
    gaps between 0 degrees, each body in turn, and 360 degrees, so there is
    always one more zone than there are bodies.

    Args:
        date_time_iso (str): Date and time in ISO format
            (e.g., '2023-10-10T12:00:00'). If the string carries a UTC offset
            it is applied, so the moment is evaluated in UTC; a value without
            an offset is passed to the timescale as UTC unchanged.

    Returns:
        dict: ``{'PLadder': [...], 'ZSizes': [(size_str, classification), ...]}``
        on success, or ``{'error': message}`` if the input is malformed, out of
        range, or the computation fails.
    """
    # Parse in its own try so that only a malformed input produces the
    # "invalid format" message. A ValueError raised later by the ephemeris
    # code must not be reported as a date-format problem.
    try:
        dt = datetime.fromisoformat(date_time_iso)
    except (TypeError, ValueError):
        return {"error": "Неверный формат даты и времени. Используйте ISO формат, например, '2023-10-10T12:00:00'"}

    if not 1900 <= dt.year <= 2050:
        return {"error": "Дата вне диапазона. Должна быть между 1900 и 2050 годами."}

    # ts.utc() below receives bare calendar fields, so an offset-aware input
    # has to be shifted to UTC wall-clock time first or the offset is lost.
    offset = dt.utcoffset()
    if offset is not None:
        dt = (dt - offset).replace(tzinfo=None)

    try:
        # Load ephemeris
        planets = load('de421.bsp')
        earth = planets['earth']

        # Jupiter and Saturn are addressed through their system barycenters.
        planet_objects = {
            'Sun': planets['sun'],
            'Moon': planets['moon'],
            'Mercury': planets['mercury'],
            'Venus': planets['venus'],
            'Mars': planets['mars'],
            'Jupiter': planets['jupiter barycenter'],
            'Saturn': planets['saturn barycenter']
        }

        # Create time object
        ts = load.timescale()
        t = ts.utc(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)

        # The observer position does not depend on the target: compute it once.
        observer = earth.at(t)

        # Compute ecliptic longitudes.
        # NOTE(review): ecliptic_latlon() is called without an epoch argument;
        # confirm this is the intended reference frame for the ladder.
        longitudes = {}
        for name, body in planet_objects.items():
            _, lon, _ = observer.observe(body).apparent().ecliptic_latlon()
            longitudes[name] = lon.degrees

        # Sort planets by longitude to form PLadder
        sorted_planets = sorted(longitudes.items(), key=lambda item: item[1])
        PLadder = [name for name, _ in sorted_planets]
        sorted_lons = [lon for _, lon in sorted_planets]

        # Zone sizes: 0 -> first body, each consecutive pair, last body -> 360.
        zone_sizes = (
            [sorted_lons[0]]
            + [nxt - cur for cur, nxt in zip(sorted_lons, sorted_lons[1:])]
            + [360 - sorted_lons[-1]]
        )

        # Bodies bordering each zone; the outer zones have a single neighbour.
        bordering = (
            [(PLadder[0],)]
            + list(zip(PLadder, PLadder[1:]))
            + [(PLadder[-1],)]
        )

        ZSizes = [
            (_format_degrees_minutes(size), _classify_zone(size, bord))
            for size, bord in zip(zone_sizes, bordering)
        ]

        return {'PLadder': PLadder, 'ZSizes': ZSizes}

    except Exception as e:
        return {"error": f"Ошибка при вычислении: {str(e)}"}


def _classify_zone(size, bordering):
    """Return the English size class for a zone `size` degrees wide."""
    # The upper bound of the 'Tiny' class depends on which bodies border the zone.
    if any(p in ('Sun', 'Moon') for p in bordering):
        tiny_limit = 7
    elif any(p in ('Mercury', 'Venus', 'Mars') for p in bordering):
        tiny_limit = 6
    else:
        tiny_limit = 5

    if size <= 1:
        return 'Swallowed'
    if size <= tiny_limit:
        return 'Tiny'
    if size <= 40:
        return 'Small'
    if size < 60:
        return 'Ideal' if 50 <= size <= 52 else 'Normal'
    return 'Big'


def _format_degrees_minutes(size):
    """Format a size in decimal degrees as whole degrees and truncated arc-minutes."""
    degrees = int(size)
    minutes = int((size - degrees) * 60)
    return f"{degrees}°{minutes}'"
226
+
227
def plot_pladder(PLadder):
    """
    Plot the planetary ladder as a right triangle with planet symbols.

    Args:
        PLadder (list): List of planet names in ladder order; the first seven
            entries are drawn and each must be a key of ``planet_symbols``.

    Returns:
        matplotlib.figure.Figure: The generated plot. The figure is detached
        from pyplot's global registry before it is returned, so the caller
        owns the only reference.

    Raises:
        IndexError: If fewer than seven names are supplied.
        KeyError: If a name has no entry in ``planet_symbols``.
    """
    # Positions for steps 1 to 7: up the vertical leg, then down the hypotenuse.
    positions = [(0, 0), (0, 1), (0, 2), (0, 3), (1, 2), (2, 1), (3, 0)]

    # Resolve the symbols first so invalid input fails before a figure exists.
    symbols = [planet_symbols[PLadder[i]] for i in range(len(positions))]

    fig, ax = plt.subplots()
    # Draw triangle with vertices (0,0), (0,3), (3,0)
    ax.plot([0, 0, 3, 0], [0, 3, 0, 0], 'k-')
    # Draw horizontal lines dividing height into three equal parts
    # NOTE(review): these run the full width 0..3 and extend past the
    # hypotenuse; confirm whether they should be clipped to the triangle.
    for height in (1, 2):
        ax.plot([0, 3], [height, height], 'k--')

    for (x, y), symbol in zip(positions, symbols):
        ax.text(x, y, symbol, ha='center', va='center', fontsize=12)

    ax.set_xlim(-0.5, 3.5)
    ax.set_ylim(-0.5, 3.5)
    ax.set_aspect('equal')
    ax.axis('off')

    # pyplot keeps every figure it creates alive until it is closed; without
    # this, each call leaves one more figure in memory. The Figure object
    # itself remains usable after being closed.
    plt.close(fig)
    return fig
253
+
254
  def chat_interface(query_text):
255
+ """
256
+ Handle user queries, either for planetary ladder or general RAG questions.
257
+
258
+ Args:
259
+ query_text (str): User's input query
260
+
261
+ Returns:
262
+ tuple: (text response, plot figure or None)
263
+ """
264
  global status_message
265
  try:
266
  vectorstore = initialize_vectorstore()
267
+
268
+ if query_text.startswith("PLadder "):
269
+ # Extract date and time from query
270
+ date_time_iso = query_text.split(" ", 1)[1]
271
+ result = PLadder_ZSizes(date_time_iso)
272
+
273
+ if "error" in result:
274
+ return result["error"], None
275
+
276
+ PLadder = result["PLadder"]
277
+ ZSizes = result["ZSizes"]
278
+
279
+ # Translate to Russian
280
+ PLadder_ru = [planet_ru[p] for p in PLadder]
281
+ ZSizes_ru = [(size_str, classification_ru[classification]) for size_str, classification in ZSizes]
282
+
283
+ # Prepare queries and get responses
284
+ responses = []
285
+ for i in range(7):
286
+ planet = PLadder_ru[i]
287
+ size_str, class_ru = ZSizes_ru[i]
288
+ query = f"Что значит {planet} на {i+1}-й ступени и {size_str} {class_ru} {i+1}-я зона?"
289
+ response, _ = process_query(query, vectorstore)
290
+ responses.append(f"Интерпретация для {i+1}-й ступени и {i+1}-й зоны: {response}")
291
+
292
+ # Query for 8th zone
293
+ size_str, class_ru = ZSizes_ru[7]
294
+ query = f"Что значит {size_str} {class_ru} восьмая зона?"
295
+ response, _ = process_query(query, vectorstore)
296
+ responses.append(f"Интерпретация для 8-й зоны: {response}")
297
+
298
+ # Generate plot
299
+ fig = plot_pladder(PLadder)
300
+
301
+ # Compile response text
302
+ text = "Планетарная лестница: " + ", ".join(PLadder_ru) + "\n"
303
+ text += "Размеры зон:\n" + "\n".join([f"Зона {i+1}: {size_str} {class_ru}"
304
+ for i, (size_str, class_ru) in enumerate(ZSizes_ru)]) + "\n\n"
305
+ text += "\n".join(responses)
306
+ return text, fig
307
+
308
+ else:
309
+ # Handle regular RAG query
310
+ response, sources = process_query(query_text, vectorstore)
311
+ full_response = f"{status_message}\n\nОтвет: {response}\n\nИсточники: {', '.join(sources) if sources else 'Нет источников'}"
312
+ return full_response, None
313
+
314
  except Exception as e:
315
+ return f"Критическая ошибка: {str(e)}", None
316
 
317
# Define Gradio Interface
# chat_interface returns (text, matplotlib Figure or None). The second output
# is therefore a gr.Plot; gr.Image expects image data (array / PIL image /
# file path) and cannot render a Figure object.
interface = gr.Interface(
    fn=chat_interface,
    inputs=gr.Textbox(lines=2, placeholder="Введите ваш вопрос здесь..."),
    outputs=[gr.Textbox(), gr.Plot()],
    title="Чат с документами",
    description="Задайте вопрос, и я отвечу на основе загруженных документов. "
                "Для запроса планетарной лестницы используйте формат: PLadder YYYY-MM-DDTHH:MM:SS"
)
326
 
327
  if __name__ == "__main__":