abrar-adnan committed on
Commit
4c567cb
·
verified ·
1 Parent(s): 6acfeaf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +423 -422
app.py CHANGED
@@ -1,422 +1,423 @@
1
- import gradio as gr
2
- import hashlib
3
- from typing import List, Dict, Tuple
4
- import os
5
- import time
6
-
7
- from document_processor.file_handler import DocumentProcessor
8
- from retriever.builder import RetrieverBuilder
9
- from agents.workflow import AgentWorkflow
10
- from config import constants, settings
11
- from utils.logging import logger
12
-
13
-
14
- def main():
15
- processor = DocumentProcessor()
16
- retriever_builder = RetrieverBuilder()
17
- workflow = AgentWorkflow()
18
-
19
- # Define custom CSS for ChatGPT-like styling with dark sidebar
20
- css = """
21
- .sidebar {
22
- background: #202123 !important;
23
- border-right: 1px solid #343541 !important;
24
- max-height: 90vh !important;
25
- height: auto !important;
26
- overflow-y: auto !important;
27
- padding: 15px !important;
28
- color: #ffffff !important;
29
- }
30
-
31
- .sidebar * {
32
- color: #ffffff !important;
33
- }
34
-
35
- .sidebar label {
36
- color: #d1d5db !important;
37
- }
38
-
39
- .sidebar input, .sidebar select, .sidebar textarea {
40
- background: #343541 !important;
41
- color: #ffffff !important;
42
- border: 1px solid #565869 !important;
43
- }
44
-
45
- .main-container {
46
- max-height: 90vh !important;
47
- height: auto !important;
48
- overflow-yL: auto !important;
49
- }
50
-
51
- .chat-area {
52
- height: 100vh !important;
53
- display: flex !important;
54
- flex-direction: column !important;
55
- padding: 10px !important;
56
- }
57
-
58
- .chatbot-container {
59
- flex: 1 1 auto !important;
60
- min-height: 300px !important;
61
- max-height: calc(100vh - 280px) !important;
62
- overflow: hidden !important;
63
- }
64
-
65
- .chatbot-container .gradio-chatbot {
66
- height: 100% !important;
67
- max-height: calc(100vh - 280px) !important;
68
- }
69
-
70
- .input-area {
71
- margin-top: 10px !important;
72
- }
73
-
74
- .processing-status {
75
- padding: 8px !important;
76
- border-radius: 6px !important;
77
- margin: 8px 0 !important;
78
- font-size: 0.9em !important;
79
- }
80
-
81
- .success {
82
- background: #d4edda !important;
83
- color: #155724 !important;
84
- border: 1px solid #c3e6cb !important;
85
- }
86
-
87
- .error {
88
- background: #f8d7da !important;
89
- color: #721c24 !important;
90
- border: 1px solid #f5c6cb !important;
91
- }
92
-
93
- .progress-info {
94
- font-size: 0.85em !important;
95
- color: #666 !important;
96
- margin-top: 5px !important;
97
- }
98
- """
99
-
100
- with gr.Blocks(theme=gr.themes.Soft(), title="DocChat", css=css) as demo:
101
- # Session state for document processing
102
- session_state = gr.State({
103
- "file_hashes": frozenset(),
104
- "retriever": None,
105
- "processed_files": [],
106
- "chat_history": []
107
- })
108
-
109
- # Main layout: Sidebar + Chat
110
- with gr.Row(equal_height=True, elem_classes="main-container"):
111
- # Left Sidebar for file management (narrower)
112
- with gr.Column(scale=0.7, min_width=250, elem_classes="sidebar"):
113
- gr.Markdown("## πŸ“ Upload your documents here", elem_classes="title")
114
-
115
- # File upload component - files shown here
116
- files = gr.Files(
117
- label="Upload Documents",
118
- file_types=constants.ALLOWED_TYPES,
119
- height=150,
120
- show_label=True
121
- )
122
-
123
- # Sync button and status
124
- with gr.Row():
125
- sync_btn = gr.Button("πŸ”„ Sync", variant="primary", scale=1, size="sm", elem_classes=["flex-item"])
126
- with gr.Row():
127
- sync_status_indicator = gr.HTML(
128
- '<div style="padding: 6px; text-align: center; border-radius: 4px; background: #343541; color: #9ca3af; font-size: 0.85em; width: 100%;">Not synced</div>',
129
- visible=True,
130
- elem_classes=["flex-item"]
131
- )
132
- # Provide equal layout using some CSS tweaks
133
- gr.HTML("""
134
- <style>
135
- .svelte-1ipelgc.flex-item { flex: 1 1 0 !important; min-width: 0 !important; }
136
- </style>
137
- """)
138
-
139
- # Processing status (only show when processing/processed)
140
- processing_status = gr.Markdown("", elem_classes="processing-status", visible=False)
141
-
142
- # Verification Report Section
143
- gr.HTML("""
144
- <style>
145
- .compact-markdown p,
146
- .compact-markdown h4,
147
- .compact-markdown h5,
148
- .compact-markdown h6 {
149
- margin-top: 0.25rem !important;
150
- margin-bottom: 0.25rem !important;
151
- }
152
- </style>
153
- """)
154
-
155
- # gr.Markdown("---")
156
- gr.Markdown('<span style="font-size: 1em;">πŸ“Š Verification Report</span>', elem_classes="compact-markdown")
157
- # gr.Markdown('<span style="font-size: 0.85em; color: #8e9297;"><em>Of the last message</em></span>', elem_classes="compact-markdown")
158
- verification_output = gr.Textbox(
159
- label="",
160
- interactive=False,
161
- lines=12,
162
- max_lines=12,
163
- value="",
164
- placeholder="""### Verification Report""",
165
- show_label=False
166
- )
167
-
168
- # Right side: Chat interface
169
- with gr.Column(scale=4, elem_classes="chat-area"):
170
- # Header section
171
- gr.Markdown("# πŸ€– GeekBot")
172
- gr.Markdown("*Your personal AI*")
173
- gr.Markdown("*Enter your documents and start chatting about it. Supports ppt,pdf,txt, etc*")
174
-
175
- # Chat interface - flex to fill available space
176
- with gr.Column(elem_classes="chatbot-container"):
177
- chatbot = gr.Chatbot(
178
- label="",
179
- show_label=False,
180
- show_copy_button=True,
181
- avatar_images=(None, "πŸ€–"),
182
- container=True,
183
- height=550
184
- )
185
-
186
- # Input area
187
- with gr.Row(elem_classes="input-area"):
188
- msg = gr.Textbox(
189
- label="",
190
- placeholder="Type your question here...",
191
- show_label=False,
192
- scale=9,
193
- container=False
194
- )
195
- submit_btn = gr.Button("Send", scale=1, variant="primary")
196
-
197
- # Function to remove files from ChromaDB when they're removed from UI
198
- def handle_file_removal(current_files: List, state: Dict):
199
- """Handle file removal - clean up ChromaDB and retriever if files are removed."""
200
- if not current_files:
201
- # All files removed - reset retriever
202
- if state.get("retriever"):
203
- logger.info("All files removed. Resetting retriever.")
204
- state.update({
205
- "retriever": None,
206
- "file_hashes": frozenset(),
207
- "processed_files": []
208
- })
209
- return (
210
- get_sync_status_html("ready"),
211
- "", # processing_status
212
- gr.update(visible=False), # processing_status visibility
213
- state
214
- )
215
- return (
216
- get_sync_status_html("ready"),
217
- "",
218
- gr.update(visible=False),
219
- state
220
- )
221
-
222
- # Check if any files were removed
223
- current_hashes = _get_file_hashes(current_files)
224
- if state.get("file_hashes") and current_hashes != state["file_hashes"]:
225
- # Files were removed - need to rebuild retriever with remaining files
226
- logger.info("Files were removed. Rebuilding retriever with remaining files...")
227
- try:
228
- chunks = processor.process(current_files)
229
- retriever = retriever_builder.build_hybrid_retriever(chunks)
230
-
231
- state.update({
232
- "file_hashes": current_hashes,
233
- "retriever": retriever,
234
- "processed_files": current_files
235
- })
236
-
237
- status_html = "βœ… **Documents resynced**<br>"
238
- status_html += f"<div class='progress-info'>{len(chunks)} chunks indexed.</div>"
239
-
240
- return (
241
- get_sync_status_html("synced", len(chunks)),
242
- status_html,
243
- gr.update(visible=True),
244
- state
245
- )
246
- except Exception as e:
247
- logger.error(f"Error resyncing after file removal: {e}")
248
- return (
249
- get_sync_status_html("error"),
250
- f"❌ Error: {str(e)}",
251
- gr.update(visible=True),
252
- state
253
- )
254
-
255
- return (
256
- get_sync_status_html("synced", len(state.get("processed_files", []))),
257
- "",
258
- gr.update(visible=False),
259
- state
260
- )
261
-
262
- # Function to update sync status indicator
263
- def get_sync_status_html(status: str, count: int = 0) -> str:
264
- """Generate HTML for sync status indicator."""
265
- if status == "synced":
266
- return f'<div style="padding: 8px; text-align: center; border-radius: 4px; background: #16a34a; color: #ffffff; font-weight: bold;">βœ… Synced ({count} chunks)</div>'
267
- elif status == "syncing":
268
- return '<div style="padding: 8px; text-align: center; border-radius: 4px; background: #f59e0b; color: #ffffff; font-weight: bold;">πŸ”„ Syncing...</div>'
269
- elif status == "error":
270
- return '<div style="padding: 8px; text-align: center; border-radius: 4px; background: #dc2626; color: #ffffff; font-weight: bold;">❌ Error</div>'
271
- else:
272
- return '<div style="padding: 8px; text-align: center; border-radius: 4px; background: #343541; color: #9ca3af;">Not synced</div>'
273
-
274
- # Function to process files (called by sync button)
275
- def process_files(uploaded_files: List, state: Dict):
276
- """Process files and build retriever."""
277
- if not uploaded_files:
278
- return (
279
- get_sync_status_html("ready"), # sync_status
280
- "", # processing_status
281
- gr.update(visible=False), # processing_status visibility
282
- state
283
- )
284
-
285
- try:
286
- current_hashes = _get_file_hashes(uploaded_files)
287
-
288
- # Check if files are new or changed
289
- if state["retriever"] is None or current_hashes != state["file_hashes"]:
290
- # Process documents
291
- logger.info("Processing new/changed documents...")
292
- chunks = processor.process(uploaded_files)
293
- logger.info("Building retriever...")
294
- retriever = retriever_builder.build_hybrid_retriever(chunks)
295
- logger.info("Retriever built successfully")
296
-
297
- state.update({
298
- "file_hashes": current_hashes,
299
- "retriever": retriever,
300
- "processed_files": uploaded_files
301
- })
302
-
303
- status_html = "βœ… **Documents synced successfully!**<br>"
304
- status_html += f"<div class='progress-info'>{len(chunks)} chunks indexed. Ready for questions!</div>"
305
-
306
- return (
307
- get_sync_status_html("synced", len(chunks)), # sync_status
308
- status_html, # processing_status
309
- gr.update(visible=True), # processing_status visibility
310
- state
311
- )
312
- else:
313
- # Files unchanged, already synced
314
- status_html = "βœ… **Documents already synced**<br>"
315
- status_html += "<div class='progress-info'>Files are up to date. Ready for questions!</div>"
316
-
317
- # Get chunk count from state if available
318
- chunk_count = len(state.get("processed_files", []))
319
-
320
- return (
321
- get_sync_status_html("synced", chunk_count), # sync_status
322
- status_html, # processing_status
323
- gr.update(visible=True), # processing_status visibility
324
- state
325
- )
326
- except Exception as e:
327
- error_html = f"❌ **Error syncing documents**<br>"
328
- error_html += f"<div class='progress-info'>{str(e)}</div>"
329
- logger.error(f"File processing error: {str(e)}")
330
-
331
- return (
332
- get_sync_status_html("error"), # sync_status
333
- error_html, # processing_status
334
- gr.update(visible=True), # processing_status visibility
335
- state
336
- )
337
-
338
-
339
- # Chat function for handling questions
340
- def chat_function(message: str, history: List, state: Dict, verification_state: str):
341
- """Handle chat messages and generate responses."""
342
- try:
343
- if not message.strip():
344
- history.append((message, "Please enter a question."))
345
- return history, "", state, verification_state
346
-
347
- if state["retriever"] is None:
348
- history.append((message, "❌ No documents uploaded. Please upload documents first."))
349
- return history, "", state, verification_state
350
-
351
- # Get answer from workflow
352
- result = workflow.full_pipeline(
353
- question=message,
354
- retriever=state["retriever"]
355
- )
356
-
357
- answer = result["draft_answer"]
358
- verification_report = result["verification_report"]
359
-
360
- # Add to chat history
361
- history.append((message, answer))
362
-
363
- # Update state
364
- if "chat_history" not in state:
365
- state["chat_history"] = []
366
- state["chat_history"].append({"question": message, "answer": answer})
367
-
368
- return history, "", state, verification_report
369
-
370
- except Exception as e:
371
- logger.error(f"Chat error: {str(e)}")
372
- error_msg = f"❌ Error: {str(e)}"
373
- history.append((message, error_msg))
374
- return history, "", state, ""
375
-
376
- # Event handlers
377
- # Handle file removal - check when files change
378
- files.change(
379
- fn=handle_file_removal,
380
- inputs=[files, session_state],
381
- outputs=[sync_status_indicator, processing_status, processing_status, session_state]
382
- )
383
-
384
- # Sync button to process files
385
- sync_btn.click(
386
- fn=process_files,
387
- inputs=[files, session_state],
388
- outputs=[sync_status_indicator, processing_status, processing_status, session_state],
389
- show_progress=True
390
- )
391
-
392
- # Chat submission
393
- msg.submit(
394
- fn=chat_function,
395
- inputs=[msg, chatbot, session_state, verification_output],
396
- outputs=[chatbot, msg, session_state, verification_output]
397
- )
398
-
399
- submit_btn.click(
400
- fn=chat_function,
401
- inputs=[msg, chatbot, session_state, verification_output],
402
- outputs=[chatbot, msg, session_state, verification_output]
403
- )
404
-
405
- demo.launch(server_name="127.0.0.1", server_port=5000, share=True)
406
-
407
- def _get_file_hashes(uploaded_files: List) -> frozenset:
408
- """Generate SHA-256 hashes for uploaded files."""
409
- hashes = set()
410
- for file in uploaded_files:
411
- # Handle both Gradio file objects and string paths
412
- file_path = file.name if hasattr(file, 'name') else file
413
- try:
414
- with open(file_path, "rb") as f:
415
- hashes.add(hashlib.sha256(f.read()).hexdigest())
416
- except Exception as e:
417
- logger.error(f"Error hashing file {file_path}: {e}")
418
- continue
419
- return frozenset(hashes)
420
-
421
- if __name__ == "__main__":
422
- main()
 
 
1
+ import gradio as gr
2
+ import hashlib
3
+ from typing import List, Dict, Tuple
4
+ import os
5
+ import time
6
+
7
+ from document_processor.file_handler import DocumentProcessor
8
+ from retriever.builder import RetrieverBuilder
9
+ from agents.workflow import AgentWorkflow
10
+ from config import constants, settings
11
+ from utils.logging import logger
12
+
13
+
14
+ def main():
15
+ processor = DocumentProcessor()
16
+ retriever_builder = RetrieverBuilder()
17
+ workflow = AgentWorkflow()
18
+
19
+ # Define custom CSS for ChatGPT-like styling with dark sidebar
20
+ css = """
21
+ .sidebar {
22
+ background: #202123 !important;
23
+ border-right: 1px solid #343541 !important;
24
+ max-height: 90vh !important;
25
+ height: auto !important;
26
+ overflow-y: auto !important;
27
+ padding: 15px !important;
28
+ color: #ffffff !important;
29
+ }
30
+
31
+ .sidebar * {
32
+ color: #ffffff !important;
33
+ }
34
+
35
+ .sidebar label {
36
+ color: #d1d5db !important;
37
+ }
38
+
39
+ .sidebar input, .sidebar select, .sidebar textarea {
40
+ background: #343541 !important;
41
+ color: #ffffff !important;
42
+ border: 1px solid #565869 !important;
43
+ }
44
+
45
+ .main-container {
46
+ max-height: 90vh !important;
47
+ height: auto !important;
48
+ overflow-y: auto !important;
49
+ }
50
+
51
+ .chat-area {
52
+ height: 100vh !important;
53
+ display: flex !important;
54
+ flex-direction: column !important;
55
+ padding: 10px !important;
56
+ }
57
+
58
+ .chatbot-container {
59
+ flex: 1 1 auto !important;
60
+ min-height: 300px !important;
61
+ max-height: calc(100vh - 280px) !important;
62
+ overflow: hidden !important;
63
+ }
64
+
65
+ .chatbot-container .gradio-chatbot {
66
+ height: 100% !important;
67
+ max-height: calc(100vh - 280px) !important;
68
+ }
69
+
70
+ .input-area {
71
+ margin-top: 10px !important;
72
+ }
73
+
74
+ .processing-status {
75
+ padding: 8px !important;
76
+ border-radius: 6px !important;
77
+ margin: 8px 0 !important;
78
+ font-size: 0.9em !important;
79
+ }
80
+
81
+ .success {
82
+ background: #d4edda !important;
83
+ color: #155724 !important;
84
+ border: 1px solid #c3e6cb !important;
85
+ }
86
+
87
+ .error {
88
+ background: #f8d7da !important;
89
+ color: #721c24 !important;
90
+ border: 1px solid #f5c6cb !important;
91
+ }
92
+
93
+ .progress-info {
94
+ font-size: 0.85em !important;
95
+ color: #666 !important;
96
+ margin-top: 5px !important;
97
+ }
98
+ """
99
+
100
+ with gr.Blocks(theme=gr.themes.Soft(), title="DocChat", css=css) as demo:
101
+ # Session state for document processing
102
+ session_state = gr.State({
103
+ "file_hashes": frozenset(),
104
+ "retriever": None,
105
+ "processed_files": [],
106
+ "chat_history": []
107
+ })
108
+
109
+ # Main layout: Sidebar + Chat
110
+ with gr.Row(equal_height=True, elem_classes="main-container"):
111
+ # Left Sidebar for file management (narrower)
112
+ with gr.Column(scale=0.7, min_width=250, elem_classes="sidebar"):
113
+ gr.Markdown("## πŸ“ Upload your documents here", elem_classes="title")
114
+
115
+ # File upload component - files shown here
116
+ files = gr.Files(
117
+ label="Upload Documents",
118
+ file_types=constants.ALLOWED_TYPES,
119
+ height=150,
120
+ show_label=True
121
+ )
122
+
123
+ # Sync button and status
124
+ with gr.Row():
125
+ sync_btn = gr.Button("πŸ”„ Sync", variant="primary", scale=1, size="sm", elem_classes=["flex-item"])
126
+ with gr.Row():
127
+ sync_status_indicator = gr.HTML(
128
+ '<div style="padding: 6px; text-align: center; border-radius: 4px; background: #343541; color: #9ca3af; font-size: 0.85em; width: 100%;">Not synced</div>',
129
+ visible=True,
130
+ elem_classes=["flex-item"]
131
+ )
132
+ # Provide equal layout using some CSS tweaks
133
+ gr.HTML("""
134
+ <style>
135
+ .svelte-1ipelgc.flex-item { flex: 1 1 0 !important; min-width: 0 !important; }
136
+ </style>
137
+ """)
138
+
139
+ # Processing status (only show when processing/processed)
140
+ processing_status = gr.Markdown("", elem_classes="processing-status", visible=False)
141
+
142
+ # Verification Report Section
143
+ gr.HTML("""
144
+ <style>
145
+ .compact-markdown p,
146
+ .compact-markdown h4,
147
+ .compact-markdown h5,
148
+ .compact-markdown h6 {
149
+ margin-top: 0.25rem !important;
150
+ margin-bottom: 0.25rem !important;
151
+ }
152
+ </style>
153
+ """)
154
+
155
+ # gr.Markdown("---")
156
+ gr.Markdown('<span style="font-size: 1em;">πŸ“Š Verification Report</span>', elem_classes="compact-markdown")
157
+ # gr.Markdown('<span style="font-size: 0.85em; color: #8e9297;"><em>Of the last message</em></span>', elem_classes="compact-markdown")
158
+ verification_output = gr.Textbox(
159
+ label="",
160
+ interactive=False,
161
+ lines=12,
162
+ max_lines=12,
163
+ value="",
164
+ placeholder="""### Verification Report""",
165
+ show_label=False
166
+ )
167
+
168
+ # Right side: Chat interface
169
+ with gr.Column(scale=4, elem_classes="chat-area"):
170
+ # Header section
171
+ gr.Markdown("# πŸ€– GeekBot")
172
+ gr.Markdown("*Your personal AI*")
173
+ gr.Markdown("*Enter your documents and start chatting about it. Supports ppt,pdf,txt, etc*")
174
+
175
+ # Chat interface - flex to fill available space
176
+ with gr.Column(elem_classes="chatbot-container"):
177
+ chatbot = gr.Chatbot(
178
+ label="",
179
+ show_label=False,
180
+ show_copy_button=True,
181
+ avatar_images=(None, "πŸ€–"),
182
+ container=True,
183
+ height=550
184
+ )
185
+
186
+ # Input area
187
+ with gr.Row(elem_classes="input-area"):
188
+ msg = gr.Textbox(
189
+ label="",
190
+ placeholder="Type your question here...",
191
+ show_label=False,
192
+ scale=9,
193
+ container=False
194
+ )
195
+ submit_btn = gr.Button("Send", scale=1, variant="primary")
196
+
197
+ # Function to remove files from ChromaDB when they're removed from UI
198
def handle_file_removal(current_files: List, state: Dict):
    """Keep the retriever in step with the upload widget's file list.

    Registered as the ``change`` handler of the file upload component, so it
    runs whenever the set of uploaded files changes.

    Args:
        current_files: Files currently present in the upload widget; empty
            or ``None`` once everything has been removed.
        state: Per-session dict holding ``retriever``, ``file_hashes``,
            ``processed_files`` and, once a sync has happened,
            ``chunk_count``. Mutated in place.

    Returns:
        A 4-tuple ``(sync badge HTML, status markdown, visibility update
        for the status markdown, state)``.
    """
    if not current_files:
        # Nothing left to search: drop the index built for the old files.
        if state.get("retriever"):
            logger.info("All files removed. Resetting retriever.")
            state.update({
                "retriever": None,
                "file_hashes": frozenset(),
                "processed_files": [],
                "chunk_count": 0,
            })
        return (
            get_sync_status_html("ready"),
            "",
            gr.update(visible=False),
            state,
        )

    synced_hashes = state.get("file_hashes")
    if not synced_hashes:
        # Files were uploaded but never synced, so there is no index yet.
        # Reporting "Synced (0 chunks)" here would be misleading.
        return (
            get_sync_status_html("ready"),
            "",
            gr.update(visible=False),
            state,
        )

    current_hashes = _get_file_hashes(current_files)
    if current_hashes == synced_hashes:
        # Same content as the last sync: report the stored chunk count.
        # State written before "chunk_count" existed falls back to the
        # file count, which is what was shown previously.
        chunk_count = state.get(
            "chunk_count", len(state.get("processed_files", []))
        )
        return (
            get_sync_status_html("synced", chunk_count),
            "",
            gr.update(visible=False),
            state,
        )

    # The file set differs from what was indexed: rebuild from scratch.
    logger.info("Files were removed. Rebuilding retriever with remaining files...")
    try:
        chunks = processor.process(current_files)
        retriever = retriever_builder.build_hybrid_retriever(chunks)
    except Exception as e:
        # Leave the previous state untouched so the old index stays usable.
        logger.error(f"Error resyncing after file removal: {e}")
        return (
            get_sync_status_html("error"),
            f"❌ Error: {str(e)}",
            gr.update(visible=True),
            state,
        )

    state.update({
        "file_hashes": current_hashes,
        "retriever": retriever,
        "processed_files": current_files,
        "chunk_count": len(chunks),
    })

    status_html = "✅ **Documents resynced**<br>"
    status_html += f"<div class='progress-info'>{len(chunks)} chunks indexed.</div>"

    return (
        get_sync_status_html("synced", len(chunks)),
        status_html,
        gr.update(visible=True),
        state,
    )
261
+
262
+ # Function to update sync status indicator
263
def get_sync_status_html(status: str, count: int = 0) -> str:
    """Return the HTML badge that shows the current sync state.

    Args:
        status: ``"synced"``, ``"syncing"`` or ``"error"``. Any other value
            (callers pass ``"ready"``) renders the neutral "Not synced"
            badge.
        count: Number shown in the ``"synced"`` badge; ignored otherwise.

    Returns:
        A single ``<div>`` element with inline styling.
    """
    # Shared by every badge; only background, text colour and label vary.
    layout = "padding: 8px; text-align: center; border-radius: 4px;"

    # status -> (background colour, label) for the highlighted badges.
    badges = {
        "synced": ("#16a34a", f"✅ Synced ({count} chunks)"),
        "syncing": ("#f59e0b", "🔄 Syncing..."),
        "error": ("#dc2626", "❌ Error"),
    }

    badge = badges.get(status)
    if badge is None:
        return f'<div style="{layout} background: #343541; color: #9ca3af;">Not synced</div>'

    background, label = badge
    return (
        f'<div style="{layout} background: {background}; '
        f'color: #ffffff; font-weight: bold;">{label}</div>'
    )
273
+
274
+ # Function to process files (called by sync button)
275
def process_files(uploaded_files: List, state: Dict):
    """Index the uploaded files and store the resulting retriever in state.

    Registered as the click handler of the Sync button. The documents are
    re-processed only when no retriever exists yet or the content hashes of
    the uploaded files differ from those recorded at the last sync.

    Args:
        uploaded_files: Files currently present in the upload widget.
        state: Per-session dict; ``file_hashes``, ``retriever``,
            ``processed_files`` and ``chunk_count`` are updated in place
            after a successful sync.

    Returns:
        A 4-tuple ``(sync badge HTML, status markdown, visibility update
        for the status markdown, state)``.
    """
    if not uploaded_files:
        return (
            get_sync_status_html("ready"),  # sync_status
            "",  # processing_status
            gr.update(visible=False),  # processing_status visibility
            state,
        )

    try:
        current_hashes = _get_file_hashes(uploaded_files)
        already_synced = (
            state.get("retriever") is not None
            and current_hashes == state.get("file_hashes")
        )

        if already_synced:
            # Report the number of chunks recorded at the last sync. State
            # written before "chunk_count" existed falls back to the file
            # count, which is what was shown previously.
            chunk_count = state.get(
                "chunk_count", len(state.get("processed_files", []))
            )
            status_html = "✅ **Documents already synced**<br>"
            status_html += "<div class='progress-info'>Files are up to date. Ready for questions!</div>"
        else:
            logger.info("Processing new/changed documents...")
            chunks = processor.process(uploaded_files)
            logger.info("Building retriever...")
            retriever = retriever_builder.build_hybrid_retriever(chunks)
            logger.info("Retriever built successfully")

            chunk_count = len(chunks)
            state.update({
                "file_hashes": current_hashes,
                "retriever": retriever,
                "processed_files": uploaded_files,
                "chunk_count": chunk_count,
            })

            status_html = "✅ **Documents synced successfully!**<br>"
            status_html += f"<div class='progress-info'>{chunk_count} chunks indexed. Ready for questions!</div>"

        return (
            get_sync_status_html("synced", chunk_count),  # sync_status
            status_html,  # processing_status
            gr.update(visible=True),  # processing_status visibility
            state,
        )
    except Exception as e:
        # UI boundary: surface the failure in the sidebar instead of
        # letting the callback raise.
        logger.error(f"File processing error: {str(e)}")
        error_html = "❌ **Error syncing documents**<br>"
        error_html += f"<div class='progress-info'>{str(e)}</div>"

        return (
            get_sync_status_html("error"),  # sync_status
            error_html,  # processing_status
            gr.update(visible=True),  # processing_status visibility
            state,
        )
337
+
338
+
339
+ # Chat function for handling questions
340
def chat_function(message: str, history: List, state: Dict, verification_state: str):
    """Answer one chat message against the synced documents.

    Args:
        message: Text entered by the user; ``None`` is treated as empty.
        history: Chat transcript as a list of ``(user, bot)`` tuples. It is
            appended to in place; ``None`` is treated as an empty
            transcript.
        state: Per-session dict; ``retriever`` is read and ``chat_history``
            is appended to.
        verification_state: Current content of the verification report
            box, returned unchanged when no new answer is produced.

    Returns:
        A 4-tuple ``(history, "" to clear the input box, state,
        verification report text)``. The report is ``""`` when answering
        failed.
    """
    # Normalise before entering the try block: the error handler below
    # appends to `history` too, so it must always be a list.
    if history is None:
        history = []
    if message is None:
        message = ""

    try:
        if not message.strip():
            history.append((message, "Please enter a question."))
            return history, "", state, verification_state

        retriever = state.get("retriever")
        if retriever is None:
            history.append((message, "❌ No documents uploaded. Please upload documents first."))
            return history, "", state, verification_state

        # Get answer from workflow
        result = workflow.full_pipeline(
            question=message,
            retriever=retriever,
        )

        answer = result["draft_answer"]
        verification_report = result["verification_report"]

        history.append((message, answer))
        state.setdefault("chat_history", []).append(
            {"question": message, "answer": answer}
        )

        return history, "", state, verification_report

    except Exception as e:
        # UI boundary: show the failure in the transcript instead of
        # letting the callback raise.
        logger.error(f"Chat error: {str(e)}")
        error_msg = f"❌ Error: {str(e)}"
        history.append((message, error_msg))
        return history, "", state, ""
375
+
376
+ # Event handlers
377
+ # Handle file removal - check when files change
378
+ files.change(
379
+ fn=handle_file_removal,
380
+ inputs=[files, session_state],
381
+ outputs=[sync_status_indicator, processing_status, processing_status, session_state]
382
+ )
383
+
384
+ # Sync button to process files
385
+ sync_btn.click(
386
+ fn=process_files,
387
+ inputs=[files, session_state],
388
+ outputs=[sync_status_indicator, processing_status, processing_status, session_state],
389
+ show_progress=True
390
+ )
391
+
392
+ # Chat submission
393
+ msg.submit(
394
+ fn=chat_function,
395
+ inputs=[msg, chatbot, session_state, verification_output],
396
+ outputs=[chatbot, msg, session_state, verification_output]
397
+ )
398
+
399
+ submit_btn.click(
400
+ fn=chat_function,
401
+ inputs=[msg, chatbot, session_state, verification_output],
402
+ outputs=[chatbot, msg, session_state, verification_output]
403
+ )
404
+
405
+ # demo.launch(server_name="127.0.0.1", server_port=5000, share=True)
406
+ return demo
407
+
408
def _get_file_hashes(uploaded_files: List) -> frozenset:
    """Return the SHA-256 hex digests of the uploaded files' contents.

    Args:
        uploaded_files: Objects exposing the file path as ``.name``, or
            plain path strings. ``None`` is treated as no files.

    Returns:
        A frozenset of hex digests. Files that cannot be read are logged
        and skipped, and files with identical content share one digest.
    """
    block_size = 1024 * 1024  # read 1 MiB at a time to bound memory use
    digests = set()
    for file in uploaded_files or []:
        # Handle both Gradio file objects and string paths
        file_path = file.name if hasattr(file, "name") else file
        try:
            digest = hashlib.sha256()
            with open(file_path, "rb") as f:
                for block in iter(lambda: f.read(block_size), b""):
                    digest.update(block)
        except (OSError, TypeError, ValueError) as e:
            # Best effort: one unreadable file must not abort the others.
            logger.error(f"Error hashing file {file_path}: {e}")
            continue
        digests.add(digest.hexdigest())
    return frozenset(digests)
421
+
422
if __name__ == "__main__":
    # main() only builds the app and returns it; starting the server is
    # done here, with default launch settings.
    app = main()
    app.launch()