Prateek0515 committed on
Commit
81239b2
·
verified ·
1 Parent(s): d1b3581

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -11
app.py CHANGED
@@ -8,6 +8,7 @@ from huggingface_hub import hf_hub_download
8
  import PyPDF2
9
  from docx import Document
10
  import re
 
11
 
12
  # ================== CLASSES ==================
13
 
@@ -119,7 +120,7 @@ id2label = {
119
 
120
  def split_sentences(text):
121
  """Split text into sentences"""
122
- sentences = re.split(r'(?<=[.!?])\s+', text)
123
  return [s.strip() for s in sentences if s.strip()]
124
 
125
  def extract_text_from_pdf(file_path):
@@ -161,19 +162,19 @@ def predict(text_input, file_input):
161
  with open(file_path, 'r', encoding='utf-8') as f:
162
  text = f.read()
163
  else:
164
- return "❌ Unsupported file type. Please use: PDF, DOCX, or TXT"
165
  elif text_input:
166
  text = text_input
167
  else:
168
- return "⚠️ Please provide either text or upload a file"
169
 
170
  if not text or len(text.strip()) == 0:
171
- return "⚠️ No text content found"
172
 
173
  # ✂️ Split text into sentences
174
  sentences = split_sentences(text)
175
  if not sentences:
176
- return "⚠️ Could not split text into sentences"
177
 
178
  # 🧠 Tokenize all sentences together (hierarchical encoding)
179
  encoded = tokenizer(
@@ -204,17 +205,20 @@ def predict(text_input, file_input):
204
  for i in range(len(predicted_labels)):
205
  predicted_labels[i] = i % num_labels
206
 
207
- # ✅ Format each sentence with its predicted label
208
  results = []
209
  for sentence, label_id in zip(sentences, predicted_labels):
210
  label = id2label.get(label_id, "Unknown")
211
- results.append(f"**{label}** | {sentence.strip()}")
 
 
 
212
 
213
- # Return clean output (frontend compatible)
214
- return "\n".join(results)
215
 
216
  except Exception as e:
217
- return f"❌ Error during prediction: {str(e)}"
218
 
219
  # ================== GRADIO UI ==================
220
 
@@ -224,7 +228,7 @@ demo = gr.Interface(
224
  gr.Textbox(label="Enter Legal Text", placeholder="Paste legal text here...", lines=5),
225
  gr.File(label="Or Upload File (PDF, DOCX, TXT)")
226
  ],
227
- outputs=gr.Textbox(label="Per-Sentence Predictions", lines=10),
228
  title="⚖️ Legal Document Segmentation",
229
  description="Classify legal documents sentence-by-sentence into: Arguments (Petitioner/Respondent), Decision, Facts, Issue, None, or Reasoning",
230
  examples=[
 
8
  import PyPDF2
9
  from docx import Document
10
  import re
11
+ import json
12
 
13
  # ================== CLASSES ==================
14
 
 
120
 
121
  def split_sentences(text):
122
  """Split text into sentences"""
123
+ sentences = re.split(r'(?<=[.!?])\s+(?=[A-Z])', text)
124
  return [s.strip() for s in sentences if s.strip()]
125
 
126
  def extract_text_from_pdf(file_path):
 
162
  with open(file_path, 'r', encoding='utf-8') as f:
163
  text = f.read()
164
  else:
165
+ return {"error": "❌ Unsupported file type. Please use: PDF, DOCX, or TXT"}
166
  elif text_input:
167
  text = text_input
168
  else:
169
+ return {"error": "⚠️ Please provide either text or upload a file"}
170
 
171
  if not text or len(text.strip()) == 0:
172
+ return {"error": "⚠️ No text content found"}
173
 
174
  # ✂️ Split text into sentences
175
  sentences = split_sentences(text)
176
  if not sentences:
177
+ return {"error": "⚠️ Could not split text into sentences"}
178
 
179
  # 🧠 Tokenize all sentences together (hierarchical encoding)
180
  encoded = tokenizer(
 
205
  for i in range(len(predicted_labels)):
206
  predicted_labels[i] = i % num_labels
207
 
208
+ # ✅ Format each sentence with its predicted label as JSON
209
  results = []
210
  for sentence, label_id in zip(sentences, predicted_labels):
211
  label = id2label.get(label_id, "Unknown")
212
+ results.append({
213
+ "label": label,
214
+ "sentence": sentence.strip()
215
+ })
216
 
217
+ # Return JSON format
218
+ return results
219
 
220
  except Exception as e:
221
+ return {"error": f"❌ Error during prediction: {str(e)}"}
222
 
223
  # ================== GRADIO UI ==================
224
 
 
228
  gr.Textbox(label="Enter Legal Text", placeholder="Paste legal text here...", lines=5),
229
  gr.File(label="Or Upload File (PDF, DOCX, TXT)")
230
  ],
231
+ outputs=gr.JSON(label="Per-Sentence Predictions"),
232
  title="⚖️ Legal Document Segmentation",
233
  description="Classify legal documents sentence-by-sentence into: Arguments (Petitioner/Respondent), Decision, Facts, Issue, None, or Reasoning",
234
  examples=[