Peiran committed on
Commit
6a51e6d
·
1 Parent(s): 6803948

Fix: robust empty-pairs handling, CSV trimming, and Python 3.8+ type hints; improve slider bounds and add a graceful no-op when there are no pending pairs

Browse files
Files changed (1) hide show
  1. app.py +62 -9
app.py CHANGED
@@ -59,7 +59,12 @@ def _load_task_rows(task_name: str) -> List[Dict[str, str]]:
59
 
60
  with open(csv_path, newline="", encoding="utf-8") as csv_file:
61
  reader = csv.DictReader(csv_file)
62
- return [row for row in reader]
 
 
 
 
 
63
 
64
 
65
  def _build_image_pairs(rows: List[Dict[str, str]], task_name: str) -> List[Dict[str, str]]:
@@ -166,12 +171,13 @@ def load_task(task_name: str):
166
  raise gr.Error("Please select a task first.")
167
 
168
  rows = _load_task_rows(task_name)
169
- pairs = _build_image_pairs(rows, task_name)
170
  # Filter out already evaluated pairs from persistent CSV
171
  done_keys = _read_existing_eval_keys(task_name)
172
  def key_of(p: Dict[str, str]):
173
  return (p["test_id"], frozenset({p["model1_name"], p["model2_name"]}), p["org_img"])
174
- pairs = [p for p in pairs if key_of(p) not in done_keys]
 
175
 
176
  # Balanced schedule across test_ids with a stable randomization
177
  seed_env = os.environ.get("SCHEDULE_SEED")
@@ -183,7 +189,15 @@ def load_task(task_name: str):
183
  p["swap"] = bool(idx % 2) # True -> A=B's image; False -> A=A's image
184
 
185
  if not pairs:
186
- raise gr.Error("No valid image pairs found for evaluation. Please check the data.")
 
 
 
 
 
 
 
 
187
 
188
  return pairs
189
 
@@ -289,10 +303,22 @@ def _upload_eval_record_to_dataset(task_name: str, row: Dict[str, object]) -> Tu
289
 
290
  def on_task_change(task_name: str, _state_pairs: List[Dict[str, str]]):
291
  pairs = load_task(task_name)
292
- pair = pairs[0]
293
- header = _format_pair_header(pair)
294
  # Defaults for A and B (8 sliders total)
295
  default_scores = [3, 3, 3, 3, 3, 3, 3, 3]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  # Pick display order according to swap flag
297
  a_path = pair["model2_path"] if pair.get("swap") else pair["model1_path"]
298
  b_path = pair["model1_path"] if pair.get("swap") else pair["model2_path"]
@@ -311,7 +337,16 @@ def on_task_change(task_name: str, _state_pairs: List[Dict[str, str]]):
311
 
312
  def on_pair_navigate(index: int, pairs: List[Dict[str, str]]):
313
  if not pairs:
314
- raise gr.Error("请先选择任务。")
 
 
 
 
 
 
 
 
 
315
  index = int(index)
316
  index = max(0, min(index, len(pairs) - 1))
317
  pair = pairs[index]
@@ -343,10 +378,28 @@ def on_submit(
343
  b_overall_score: int,
344
  ):
345
  if not task_name:
346
- raise gr.Error("请先选择任务。")
 
 
 
 
 
 
 
 
 
347
 
348
  if not pairs:
349
- raise gr.Error("No image pairs loaded for the current task.")
 
 
 
 
 
 
 
 
 
350
 
351
  pair = pairs[index]
352
  score_map = {
 
59
 
60
  with open(csv_path, newline="", encoding="utf-8") as csv_file:
61
  reader = csv.DictReader(csv_file)
62
+ rows: List[Dict[str, str]] = []
63
+ for row in reader:
64
+ # Trim whitespaces in all string fields to avoid path/key mismatches
65
+ cleaned = {k.strip(): (v.strip() if isinstance(v, str) else v) for k, v in row.items()}
66
+ rows.append(cleaned)
67
+ return rows
68
 
69
 
70
  def _build_image_pairs(rows: List[Dict[str, str]], task_name: str) -> List[Dict[str, str]]:
 
171
  raise gr.Error("Please select a task first.")
172
 
173
  rows = _load_task_rows(task_name)
174
+ pairs_all = _build_image_pairs(rows, task_name)
175
  # Filter out already evaluated pairs from persistent CSV
176
  done_keys = _read_existing_eval_keys(task_name)
177
  def key_of(p: Dict[str, str]):
178
  return (p["test_id"], frozenset({p["model1_name"], p["model2_name"]}), p["org_img"])
179
+ pairs = [p for p in pairs_all if key_of(p) not in done_keys]
180
+ done_len = len([p for p in pairs_all if key_of(p) in done_keys])
181
 
182
  # Balanced schedule across test_ids with a stable randomization
183
  seed_env = os.environ.get("SCHEDULE_SEED")
 
189
  p["swap"] = bool(idx % 2) # True -> A=B's image; False -> A=A's image
190
 
191
  if not pairs:
192
+ try:
193
+ print("[VisArena] No pending pairs.")
194
+ print("[VisArena] total_pairs=", len(pairs_all))
195
+ print("[VisArena] already_done=", done_len)
196
+ print("[VisArena] persist_csv=", _persist_csv_path_for_task(task_name))
197
+ except Exception:
198
+ pass
199
+ # Return empty list; UI will render an informative message instead of erroring out
200
+ return []
201
 
202
  return pairs
203
 
 
303
 
304
  def on_task_change(task_name: str, _state_pairs: List[Dict[str, str]]):
305
  pairs = load_task(task_name)
 
 
306
  # Defaults for A and B (8 sliders total)
307
  default_scores = [3, 3, 3, 3, 3, 3, 3, 3]
308
+ if not pairs:
309
+ return (
310
+ [],
311
+ gr.update(value=0, minimum=0, maximum=0, visible=False),
312
+ gr.update(value=""),
313
+ gr.update(value=None),
314
+ gr.update(value=None),
315
+ gr.update(value=None),
316
+ *default_scores,
317
+ gr.update(value="No pending pairs. Either all pairs are already evaluated or data paths are invalid."),
318
+ )
319
+
320
+ pair = pairs[0]
321
+ header = _format_pair_header(pair)
322
  # Pick display order according to swap flag
323
  a_path = pair["model2_path"] if pair.get("swap") else pair["model1_path"]
324
  b_path = pair["model1_path"] if pair.get("swap") else pair["model2_path"]
 
337
 
338
  def on_pair_navigate(index: int, pairs: List[Dict[str, str]]):
339
  if not pairs:
340
+ # Gracefully no-op when no pairs
341
+ return (
342
+ gr.update(value=0, minimum=0, maximum=0, visible=False),
343
+ gr.update(value=""),
344
+ gr.update(value=None),
345
+ gr.update(value=None),
346
+ gr.update(value=None),
347
+ 3, 3, 3, 3, # A
348
+ 3, 3, 3, 3, # B
349
+ )
350
  index = int(index)
351
  index = max(0, min(index, len(pairs) - 1))
352
  pair = pairs[index]
 
378
  b_overall_score: int,
379
  ):
380
  if not task_name:
381
+ return (
382
+ gr.update(value=0),
383
+ gr.update(value=""),
384
+ gr.update(value=None),
385
+ gr.update(value=None),
386
+ gr.update(value=None),
387
+ 3, 3, 3, 3,
388
+ 3, 3, 3, 3,
389
+ gr.update(value="Please select a task first."),
390
+ )
391
 
392
  if not pairs:
393
+ return (
394
+ gr.update(value=0, minimum=0, maximum=0, visible=False),
395
+ gr.update(value=""),
396
+ gr.update(value=None),
397
+ gr.update(value=None),
398
+ gr.update(value=None),
399
+ 3, 3, 3, 3,
400
+ 3, 3, 3, 3,
401
+ gr.update(value="No pending pairs to submit."),
402
+ )
403
 
404
  pair = pairs[index]
405
  score_map = {