Spaces:
Running
Running
Peiran
committed on
Commit
·
6a51e6d
1
Parent(s):
6803948
Fix: robust empty-pairs handling, CSV trimming, and Python 3.8+ type hints; improve slider bounds and graceful no-op when no pending pairs
Browse files
app.py
CHANGED
|
@@ -59,7 +59,12 @@ def _load_task_rows(task_name: str) -> List[Dict[str, str]]:
|
|
| 59 |
|
| 60 |
with open(csv_path, newline="", encoding="utf-8") as csv_file:
|
| 61 |
reader = csv.DictReader(csv_file)
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
|
| 65 |
def _build_image_pairs(rows: List[Dict[str, str]], task_name: str) -> List[Dict[str, str]]:
|
|
@@ -166,12 +171,13 @@ def load_task(task_name: str):
|
|
| 166 |
raise gr.Error("Please select a task first.")
|
| 167 |
|
| 168 |
rows = _load_task_rows(task_name)
|
| 169 |
-
|
| 170 |
# Filter out already evaluated pairs from persistent CSV
|
| 171 |
done_keys = _read_existing_eval_keys(task_name)
|
| 172 |
def key_of(p: Dict[str, str]):
|
| 173 |
return (p["test_id"], frozenset({p["model1_name"], p["model2_name"]}), p["org_img"])
|
| 174 |
-
pairs = [p for p in
|
|
|
|
| 175 |
|
| 176 |
# Balanced schedule across test_ids with a stable randomization
|
| 177 |
seed_env = os.environ.get("SCHEDULE_SEED")
|
|
@@ -183,7 +189,15 @@ def load_task(task_name: str):
|
|
| 183 |
p["swap"] = bool(idx % 2) # True -> A=B's image; False -> A=A's image
|
| 184 |
|
| 185 |
if not pairs:
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
return pairs
|
| 189 |
|
|
@@ -289,10 +303,22 @@ def _upload_eval_record_to_dataset(task_name: str, row: Dict[str, object]) -> Tu
|
|
| 289 |
|
| 290 |
def on_task_change(task_name: str, _state_pairs: List[Dict[str, str]]):
|
| 291 |
pairs = load_task(task_name)
|
| 292 |
-
pair = pairs[0]
|
| 293 |
-
header = _format_pair_header(pair)
|
| 294 |
# Defaults for A and B (8 sliders total)
|
| 295 |
default_scores = [3, 3, 3, 3, 3, 3, 3, 3]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
# Pick display order according to swap flag
|
| 297 |
a_path = pair["model2_path"] if pair.get("swap") else pair["model1_path"]
|
| 298 |
b_path = pair["model1_path"] if pair.get("swap") else pair["model2_path"]
|
|
@@ -311,7 +337,16 @@ def on_task_change(task_name: str, _state_pairs: List[Dict[str, str]]):
|
|
| 311 |
|
| 312 |
def on_pair_navigate(index: int, pairs: List[Dict[str, str]]):
|
| 313 |
if not pairs:
|
| 314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
index = int(index)
|
| 316 |
index = max(0, min(index, len(pairs) - 1))
|
| 317 |
pair = pairs[index]
|
|
@@ -343,10 +378,28 @@ def on_submit(
|
|
| 343 |
b_overall_score: int,
|
| 344 |
):
|
| 345 |
if not task_name:
|
| 346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
if not pairs:
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
|
| 351 |
pair = pairs[index]
|
| 352 |
score_map = {
|
|
|
|
| 59 |
|
| 60 |
with open(csv_path, newline="", encoding="utf-8") as csv_file:
|
| 61 |
reader = csv.DictReader(csv_file)
|
| 62 |
+
rows: List[Dict[str, str]] = []
|
| 63 |
+
for row in reader:
|
| 64 |
+
# Trim whitespace in all string fields to avoid path/key mismatches
|
| 65 |
+
cleaned = {k.strip(): (v.strip() if isinstance(v, str) else v) for k, v in row.items()}
|
| 66 |
+
rows.append(cleaned)
|
| 67 |
+
return rows
|
| 68 |
|
| 69 |
|
| 70 |
def _build_image_pairs(rows: List[Dict[str, str]], task_name: str) -> List[Dict[str, str]]:
|
|
|
|
| 171 |
raise gr.Error("Please select a task first.")
|
| 172 |
|
| 173 |
rows = _load_task_rows(task_name)
|
| 174 |
+
pairs_all = _build_image_pairs(rows, task_name)
|
| 175 |
# Filter out already evaluated pairs from persistent CSV
|
| 176 |
done_keys = _read_existing_eval_keys(task_name)
|
| 177 |
def key_of(p: Dict[str, str]):
|
| 178 |
return (p["test_id"], frozenset({p["model1_name"], p["model2_name"]}), p["org_img"])
|
| 179 |
+
pairs = [p for p in pairs_all if key_of(p) not in done_keys]
|
| 180 |
+
done_len = len([p for p in pairs_all if key_of(p) in done_keys])
|
| 181 |
|
| 182 |
# Balanced schedule across test_ids with a stable randomization
|
| 183 |
seed_env = os.environ.get("SCHEDULE_SEED")
|
|
|
|
| 189 |
p["swap"] = bool(idx % 2) # True -> A=B's image; False -> A=A's image
|
| 190 |
|
| 191 |
if not pairs:
|
| 192 |
+
try:
|
| 193 |
+
print("[VisArena] No pending pairs.")
|
| 194 |
+
print("[VisArena] total_pairs=", len(pairs_all))
|
| 195 |
+
print("[VisArena] already_done=", done_len)
|
| 196 |
+
print("[VisArena] persist_csv=", _persist_csv_path_for_task(task_name))
|
| 197 |
+
except Exception:
|
| 198 |
+
pass
|
| 199 |
+
# Return empty list; UI will render an informative message instead of erroring out
|
| 200 |
+
return []
|
| 201 |
|
| 202 |
return pairs
|
| 203 |
|
|
|
|
| 303 |
|
| 304 |
def on_task_change(task_name: str, _state_pairs: List[Dict[str, str]]):
|
| 305 |
pairs = load_task(task_name)
|
|
|
|
|
|
|
| 306 |
# Defaults for A and B (8 sliders total)
|
| 307 |
default_scores = [3, 3, 3, 3, 3, 3, 3, 3]
|
| 308 |
+
if not pairs:
|
| 309 |
+
return (
|
| 310 |
+
[],
|
| 311 |
+
gr.update(value=0, minimum=0, maximum=0, visible=False),
|
| 312 |
+
gr.update(value=""),
|
| 313 |
+
gr.update(value=None),
|
| 314 |
+
gr.update(value=None),
|
| 315 |
+
gr.update(value=None),
|
| 316 |
+
*default_scores,
|
| 317 |
+
gr.update(value="No pending pairs. Either all pairs are already evaluated or data paths are invalid."),
|
| 318 |
+
)
|
| 319 |
+
|
| 320 |
+
pair = pairs[0]
|
| 321 |
+
header = _format_pair_header(pair)
|
| 322 |
# Pick display order according to swap flag
|
| 323 |
a_path = pair["model2_path"] if pair.get("swap") else pair["model1_path"]
|
| 324 |
b_path = pair["model1_path"] if pair.get("swap") else pair["model2_path"]
|
|
|
|
| 337 |
|
| 338 |
def on_pair_navigate(index: int, pairs: List[Dict[str, str]]):
|
| 339 |
if not pairs:
|
| 340 |
+
# Gracefully no-op when no pairs
|
| 341 |
+
return (
|
| 342 |
+
gr.update(value=0, minimum=0, maximum=0, visible=False),
|
| 343 |
+
gr.update(value=""),
|
| 344 |
+
gr.update(value=None),
|
| 345 |
+
gr.update(value=None),
|
| 346 |
+
gr.update(value=None),
|
| 347 |
+
3, 3, 3, 3, # A
|
| 348 |
+
3, 3, 3, 3, # B
|
| 349 |
+
)
|
| 350 |
index = int(index)
|
| 351 |
index = max(0, min(index, len(pairs) - 1))
|
| 352 |
pair = pairs[index]
|
|
|
|
| 378 |
b_overall_score: int,
|
| 379 |
):
|
| 380 |
if not task_name:
|
| 381 |
+
return (
|
| 382 |
+
gr.update(value=0),
|
| 383 |
+
gr.update(value=""),
|
| 384 |
+
gr.update(value=None),
|
| 385 |
+
gr.update(value=None),
|
| 386 |
+
gr.update(value=None),
|
| 387 |
+
3, 3, 3, 3,
|
| 388 |
+
3, 3, 3, 3,
|
| 389 |
+
gr.update(value="Please select a task first."),
|
| 390 |
+
)
|
| 391 |
|
| 392 |
if not pairs:
|
| 393 |
+
return (
|
| 394 |
+
gr.update(value=0, minimum=0, maximum=0, visible=False),
|
| 395 |
+
gr.update(value=""),
|
| 396 |
+
gr.update(value=None),
|
| 397 |
+
gr.update(value=None),
|
| 398 |
+
gr.update(value=None),
|
| 399 |
+
3, 3, 3, 3,
|
| 400 |
+
3, 3, 3, 3,
|
| 401 |
+
gr.update(value="No pending pairs to submit."),
|
| 402 |
+
)
|
| 403 |
|
| 404 |
pair = pairs[index]
|
| 405 |
score_map = {
|