Peiran committed on
Commit
64125ec
·
1 Parent(s): bf7288d

Per-user scheduling + global balancing: add Annotator ID, per-user dedup, count-based prioritization, CSV adds annotator_id; update UI bindings accordingly

Browse files
Files changed (2) hide show
  1. __pycache__/app.cpython-311.pyc +0 -0
  2. app.py +56 -16
__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
app.py CHANGED
@@ -121,11 +121,36 @@ def _build_image_pairs(rows: List[Dict[str, str]], task_name: str) -> List[Dict[
121
  return pairs
122
 
123
 
124
- def _read_existing_eval_keys(task_name: str) -> set:
125
- """Read already-evaluated pair keys from persistent CSV, return a set of keys.
126
- Key is (test_id, frozenset({model1_name, model2_name}), org_img) to ignore A/B order.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  """
128
  keys = set()
 
 
129
  csv_path = _persist_csv_path_for_task(task_name)
130
  if not os.path.exists(csv_path):
131
  return keys
@@ -133,6 +158,8 @@ def _read_existing_eval_keys(task_name: str) -> set:
133
  with open(csv_path, newline="", encoding="utf-8") as f:
134
  reader = csv.DictReader(f)
135
  for r in reader:
 
 
136
  tid = str(r.get("test_id", "")).strip()
137
  m1 = str(r.get("model1_name", "")).strip()
138
  m2 = str(r.get("model2_name", "")).strip()
@@ -166,23 +193,31 @@ def _schedule_round_robin_by_test_id(pairs: List[Dict[str, str]], seed: Optional
166
  return ordered
167
 
168
 
169
- def load_task(task_name: str):
170
  if not task_name:
171
  raise gr.Error("Please select a task first.")
172
 
173
  rows = _load_task_rows(task_name)
174
  pairs_all = _build_image_pairs(rows, task_name)
175
- # Filter out already evaluated pairs from persistent CSV
176
- done_keys = _read_existing_eval_keys(task_name)
177
  def key_of(p: Dict[str, str]):
178
  return (p["test_id"], frozenset({p["model1_name"], p["model2_name"]}), p["org_img"])
179
- pairs = [p for p in pairs_all if key_of(p) not in done_keys]
180
- done_len = len([p for p in pairs_all if key_of(p) in done_keys])
 
181
 
182
- # Balanced schedule across test_ids with a stable randomization
183
  seed_env = os.environ.get("SCHEDULE_SEED")
184
  seed = int(seed_env) if seed_env and seed_env.isdigit() else None
185
- pairs = _schedule_round_robin_by_test_id(pairs, seed=seed)
 
 
 
 
 
 
 
 
186
 
187
  # Assign A/B order to counteract position bias: alternate after scheduling
188
  for idx, p in enumerate(pairs):
@@ -192,7 +227,7 @@ def load_task(task_name: str):
192
  try:
193
  print("[VisArena] No pending pairs.")
194
  print("[VisArena] total_pairs=", len(pairs_all))
195
- print("[VisArena] already_done=", done_len)
196
  print("[VisArena] persist_csv=", _persist_csv_path_for_task(task_name))
197
  except Exception:
198
  pass
@@ -234,6 +269,7 @@ def _append_local_persist_csv(task_name: str, row: Dict[str, object]) -> bool:
234
  csv_exists = os.path.exists(csv_path)
235
  fieldnames = [
236
  "eval_date",
 
237
  "test_id",
238
  "model1_name",
239
  "model2_name",
@@ -301,8 +337,8 @@ def _upload_eval_record_to_dataset(task_name: str, row: Dict[str, object]) -> Tu
301
  return False, f"Exception: {type(e).__name__}: {e}"
302
 
303
 
304
- def on_task_change(task_name: str, _state_pairs: List[Dict[str, str]]):
305
- pairs = load_task(task_name)
306
  # Defaults for A and B (8 sliders total)
307
  default_scores = [3, 3, 3, 3, 3, 3, 3, 3]
308
  if not pairs:
@@ -366,6 +402,7 @@ def on_pair_navigate(index: int, pairs: List[Dict[str, str]]):
366
 
367
  def on_submit(
368
  task_name: str,
 
369
  index: int,
370
  pairs: List[Dict[str, str]],
371
  a_physical_score: int,
@@ -431,9 +468,10 @@ def on_submit(
431
  }
432
  # Build record
433
  row = _build_eval_row(pair, score_map)
 
434
 
435
  # Idempotency: check if this pair already evaluated; if so, skip writing
436
- done_keys = _read_existing_eval_keys(task_name)
437
  eval_key = (pair["test_id"], frozenset({pair["model1_name"], pair["model2_name"]}), pair["org_img"])
438
  if eval_key in done_keys:
439
  ok_local = False
@@ -502,6 +540,7 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
502
  interactive=True,
503
  value="Scene Composition & Object Insertion",
504
  )
 
505
  index_slider = gr.Slider(
506
  label="Pair Index",
507
  value=0,
@@ -541,7 +580,7 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
541
  # Event bindings
542
  task_selector.change(
543
  fn=on_task_change,
544
- inputs=[task_selector, pair_state],
545
  outputs=[
546
  pair_state,
547
  index_slider,
@@ -585,6 +624,7 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
585
  fn=on_submit,
586
  inputs=[
587
  task_selector,
 
588
  index_slider,
589
  pair_state,
590
  a_physical_input,
@@ -618,7 +658,7 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
618
  # Auto-load default task on startup
619
  demo.load(
620
  fn=on_task_change,
621
- inputs=[task_selector, pair_state],
622
  outputs=[
623
  pair_state,
624
  index_slider,
 
121
  return pairs
122
 
123
 
124
+ def _read_eval_counts(task_name: str) -> Dict[Tuple[str, frozenset, str], int]:
125
+ """Global counts per pair key across all annotators."""
126
+ counts: Dict[Tuple[str, frozenset, str], int] = {}
127
+ csv_path = _persist_csv_path_for_task(task_name)
128
+ if not os.path.exists(csv_path):
129
+ return counts
130
+ try:
131
+ with open(csv_path, newline="", encoding="utf-8") as f:
132
+ reader = csv.DictReader(f)
133
+ for r in reader:
134
+ tid = str(r.get("test_id", "")).strip()
135
+ m1 = str(r.get("model1_name", "")).strip()
136
+ m2 = str(r.get("model2_name", "")).strip()
137
+ org = str(r.get("org_img", "")).strip()
138
+ if not (tid and m1 and m2 and org):
139
+ continue
140
+ key = (tid, frozenset({m1, m2}), org)
141
+ counts[key] = counts.get(key, 0) + 1
142
+ except Exception:
143
+ pass
144
+ return counts
145
+
146
+
147
+ def _read_user_done_keys(task_name: str, annotator_id: str) -> set:
148
+ """Keys already evaluated by the given annotator.
149
+ If CSV has no annotator_id column (legacy rows), those rows are ignored for per-user filtering.
150
  """
151
  keys = set()
152
+ if not annotator_id:
153
+ return keys
154
  csv_path = _persist_csv_path_for_task(task_name)
155
  if not os.path.exists(csv_path):
156
  return keys
 
158
  with open(csv_path, newline="", encoding="utf-8") as f:
159
  reader = csv.DictReader(f)
160
  for r in reader:
161
+ if str(r.get("annotator_id", "")).strip() != str(annotator_id).strip():
162
+ continue
163
  tid = str(r.get("test_id", "")).strip()
164
  m1 = str(r.get("model1_name", "")).strip()
165
  m2 = str(r.get("model2_name", "")).strip()
 
193
  return ordered
194
 
195
 
196
+ def load_task(task_name: str, annotator_id: str = ""):
197
  if not task_name:
198
  raise gr.Error("Please select a task first.")
199
 
200
  rows = _load_task_rows(task_name)
201
  pairs_all = _build_image_pairs(rows, task_name)
202
+ # Per-user filtering and global balancing
 
203
  def key_of(p: Dict[str, str]):
204
  return (p["test_id"], frozenset({p["model1_name"], p["model2_name"]}), p["org_img"])
205
+ user_done_keys = _read_user_done_keys(task_name, annotator_id)
206
+ global_counts = _read_eval_counts(task_name)
207
+ pairs = [p for p in pairs_all if key_of(p) not in user_done_keys]
208
 
209
+ # Balanced schedule: prioritize low-count pairs, and within same count do round-robin by test_id
210
  seed_env = os.environ.get("SCHEDULE_SEED")
211
  seed = int(seed_env) if seed_env and seed_env.isdigit() else None
212
+ def count_of(p: Dict[str, str]):
213
+ return global_counts.get(key_of(p), 0)
214
+ buckets: Dict[int, List[Dict[str, str]]] = {}
215
+ for p in sorted(pairs, key=count_of):
216
+ buckets.setdefault(count_of(p), []).append(p)
217
+ ordered: List[Dict[str, str]] = []
218
+ for c in sorted(buckets.keys()):
219
+ ordered.extend(_schedule_round_robin_by_test_id(buckets[c], seed=seed))
220
+ pairs = ordered
221
 
222
  # Assign A/B order to counteract position bias: alternate after scheduling
223
  for idx, p in enumerate(pairs):
 
227
  try:
228
  print("[VisArena] No pending pairs.")
229
  print("[VisArena] total_pairs=", len(pairs_all))
230
+ print("[VisArena] already_done_by_user=", len(user_done_keys))
231
  print("[VisArena] persist_csv=", _persist_csv_path_for_task(task_name))
232
  except Exception:
233
  pass
 
269
  csv_exists = os.path.exists(csv_path)
270
  fieldnames = [
271
  "eval_date",
272
+ "annotator_id",
273
  "test_id",
274
  "model1_name",
275
  "model2_name",
 
337
  return False, f"Exception: {type(e).__name__}: {e}"
338
 
339
 
340
+ def on_task_change(task_name: str, annotator_id: str, _state_pairs: List[Dict[str, str]]):
341
+ pairs = load_task(task_name, annotator_id)
342
  # Defaults for A and B (8 sliders total)
343
  default_scores = [3, 3, 3, 3, 3, 3, 3, 3]
344
  if not pairs:
 
402
 
403
  def on_submit(
404
  task_name: str,
405
+ annotator_id: str,
406
  index: int,
407
  pairs: List[Dict[str, str]],
408
  a_physical_score: int,
 
468
  }
469
  # Build record
470
  row = _build_eval_row(pair, score_map)
471
+ row["annotator_id"] = annotator_id
472
 
473
  # Idempotency: check if this pair already evaluated; if so, skip writing
474
+ done_keys = _read_user_done_keys(task_name, annotator_id)
475
  eval_key = (pair["test_id"], frozenset({pair["model1_name"], pair["model2_name"]}), pair["org_img"])
476
  if eval_key in done_keys:
477
  ok_local = False
 
540
  interactive=True,
541
  value="Scene Composition & Object Insertion",
542
  )
543
+ annotator_id_input = gr.Textbox(label="Annotator ID", placeholder="请输入你的唯一标识 (如昵称/学号)")
544
  index_slider = gr.Slider(
545
  label="Pair Index",
546
  value=0,
 
580
  # Event bindings
581
  task_selector.change(
582
  fn=on_task_change,
583
+ inputs=[task_selector, annotator_id_input, pair_state],
584
  outputs=[
585
  pair_state,
586
  index_slider,
 
624
  fn=on_submit,
625
  inputs=[
626
  task_selector,
627
+ annotator_id_input,
628
  index_slider,
629
  pair_state,
630
  a_physical_input,
 
658
  # Auto-load default task on startup
659
  demo.load(
660
  fn=on_task_change,
661
+ inputs=[task_selector, annotator_id_input, pair_state],
662
  outputs=[
663
  pair_state,
664
  index_slider,