Space: Running

Commit 78b57a9 · Peiran committed · parent 9f6abf4

UI: switch to English-only labels/messages; minor cleanup. No logic changes.
app.py
CHANGED
@@ -163,7 +163,7 @@ def _schedule_round_robin_by_test_id(pairs: List[Dict[str, str]], seed: int | None
 
 def load_task(task_name: str):
     if not task_name:
-        raise gr.Error("
+        raise gr.Error("Please select a task first.")
 
     rows = _load_task_rows(task_name)
     pairs = _build_image_pairs(rows, task_name)
@@ -183,7 +183,7 @@ def load_task(task_name: str):
         p["swap"] = bool(idx % 2)  # True -> A=B's image; False -> A=A's image
 
     if not pairs:
-        raise gr.Error("
+        raise gr.Error("No valid image pairs found for evaluation. Please check the data.")
 
     return pairs
 
@@ -254,13 +254,13 @@ def _upload_eval_record_to_dataset(task_name: str, row: Dict[str, object]) -> tuple[bool, str]:
     Returns (ok, message) for UI feedback and debugging.
     """
     if HfApi is None:
-        return False, "huggingface_hub
+        return False, "huggingface_hub not installed"
     token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
     repo_id = os.environ.get("EVAL_REPO_ID", "peiranli0930/VisEval")
     if not token:
-        return False, "
+        return False, "Missing write token (HF_TOKEN/HUGGINGFACEHUB_API_TOKEN)"
     if not repo_id:
-        return False, "
+        return False, "EVAL_REPO_ID is not set"
     try:
         from huggingface_hub import CommitOperationAdd
 
@@ -277,14 +277,14 @@ def _upload_eval_record_to_dataset(task_name: str, row: Dict[str, object]) -> tuple[bool, str]:
             operations=operations,
             commit_message=f"Add eval {folder} {row.get('test_id')} {uid}",
         )
-        return True, f"
+        return True, f"Uploaded: {repo_id}/{path_in_repo}"
     except Exception as e:
         # Print to logs for debugging in Space
         try:
             print("[VisArena] Upload to dataset failed:", repr(e))
         except Exception:
             pass
-        return False, f"
+        return False, f"Exception: {type(e).__name__}: {e}"
 
 
 def on_task_change(task_name: str, _state_pairs: List[Dict[str, str]]):
@@ -304,7 +304,7 @@ def on_task_change(task_name: str, _state_pairs: List[Dict[str, str]]):
         _resolve_image_path(a_path),
         _resolve_image_path(b_path),
         *default_scores,
-        gr.update(value=f"
+        gr.update(value=f"Total {len(pairs)} pairs pending evaluation."),
     )
 
 
@@ -345,7 +345,7 @@ def on_submit(
         raise gr.Error("请先选择任务。")  # i.e., "Please select a task first."
 
     if not pairs:
-        raise gr.Error("
+        raise gr.Error("No image pairs loaded for the current task.")
 
     pair = pairs[index]
     score_map = {
@@ -378,9 +378,9 @@ def on_submit(
     ok_hub, hub_msg = _upload_eval_record_to_dataset(task_name, row)
 
     next_index = min(index + 1, len(pairs) - 1)
-    info = f"
-    info += "
-    info += "
+    info = f"Saved evaluation for Test ID {pair['test_id']}."
+    info += " Local persistence " + ("succeeded" if ok_local else "failed") + "."
+    info += " Dataset upload " + ("succeeded" if ok_hub else "failed") + (f" ({hub_msg})" if hub_msg else "") + "."
 
     if next_index != index:
         pair = pairs[next_index]
@@ -395,7 +395,7 @@ def on_submit(
             _resolve_image_path(b_path),
             3, 3, 3, 3,
             3, 3, 3, 3,
-            gr.update(value=info + f"
+            gr.update(value=info + f" Moved to next pair ({next_index + 1}/{len(pairs)})."),
         )
 
     return (
@@ -406,7 +406,7 @@ def on_submit(
         gr.update(),
         3, 3, 3, 3,
         3, 3, 3, 3,
-        gr.update(value=info + "
+        gr.update(value=info + " This is the last pair."),
     )
 
 
@@ -414,7 +414,7 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
     gr.Markdown(
         """
         # VisArena Human Evaluation
-
+        Please select a task and rate the generated images. Each score ranges from 1 (poor) to 5 (excellent).
         """
     )
 
@@ -442,21 +442,21 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
     # Layout: Original on top, two outputs below with their own sliders
     with gr.Row():
         with gr.Column(scale=12):
-            orig_image = gr.Image(type="filepath", label="
+            orig_image = gr.Image(type="filepath", label="Original", interactive=False)
 
     with gr.Row():
        with gr.Column(scale=6):
-            model1_image = gr.Image(type="filepath", label="
-            a_physical_input = gr.Slider(1, 5, value=3, step=1, label="A:
-            a_optical_input = gr.Slider(1, 5, value=3, step=1, label="A:
-            a_semantic_input = gr.Slider(1, 5, value=3, step=1, label="A:
-            a_overall_input = gr.Slider(1, 5, value=3, step=1, label="A:
+            model1_image = gr.Image(type="filepath", label="Output A", interactive=False)
+            a_physical_input = gr.Slider(1, 5, value=3, step=1, label="A: Physical Interaction Fidelity")
+            a_optical_input = gr.Slider(1, 5, value=3, step=1, label="A: Optical Effect Accuracy")
+            a_semantic_input = gr.Slider(1, 5, value=3, step=1, label="A: Semantic/Functional Alignment")
+            a_overall_input = gr.Slider(1, 5, value=3, step=1, label="A: Overall Photorealism")
        with gr.Column(scale=6):
-            model2_image = gr.Image(type="filepath", label="
-            b_physical_input = gr.Slider(1, 5, value=3, step=1, label="B:
-            b_optical_input = gr.Slider(1, 5, value=3, step=1, label="B:
-            b_semantic_input = gr.Slider(1, 5, value=3, step=1, label="B:
-            b_overall_input = gr.Slider(1, 5, value=3, step=1, label="B:
+            model2_image = gr.Image(type="filepath", label="Output B", interactive=False)
+            b_physical_input = gr.Slider(1, 5, value=3, step=1, label="B: Physical Interaction Fidelity")
+            b_optical_input = gr.Slider(1, 5, value=3, step=1, label="B: Optical Effect Accuracy")
+            b_semantic_input = gr.Slider(1, 5, value=3, step=1, label="B: Semantic/Functional Alignment")
+            b_overall_input = gr.Slider(1, 5, value=3, step=1, label="B: Overall Photorealism")
 
     submit_button = gr.Button("Submit Evaluation", variant="primary")
     feedback_box = gr.Markdown("")
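For reference, the hunk boundaries elide the middle of `_upload_eval_record_to_dataset`, where `operations`, `folder`, `uid`, and `path_in_repo` are built. Below is a minimal, self-contained sketch of how such an upload plausibly fits together. The helper name `upload_record`, the `evals/...` path layout, and the JSON payload are assumptions for illustration, not the Space's actual code; the `HfApi.create_commit` and `CommitOperationAdd` calls are the real `huggingface_hub` API.

```python
# Minimal sketch, not the Space's exact code: commit one evaluation record
# to a Hugging Face dataset repo. `upload_record`, the `evals/...` layout,
# and the JSON payload are hypothetical stand-ins for the elided code.
import json
import os
import uuid

from huggingface_hub import CommitOperationAdd, HfApi


def upload_record(repo_id: str, task_name: str, row: dict) -> tuple[bool, str]:
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if not token:
        return False, "Missing write token (HF_TOKEN/HUGGINGFACEHUB_API_TOKEN)"

    folder = task_name
    uid = uuid.uuid4().hex[:8]  # hypothetical: any unique suffix works
    path_in_repo = f"evals/{folder}/{row.get('test_id')}-{uid}.json"

    # Serialize the record and stage it as a single commit operation.
    payload = json.dumps(row, ensure_ascii=False).encode("utf-8")
    operations = [CommitOperationAdd(path_in_repo=path_in_repo, path_or_fileobj=payload)]

    try:
        HfApi(token=token).create_commit(
            repo_id=repo_id,
            repo_type="dataset",
            operations=operations,
            commit_message=f"Add eval {folder} {row.get('test_id')} {uid}",
        )
        return True, f"Uploaded: {repo_id}/{path_in_repo}"
    except Exception as e:
        return False, f"Exception: {type(e).__name__}: {e}"
```

Returning `(ok, message)` instead of raising keeps the Gradio callback responsive and lets the UI surface the exact failure string, which is what the new English messages in this commit feed into.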
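One detail worth noting from the context lines: `load_task` alternates `p["swap"] = bool(idx % 2)` so that the blinded "A"/"B" slots do not map to a fixed model. A small sketch of how a renderer would consume that flag follows; the `a_path`/`b_path` field names are hypothetical.

```python
# Hypothetical illustration of consuming the alternating blind flag set by
# `p["swap"] = bool(idx % 2)`: odd-indexed pairs show model B's image in the
# "A" slot, so raters cannot learn a fixed A/B-to-model mapping.
from typing import Dict, List, Tuple


def display_paths(pair: Dict[str, object]) -> Tuple[str, str]:
    a_path, b_path = str(pair["a_path"]), str(pair["b_path"])
    if pair["swap"]:  # True -> "A" slot gets B's image; False -> "A" slot gets A's image
        return b_path, a_path
    return a_path, b_path


pairs: List[Dict[str, object]] = [
    {"a_path": "outA/0.png", "b_path": "outB/0.png"},
    {"a_path": "outA/1.png", "b_path": "outB/1.png"},
]
for idx, p in enumerate(pairs):
    p["swap"] = bool(idx % 2)
    left, right = display_paths(p)
    print(idx, left, right)  # pair 0 unswapped, pair 1 swapped
```

Alternating by index keeps each model on each side equally often across a session, with no random seed needed.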