advanced

Running on CPU Upgrade

App Files Files Community

alozowski HF Staff committed on Mar 25

Commit

78afa9e

1 Parent(s): 6883f10

Improve Evaluate tab

Browse files

Files changed (2) hide show

yourbench_space/app.py +19 -10
yourbench_space/utils.py +11 -0

yourbench_space/app.py CHANGED Viewed

@@ -12,9 +12,11 @@ from datasets import load_dataset
 from huggingface_hub import whoami, HfApi
 from yourbench_space import PATH
 from yourbench_space.utils import (
     STAGES,
     SubprocessManagerGroup,
     save_files,
     update_dataset,
     map_stage_names,
     is_running_locally,
@@ -234,6 +236,12 @@ def init_session(profile: gr.OAuthProfile | None):
     logger.info(f"Started session for {local_uuid}")
     return gr.State(local_uuid, delete_callback=lambda uid: MANAGERS.remove(uid))
 with gr.Blocks(theme=gr.themes.Default()) as app:
     session_state = gr.State()
@@ -349,14 +357,18 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
                     with gr.Accordion("Lighteval Preview"):
                         lighteval_df = gr.DataFrame()
                 stages_table.change(
                     update_dataset,
                     inputs=[stages_table, hf_org_dropdown, hf_dataset_name],
                     outputs=[ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df],
                 )
                 # TODO: this timer should only be active when the second tab is passed to active for the first time
                 log_timer = gr.Timer(1.0, active=True)
                 log_timer.tick(
@@ -365,20 +377,16 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
                     outputs=[log_output, stages_table],
                 )
-        # with gr.Tab("Evaluate", id=2):
-        #     with gr.Row():
-        #         btn_launch_evals = gr.Button("Launch evaluations")
-        #         status = gr.Textbox(label="Status")
-        #     btn_launch_evals.click(run_evaluation_pipeline, [hf_org_dropdown, hf_dataset_name, gr.State("lighteval")], status)
         with gr.Tab("Evaluate", id=2):
             with gr.Column():
                 gr.Markdown("### 🧪 Run YourBench Evaluation")
                 gr.Markdown("Run the full evaluation pipeline on the uploaded dataset. This includes computing metrics, creating the leaderboard, and pushing results.")
                 with gr.Row():
-                    btn_launch_evals = gr.Button("🚀 Launch Evaluation", variant="primary")
-                    clear_status_btn = gr.Button("Clear", variant="secondary")
                 with gr.Accordion("Evaluation Log", open=True):
                     eval_status = gr.Textbox(label="", lines=6, interactive=False, show_label=False)
@@ -390,6 +398,7 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
                 )
                 clear_status_btn.click(lambda: "", outputs=eval_status)
     app.load(init_session, outputs=session_state)
 app.launch(allowed_paths=[PATH])

 from huggingface_hub import whoami, HfApi
 from yourbench_space import PATH
 from yourbench_space.utils import (
+    STAGE_DISPLAY_MAP,
     STAGES,
     SubprocessManagerGroup,
     save_files,
+    on_generation_succsess,
     update_dataset,
     map_stage_names,
     is_running_locally,
     logger.info(f"Started session for {local_uuid}")
     return gr.State(local_uuid, delete_callback=lambda uid: MANAGERS.remove(uid))
+btn_launch_evals = gr.Button(  # created before the Blocks context; placed in the Evaluate tab later via .render()
+    "🚀 Launch Evaluation",
+    visible=True,
+    interactive=False,  # Start non-interactive; the stages_table.change handler returns the update that enables it
+    variant="primary"
+)  # NOTE(review): presumably defined up here so the stages_table.change handler can list it as an output before the Evaluate tab is built — confirm
 with gr.Blocks(theme=gr.themes.Default()) as app:
     session_state = gr.State()
                     with gr.Accordion("Lighteval Preview"):
                         lighteval_df = gr.DataFrame()
                 stages_table.change(
                     update_dataset,
                     inputs=[stages_table, hf_org_dropdown, hf_dataset_name],
                     outputs=[ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df],
                 )
+                stages_table.change(
+                    on_generation_succsess,
+                    inputs=stages_table,
+                    outputs=[tabs,btn_launch_evals],
+                )
                 # TODO: this timer should only be active when the second tab is passed to active for the first time
                 log_timer = gr.Timer(1.0, active=True)
                 log_timer.tick(
                     outputs=[log_output, stages_table],
                 )
         with gr.Tab("Evaluate", id=2):
             with gr.Column():
                 gr.Markdown("### 🧪 Run YourBench Evaluation")
                 gr.Markdown("Run the full evaluation pipeline on the uploaded dataset. This includes computing metrics, creating the leaderboard, and pushing results.")
                 with gr.Row():
+                    with gr.Column():
+                        btn_launch_evals.render()
+                    with gr.Column():
+                        clear_status_btn = gr.Button("Clear", variant="secondary")
                 with gr.Accordion("Evaluation Log", open=True):
                     eval_status = gr.Textbox(label="", lines=6, interactive=False, show_label=False)
                 )
                 clear_status_btn.click(lambda: "", outputs=eval_status)
     app.load(init_session, outputs=session_state)
 app.launch(allowed_paths=[PATH])

yourbench_space/utils.py CHANGED Viewed

@@ -129,6 +129,17 @@ def update_dataset(stages: list, hf_org: str, hf_prefix: str, oauth_token: gr.OA
     return (ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df)
 class SubprocessManagerGroup:
     """Instanciates one manager per user (should be used as a singleton class)"""

     return (ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df)
+def should_enable_eval_tab(stages):  # returns True when the "lighteval" display name is a member of `stages`
+    logger.info(f"Stages received: {stages}")  # logs the incoming stages value at INFO level
+    logger.info(f"Lighteval stage name: {STAGE_DISPLAY_MAP['lighteval']}")  # logs the display name that is searched for
+    return STAGE_DISPLAY_MAP["lighteval"] in stages  # NOTE(review): `in` raises TypeError if stages is None — confirm callers always pass a container
+def on_generation_succsess(stages):  # stages_table.change handler; returns (tabs update, launch-button update). Name is misspelled (sic) but app.py imports this spelling, so it must stay
+    stages = stages or []  # falsy input (None, empty) becomes an empty list; NOTE(review): assumes `stages` is a plain list — confirm what the stages_table component passes
+    if STAGE_DISPLAY_MAP["lighteval"] in stages:  # same membership test as should_enable_eval_tab, without its logging
+        gr.Success("🌟 Your Dataset is ready for evaluation!")  # user-facing success notification
+        return gr.update(selected=2), gr.update(interactive=True, visible=True)  # select tab id 2 (the "Evaluate" tab) and make the launch button clickable
+    return gr.update(), gr.update(interactive=False, visible=True)  # leave the tab selection untouched; keep the button visible but disabled
 class SubprocessManagerGroup:
     """Instanciates one manager per user (should be used as a singleton class)"""