# leaderboard / app.py
# basma-b's picture
# Update app.py
# dd2a905 verified
import os
import threading
import time

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, ColumnFilter
from huggingface_hub import snapshot_download

from src.about import TITLE
from src.display.css_html_js import custom_css
from src.display.utils import COLS, AutoEvalColumn, fields
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_leaderboard_df
def restart_space():
    """Sleep for 30 minutes, then force the whole process to exit.

    Meant to be used as a background-thread target: the call blocks for the
    full delay and never returns normally. The process exits with status 0
    so that the hosting platform can start the app again.
    """
    time.sleep(30 * 60)  # 30 minutes
    # os._exit terminates the process immediately, without running cleanup
    # handlers, and works from a non-main thread (sys.exit would only end
    # the calling thread).
    os._exit(0)  # Force exit, HF will restart the space
# Start restart timer in background.
# The thread captures the restart_space function object that exists at this
# point in the module (the sleep-then-exit variant defined just above); a later
# `def restart_space` rebinds the name but does not change this thread's target.
# daemon=True means this thread alone does not keep the process alive.
threading.Thread(target=restart_space, daemon=True).start()
def restart_space():
    """Call ``API.restart_space`` for the repository named by ``REPO_ID``."""
    space_id = REPO_ID
    API.restart_space(repo_id=space_id)
def get_best_per_team(df: pd.DataFrame) -> pd.DataFrame:
    """Return, for every team, the complete row with the highest ``global_score``.

    Each output row is one actual row of *df*. (``groupby(...).first()`` would
    instead take the first non-null value of every column independently, which
    can mix fields from different submissions when the best row has gaps.)

    Args:
        df: Table expected to contain ``team`` and ``global_score`` columns.

    Returns:
        *df* itself, unchanged, when either column is missing. Otherwise a new
        frame with one row per non-null team, the ``team`` column first, rows
        ordered by team and a fresh 0..n-1 index. Ties on the score keep the
        row that appears first in *df*.
    """
    if "team" not in df.columns or "global_score" not in df.columns:
        return df  # fallback

    # Rows without a team cannot be attributed to any group.
    ranked = df.dropna(subset=["team"]).sort_values(
        "global_score", ascending=False, kind="stable"
    )
    # After the descending sort, the first row seen for a team is its best one.
    best = ranked.drop_duplicates(subset="team", keep="first")
    best = best.sort_values("team", kind="stable").reset_index(drop=True)

    # Keep the grouping key as the leading column.
    ordered_columns = ["team"] + [col for col in best.columns if col != "team"]
    return best[ordered_columns]
def init_leaderboard(dataframe):
    """Create the ``Leaderboard`` component that displays *dataframe*.

    Column datatypes are taken from the ``type`` attribute of every
    ``AutoEvalColumn`` field, in dataclass field order. Search is limited to
    the team column, and dropdown filters are offered for the "Team" and
    "Submitter" columns.

    Args:
        dataframe: Table passed to the component as its ``value``.

    Returns:
        The constructed ``Leaderboard`` component.
    """
    column_names = [col.name for col in AutoEvalColumn.__dataclass_fields__.values()]
    datatypes = [getattr(AutoEvalColumn, name).type for name in column_names]

    # NOTE: column hiding is currently disabled. To re-enable it, pass
    # hide_columns=[n for n in column_names if getattr(AutoEvalColumn, n).hidden]
    # to Leaderboard below.
    filter_columns = [
        ColumnFilter("Team", type="dropdown", label="Filter by Team"),
        ColumnFilter("Submitter", type="dropdown", label="Filter by Submitter"),
    ]

    return Leaderboard(
        value=dataframe,
        datatype=datatypes,
        search_columns=[AutoEvalColumn.team.name],
        filter_columns=filter_columns,
        interactive=True,
    )
### Download Data
# Fetch the request-queue dataset and then the results dataset into their
# local directories. A failed download calls restart_space() and start-up
# then carries on with the next step.
_DATASET_SNAPSHOTS = (
    (QUEUE_REPO, EVAL_REQUESTS_PATH),
    (RESULTS_REPO, EVAL_RESULTS_PATH),
)
for _dataset_repo, _target_dir in _DATASET_SNAPSHOTS:
    try:
        snapshot_download(
            repo_id=_dataset_repo,
            local_dir=_target_dir,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
            token=TOKEN,
        )
    except Exception:
        restart_space()
# Initial Data
# Built once at import time from the local results/requests directories;
# filter_best_submissions reads this module-level frame on every call.
FULL_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS)
def filter_best_submissions(show_best):
    """Return the leaderboard table, optionally reduced to one row per team.

    Args:
        show_best: When truthy, keep only the row with the highest
            "Global Score" for each "Team"; otherwise return everything.

    Returns:
        ``FULL_DF`` itself when *show_best* is falsy. Otherwise a subset of
        its rows, one per team and ordered by team, with the original index
        labels. Rows with a missing score or a missing team are never
        selected, so a team with no valid score is omitted instead of making
        the lookup fail. Ties keep the row that appears first in ``FULL_DF``.
    """
    if not show_best:
        return FULL_DF

    # Work with positions (0..n-1) instead of index labels, so duplicate
    # labels in FULL_DF cannot pull in extra rows.
    scores = FULL_DF["Global Score"].reset_index(drop=True)
    teams = FULL_DF["Team"].reset_index(drop=True)

    # idxmax cannot pick a winner from a group with only missing scores.
    has_score = scores.notna()
    if not has_score.any():
        return FULL_DF.iloc[0:0]

    best_positions = scores[has_score].groupby(teams[has_score]).idxmax()
    return FULL_DF.iloc[best_positions.to_numpy(dtype=int)]
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(TITLE)

    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.TabItem("🏅 E2LMC Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
            # Toggle between all submissions and the best one per team.
            show_best_checkbox = gr.Checkbox(
                label="Show only best submission per team",
                value=False,
            )

            # Build the table once, using the checkbox's initial state.
            leaderboard_container = gr.Column()
            with leaderboard_container:
                leaderboard_component = init_leaderboard(
                    filter_best_submissions(show_best_checkbox.value)
                )

            # Recompute the table whenever the checkbox is toggled.
            show_best_checkbox.change(
                fn=filter_best_submissions,
                inputs=[show_best_checkbox],
                outputs=[leaderboard_component],
                queue=False,
            )
# Scheduler: call restart_space every 30 minutes from a background scheduler.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=30 * 60)
scheduler.start()

# Enable the request queue, then serve the app.
demo.queue(default_concurrency_limit=40)
demo.launch()