Commit · d317f64
Parent(s): 2c64c31
Moved build leaderboard to different folder

- app.py +1 -60
- src/leaderboard/build_leaderboard.py +68 -0
app.py CHANGED

@@ -1,15 +1,11 @@
-import json
 import logging
 import os
 import subprocess
-import time
 
 import gradio as gr
-import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from gradio_leaderboard import Leaderboard, SelectColumns
 from gradio_space_ci import enable_space_ci
-from huggingface_hub import snapshot_download
 
 from src.display.about import (
     INTRODUCTION_TEXT,
@@ -22,11 +18,11 @@ from src.display.utils import (
 )
 from src.envs import (
     API,
-    EVAL_RESULTS_PATH,
     H4_TOKEN,
     REPO_ID,
     RESET_JUDGEMENT_ENV,
 )
+from src.leaderboard.build_leaderboard import build_leadearboard_df
 
 os.environ['GRADIO_ANALYTICS_ENABLED']='false'
 
@@ -40,61 +36,6 @@ def restart_space():
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
 
 
-def time_diff_wrapper(func):
-    def wrapper(*args, **kwargs):
-        start_time = time.time()
-        result = func(*args, **kwargs)
-        end_time = time.time()
-        diff = end_time - start_time
-        logging.info(f"Time taken for {func.__name__}: {diff} seconds")
-        return result
-    return wrapper
-
-
-@time_diff_wrapper
-def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
-    """Download dataset with exponential backoff retries."""
-    attempt = 0
-    while attempt < max_attempts:
-        try:
-            logging.info(f"Downloading {repo_id} to {local_dir}")
-            snapshot_download(
-                repo_id=repo_id,
-                local_dir=local_dir,
-                repo_type=repo_type,
-                tqdm_class=None,
-                token=os.environ.get("HF_TOKEN"),
-                etag_timeout=30,
-                max_workers=8,
-            )
-            logging.info("Download successful")
-            return
-        except Exception as e:
-            wait_time = backoff_factor ** attempt
-            logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
-            time.sleep(wait_time)
-            attempt += 1
-    raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
-
-def build_leadearboard_df():
-    """Initializes the application space, loading only necessary data."""
-    # Check ENV LEADERBOARD_DOWNLOAD if wee need to download the leaderboard
-    if os.getenv("LEADERBOARD_DOWNLOAD", "True") == "True":
-        # These downloads only occur on full initialization
-        # try:
-        #     download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
-        #     download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
-        download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
-        # print(subprocess.Popen('ls src'))
-        subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/external/*', 'src/gen/data/arena-hard-v0.1/model_answer/'], check=False)
-        subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/model_judgment/*', 'src/gen/data/arena-hard-v0.1/model_judgement/'], check=False)
-        # except Exception:
-        #     restart_space()
-
-    # Always retrieve the leaderboard DataFrame
-    leaderboard_df = pd.DataFrame.from_records(json.load(open('eval-results/evals/upd.json','r')))
-    return leaderboard_df.copy()
-
 def build_demo():
     demo = gr.Blocks(
         title = "Chatbot Arena Leaderboard",
src/leaderboard/build_leaderboard.py ADDED

@@ -0,0 +1,68 @@
+
+import json
+import logging
+import os
+import subprocess
+import time
+
+import pandas as pd
+from huggingface_hub import snapshot_download
+
+from src.envs import EVAL_RESULTS_PATH
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+def time_diff_wrapper(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        diff = end_time - start_time
+        logging.info(f"Time taken for {func.__name__}: {diff} seconds")
+        return result
+    return wrapper
+
+@time_diff_wrapper
+def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
+    """Download dataset with exponential backoff retries."""
+    attempt = 0
+    while attempt < max_attempts:
+        try:
+            logging.info(f"Downloading {repo_id} to {local_dir}")
+            snapshot_download(
+                repo_id=repo_id,
+                local_dir=local_dir,
+                repo_type=repo_type,
+                tqdm_class=None,
+                token=os.environ.get("HF_TOKEN"),
+                etag_timeout=30,
+                max_workers=8,
+            )
+            logging.info("Download successful")
+            return
+        except Exception as e:
+            wait_time = backoff_factor ** attempt
+            logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
+            time.sleep(wait_time)
+            attempt += 1
+    raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
+
+def build_leadearboard_df():
+    """Initializes the application space, loading only necessary data."""
+    # Check ENV LEADERBOARD_DOWNLOAD if wee need to download the leaderboard
+    if os.getenv("LEADERBOARD_DOWNLOAD", "True") == "True":
+        # These downloads only occur on full initialization
+        # try:
+        #     download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
+        #     download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
+        download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
+        # print(subprocess.Popen('ls src'))
+        subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/external/*', 'src/gen/data/arena-hard-v0.1/model_answer/'], check=False)
+        subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/model_judgment/*', 'src/gen/data/arena-hard-v0.1/model_judgement/'], check=False)
+        # except Exception:
+        #     restart_space()
+
+    # Always retrieve the leaderboard DataFrame
+    leaderboard_df = pd.DataFrame.from_records(json.load(open('eval-results/evals/upd.json','r')))
+    return leaderboard_df.copy()
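The new module can also be exercised on its own. The following is a minimal local smoke test, assuming the repo root is on the import path and that eval-results/evals/upd.json already exists (or that HF_TOKEN is set so the snapshot download can run). It is illustrative only, not part of the commit.

# Hypothetical smoke test -- not part of the commit.
import os

from src.leaderboard.build_leaderboard import build_leadearboard_df

# Skip the snapshot download / rsync step and just read the cached JSON;
# set to "True" to pull Vikhrmodels/openbench-eval first.
os.environ["LEADERBOARD_DOWNLOAD"] = "False"

df = build_leadearboard_df()  # pandas DataFrame built from eval-results/evals/upd.json
print(df.shape)
print(df.head())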