Commit · d317f64
Parent(s): 2c64c31
Moved build leaderboard to different folder

- app.py +1 -60
- src/leaderboard/build_leaderboard.py +68 -0
app.py CHANGED

@@ -1,15 +1,11 @@
-import json
 import logging
 import os
 import subprocess
-import time
 
 import gradio as gr
-import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from gradio_leaderboard import Leaderboard, SelectColumns
 from gradio_space_ci import enable_space_ci
-from huggingface_hub import snapshot_download
 
 from src.display.about import (
     INTRODUCTION_TEXT,
@@ -22,11 +18,11 @@ from src.display.utils import (
 )
 from src.envs import (
     API,
-    EVAL_RESULTS_PATH,
     H4_TOKEN,
     REPO_ID,
     RESET_JUDGEMENT_ENV,
 )
+from src.leaderboard.build_leaderboard import build_leadearboard_df
 
 os.environ['GRADIO_ANALYTICS_ENABLED']='false'
 
@@ -40,61 +36,6 @@ def restart_space():
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
 
 
-def time_diff_wrapper(func):
-    def wrapper(*args, **kwargs):
-        start_time = time.time()
-        result = func(*args, **kwargs)
-        end_time = time.time()
-        diff = end_time - start_time
-        logging.info(f"Time taken for {func.__name__}: {diff} seconds")
-        return result
-    return wrapper
-
-
-@time_diff_wrapper
-def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
-    """Download dataset with exponential backoff retries."""
-    attempt = 0
-    while attempt < max_attempts:
-        try:
-            logging.info(f"Downloading {repo_id} to {local_dir}")
-            snapshot_download(
-                repo_id=repo_id,
-                local_dir=local_dir,
-                repo_type=repo_type,
-                tqdm_class=None,
-                token=os.environ.get("HF_TOKEN"),
-                etag_timeout=30,
-                max_workers=8,
-            )
-            logging.info("Download successful")
-            return
-        except Exception as e:
-            wait_time = backoff_factor ** attempt
-            logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
-            time.sleep(wait_time)
-            attempt += 1
-    raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
-
-def build_leadearboard_df():
-    """Initializes the application space, loading only necessary data."""
-    # Check ENV LEADERBOARD_DOWNLOAD if wee need to download the leaderboard
-    if os.getenv("LEADERBOARD_DOWNLOAD", "True") == "True":
-        # These downloads only occur on full initialization
-        # try:
-        #     download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
-        #     download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
-        download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
-        # print(subprocess.Popen('ls src'))
-        subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/external/*', 'src/gen/data/arena-hard-v0.1/model_answer/'], check=False)
-        subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/model_judgment/*', 'src/gen/data/arena-hard-v0.1/model_judgement/'], check=False)
-        # except Exception:
-        #     restart_space()
-
-    # Always retrieve the leaderboard DataFrame
-    leaderboard_df = pd.DataFrame.from_records(json.load(open('eval-results/evals/upd.json','r')))
-    return leaderboard_df.copy()
-
 def build_demo():
     demo = gr.Blocks(
         title = "Chatbot Arena Leaderboard",
src/leaderboard/build_leaderboard.py ADDED

@@ -0,0 +1,68 @@
+
+import json
+import logging
+import os
+import subprocess
+import time
+
+import pandas as pd
+from huggingface_hub import snapshot_download
+
+from src.envs import EVAL_RESULTS_PATH
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+def time_diff_wrapper(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        diff = end_time - start_time
+        logging.info(f"Time taken for {func.__name__}: {diff} seconds")
+        return result
+    return wrapper
+
+@time_diff_wrapper
+def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
+    """Download dataset with exponential backoff retries."""
+    attempt = 0
+    while attempt < max_attempts:
+        try:
+            logging.info(f"Downloading {repo_id} to {local_dir}")
+            snapshot_download(
+                repo_id=repo_id,
+                local_dir=local_dir,
+                repo_type=repo_type,
+                tqdm_class=None,
+                token=os.environ.get("HF_TOKEN"),
+                etag_timeout=30,
+                max_workers=8,
+            )
+            logging.info("Download successful")
+            return
+        except Exception as e:
+            wait_time = backoff_factor ** attempt
+            logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
+            time.sleep(wait_time)
+            attempt += 1
+    raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
+
+def build_leadearboard_df():
+    """Initializes the application space, loading only necessary data."""
+    # Check ENV LEADERBOARD_DOWNLOAD if wee need to download the leaderboard
+    if os.getenv("LEADERBOARD_DOWNLOAD", "True") == "True":
+        # These downloads only occur on full initialization
+        # try:
+        #     download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
+        #     download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
+        download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
+        # print(subprocess.Popen('ls src'))
+        subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/external/*', 'src/gen/data/arena-hard-v0.1/model_answer/'], check=False)
+        subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/model_judgment/*', 'src/gen/data/arena-hard-v0.1/model_judgement/'], check=False)
+        # except Exception:
+        #     restart_space()
+
+    # Always retrieve the leaderboard DataFrame
+    leaderboard_df = pd.DataFrame.from_records(json.load(open('eval-results/evals/upd.json','r')))
+    return leaderboard_df.copy()
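The new module can also be exercised on its own. The following is a minimal local smoke test, assuming the repo root is on the import path and that eval-results/evals/upd.json already exists (or that HF_TOKEN is set so the snapshot download can run). It is illustrative only, not part of the commit.

# Hypothetical smoke test -- not part of the commit.
import os

from src.leaderboard.build_leaderboard import build_leadearboard_df

# Skip the snapshot download / rsync step and just read the cached JSON;
# set to "True" to pull Vikhrmodels/openbench-eval first.
os.environ["LEADERBOARD_DOWNLOAD"] = "False"

df = build_leadearboard_df()  # pandas DataFrame built from eval-results/evals/upd.json
print(df.shape)
print(df.head())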