Spaces:
Runtime error
Runtime error
Commit
·
c5558c5
1
Parent(s):
a654acb
update
Browse files- backend-cli.py +77 -14
backend-cli.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
| 3 |
import os
|
| 4 |
import json
|
| 5 |
|
|
|
|
| 6 |
import random
|
| 7 |
from datetime import datetime
|
| 8 |
|
|
@@ -17,6 +18,10 @@ from src.leaderboard.read_evals import EvalResult
|
|
| 17 |
from src.envs import QUEUE_REPO, RESULTS_REPO, API
|
| 18 |
from src.utils import my_snapshot_download
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
import time
|
| 21 |
|
| 22 |
import logging
|
|
@@ -124,15 +129,11 @@ def process_finished_requests(thr: int) -> bool:
|
|
| 124 |
|
| 125 |
# Get all eval request that are FINISHED, if you want to run other evals, change this parameter
|
| 126 |
eval_requests: list[EvalRequest] = get_eval_requests(job_status=current_finished_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
|
| 127 |
-
# Sort the evals by priority (first submitted first run)
|
| 128 |
eval_requests: list[EvalRequest] = sort_models_by_priority(api=API, models=eval_requests)
|
| 129 |
|
| 130 |
-
# XXX
|
| 131 |
-
# eval_requests = [r for r in eval_requests if 'bloom-560m' in r.model]
|
| 132 |
-
|
| 133 |
random.shuffle(eval_requests)
|
| 134 |
|
| 135 |
-
from src.leaderboard.read_evals import get_raw_eval_results
|
| 136 |
eval_results: list[EvalResult] = get_raw_eval_results(EVAL_RESULTS_PATH_BACKEND, EVAL_REQUESTS_PATH_BACKEND, True)
|
| 137 |
|
| 138 |
result_name_to_request = {request_to_result_name(r): r for r in eval_requests}
|
|
@@ -143,9 +144,10 @@ def process_finished_requests(thr: int) -> bool:
|
|
| 143 |
result_name: str = request_to_result_name(eval_request)
|
| 144 |
|
| 145 |
# Check the corresponding result
|
| 146 |
-
from typing import Optional
|
| 147 |
eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
|
| 148 |
|
|
|
|
|
|
|
| 149 |
task_lst = TASKS_HARNESS.copy()
|
| 150 |
random.shuffle(task_lst)
|
| 151 |
|
|
@@ -169,6 +171,58 @@ def process_finished_requests(thr: int) -> bool:
|
|
| 169 |
return False
|
| 170 |
|
| 171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
def process_pending_requests() -> bool:
|
| 173 |
sanity_checks()
|
| 174 |
|
|
@@ -176,7 +230,7 @@ def process_pending_requests() -> bool:
|
|
| 176 |
|
| 177 |
# Get all eval request that are PENDING, if you want to run other evals, change this parameter
|
| 178 |
eval_requests = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
|
| 179 |
-
# Sort the evals by priority (first submitted first run)
|
| 180 |
eval_requests = sort_models_by_priority(api=API, models=eval_requests)
|
| 181 |
|
| 182 |
random.shuffle(eval_requests)
|
|
@@ -207,19 +261,28 @@ def process_pending_requests() -> bool:
|
|
| 207 |
if __name__ == "__main__":
|
| 208 |
wait = True
|
| 209 |
|
| 210 |
-
|
| 211 |
-
if socket.gethostname() in {'hamburg'} or os.path.isdir("/home/pminervi"):
|
| 212 |
wait = False
|
| 213 |
|
| 214 |
if wait:
|
| 215 |
time.sleep(60 * random.randint(5, 10))
|
| 216 |
-
pass
|
| 217 |
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
if res is False:
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
if res is False:
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import os
|
| 4 |
import json
|
| 5 |
|
| 6 |
+
import socket
|
| 7 |
import random
|
| 8 |
from datetime import datetime
|
| 9 |
|
|
|
|
| 18 |
from src.envs import QUEUE_REPO, RESULTS_REPO, API
|
| 19 |
from src.utils import my_snapshot_download
|
| 20 |
|
| 21 |
+
from src.leaderboard.read_evals import get_raw_eval_results
|
| 22 |
+
|
| 23 |
+
from typing import Optional
|
| 24 |
+
|
| 25 |
import time
|
| 26 |
|
| 27 |
import logging
|
|
|
|
| 129 |
|
| 130 |
# Get all eval requests that are FINISHED; if you want to run other evals, change this parameter
|
| 131 |
eval_requests: list[EvalRequest] = get_eval_requests(job_status=current_finished_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
|
| 132 |
+
# Sort the evals by priority (first submitted, first run)
|
| 133 |
eval_requests: list[EvalRequest] = sort_models_by_priority(api=API, models=eval_requests)
|
| 134 |
|
|
|
|
|
|
|
|
|
|
| 135 |
random.shuffle(eval_requests)
|
| 136 |
|
|
|
|
| 137 |
eval_results: list[EvalResult] = get_raw_eval_results(EVAL_RESULTS_PATH_BACKEND, EVAL_REQUESTS_PATH_BACKEND, True)
|
| 138 |
|
| 139 |
result_name_to_request = {request_to_result_name(r): r for r in eval_requests}
|
|
|
|
| 144 |
result_name: str = request_to_result_name(eval_request)
|
| 145 |
|
| 146 |
# Check the corresponding result
|
|
|
|
| 147 |
eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
|
| 148 |
|
| 149 |
+
breakpoint()
|
| 150 |
+
|
| 151 |
task_lst = TASKS_HARNESS.copy()
|
| 152 |
random.shuffle(task_lst)
|
| 153 |
|
|
|
|
| 171 |
return False
|
| 172 |
|
| 173 |
|
| 174 |
+
def maybe_refresh_results(thr: int) -> bool:
    """Run one evaluation task for a request that is missing or refreshing results.

    Requests with status PENDING, FINISHED or FAILED are fetched from the
    queue. For each request with at least ``thr`` likes, the harness tasks
    are visited in random order; the first task that needs to be (re)computed
    is evaluated and the function returns. A task needs to be computed when:

    * no stored result exists for the request,
    * the stored result has no entry for the task, or
    * the task name contains one of ``'nq'``, ``'trivia'``, ``'tqa'`` or
      ``'self'`` (these are always re-run, even if a result exists).

    The request is marked RUNNING before the evaluation and FINISHED after it.

    Args:
        thr: Minimum ``likes`` a request must have to be considered.

    Returns:
        True if an evaluation was run, False if no request/task qualified.
    """
    # NOTE(review): the indentation of this function was reconstructed from the
    # statement order; confirm that `return True` belongs inside the task `if`.
    sanity_checks()

    # Requests in any of these states are candidates for a refresh.
    refreshable_statuses = [PENDING_STATUS, FINISHED_STATUS, FAILED_STATUS]

    eval_requests: list[EvalRequest] = get_eval_requests(job_status=refreshable_statuses, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
    eval_requests = sort_models_by_priority(api=API, models=eval_requests)

    # NOTE(review): shuffling discards whatever order sort_models_by_priority
    # produced; the call is kept in case it also filters or annotates requests.
    random.shuffle(eval_requests)

    eval_results: list[EvalResult] = get_raw_eval_results(EVAL_RESULTS_PATH_BACKEND, EVAL_REQUESTS_PATH_BACKEND, True)

    result_name_to_request = {request_to_result_name(r): r for r in eval_requests}
    result_name_to_result = {r.eval_name: r for r in eval_results}

    # Tasks whose name contains one of these substrings are always re-run.
    always_rerun_markers = ('nq', 'trivia', 'tqa', 'self')

    for eval_request in eval_requests:
        if eval_request.likes < thr:
            continue

        result_name: str = request_to_result_name(eval_request)

        # Stored result for this request, or None when nothing was computed yet.
        eval_result: Optional[EvalResult] = result_name_to_result.get(result_name)

        task_lst = TASKS_HARNESS.copy()
        random.shuffle(task_lst)

        # Iterate over tasks and, if we do not have results for a task, run the relevant evaluation
        for task in task_lst:
            task_name = task.benchmark

            needs_run = (
                eval_result is None
                or task_name not in eval_result.results
                or any(marker in task_name for marker in always_rerun_markers)
            )
            if not needs_run:
                continue

            # Several requests can share a result name; use the one registered for it.
            eval_request = result_name_to_request[result_name]

            # Re-sync the local queue copy before each status change.
            my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
            my_set_eval_request(api=API, eval_request=eval_request, set_to_status=RUNNING_STATUS, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)

            process_evaluation(task, eval_request)

            my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
            my_set_eval_request(api=API, eval_request=eval_request, set_to_status=FINISHED_STATUS, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)

            # Only one evaluation is performed per call.
            return True

    return False
|
| 224 |
+
|
| 225 |
+
|
| 226 |
def process_pending_requests() -> bool:
|
| 227 |
sanity_checks()
|
| 228 |
|
|
|
|
| 230 |
|
| 231 |
# Get all eval requests that are PENDING; if you want to run other evals, change this parameter
|
| 232 |
eval_requests = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
|
| 233 |
+
# Sort the evals by priority (first submitted, first run)
|
| 234 |
eval_requests = sort_models_by_priority(api=API, models=eval_requests)
|
| 235 |
|
| 236 |
random.shuffle(eval_requests)
|
|
|
|
| 261 |
if __name__ == "__main__":
|
| 262 |
wait = True
|
| 263 |
|
| 264 |
+
if socket.gethostname() in {'hamburg', 'neuromancer'} or os.path.isdir("/home/pminervi"):
|
|
|
|
| 265 |
wait = False
|
| 266 |
|
| 267 |
if wait:
|
| 268 |
time.sleep(60 * random.randint(5, 10))
|
|
|
|
| 269 |
|
| 270 |
+
res = False
|
| 271 |
+
|
| 272 |
+
if random.randint(0, 1) == 0:
|
| 273 |
+
res = process_pending_requests()
|
| 274 |
+
time.sleep(60)
|
| 275 |
|
| 276 |
if res is False:
|
| 277 |
+
if random.randint(0, 1) == 0:
|
| 278 |
+
res = maybe_refresh_results(100)
|
| 279 |
+
else:
|
| 280 |
+
res = process_finished_requests(100)
|
| 281 |
+
|
| 282 |
+
time.sleep(60)
|
| 283 |
|
| 284 |
if res is False:
|
| 285 |
+
if random.randint(0, 1) == 0:
|
| 286 |
+
res = maybe_refresh_results(0)
|
| 287 |
+
else:
|
| 288 |
+
res = process_finished_requests(0)
|