Spaces:
Running
Running
chore: fix model repo names
Browse files
data/u_math_eval_results.json
CHANGED
|
@@ -195,7 +195,7 @@
|
|
| 195 |
]
|
| 196 |
},
|
| 197 |
{
|
| 198 |
-
"model_name": "Nexusflow/Athene-V2-
|
| 199 |
"judge_model_name": "gpt-4o-2024-08-06",
|
| 200 |
"u_math": [
|
| 201 |
54.90909090909091,
|
|
@@ -429,7 +429,7 @@
|
|
| 429 |
]
|
| 430 |
},
|
| 431 |
{
|
| 432 |
-
"model_name": "mistralai/Ministral-8B-2410",
|
| 433 |
"judge_model_name": "gpt-4o-2024-08-06",
|
| 434 |
"u_math": [
|
| 435 |
23.09090909090909,
|
|
@@ -468,7 +468,7 @@
|
|
| 468 |
]
|
| 469 |
},
|
| 470 |
{
|
| 471 |
-
"model_name": "mistralai/Mistral-Large-2411",
|
| 472 |
"judge_model_name": "gpt-4o-2024-08-06",
|
| 473 |
"u_math": [
|
| 474 |
47.63636363636364,
|
|
@@ -624,7 +624,7 @@
|
|
| 624 |
]
|
| 625 |
},
|
| 626 |
{
|
| 627 |
-
"model_name": "mistralai/Mistral-Small-2501",
|
| 628 |
"judge_model_name": "gpt-4o-2024-08-06",
|
| 629 |
"u_math": [
|
| 630 |
34.81818181818182,
|
|
@@ -975,7 +975,7 @@
|
|
| 975 |
]
|
| 976 |
},
|
| 977 |
{
|
| 978 |
-
"model_name": "
|
| 979 |
"judge_model_name": "gpt-4o-2024-08-06",
|
| 980 |
"u_math": [
|
| 981 |
42.54545454545455,
|
|
@@ -1170,7 +1170,7 @@
|
|
| 1170 |
]
|
| 1171 |
},
|
| 1172 |
{
|
| 1173 |
-
"model_name": "mistralai/Pixtral-Large-2411",
|
| 1174 |
"judge_model_name": "gpt-4o-2024-08-06",
|
| 1175 |
"u_math": [
|
| 1176 |
47.81818181818182,
|
|
|
|
| 195 |
]
|
| 196 |
},
|
| 197 |
{
|
| 198 |
+
"model_name": "Nexusflow/Athene-V2-Chat",
|
| 199 |
"judge_model_name": "gpt-4o-2024-08-06",
|
| 200 |
"u_math": [
|
| 201 |
54.90909090909091,
|
|
|
|
| 429 |
]
|
| 430 |
},
|
| 431 |
{
|
| 432 |
+
"model_name": "mistralai/Ministral-8B-Instruct-2410",
|
| 433 |
"judge_model_name": "gpt-4o-2024-08-06",
|
| 434 |
"u_math": [
|
| 435 |
23.09090909090909,
|
|
|
|
| 468 |
]
|
| 469 |
},
|
| 470 |
{
|
| 471 |
+
"model_name": "mistralai/Mistral-Large-Instruct-2411",
|
| 472 |
"judge_model_name": "gpt-4o-2024-08-06",
|
| 473 |
"u_math": [
|
| 474 |
47.63636363636364,
|
|
|
|
| 624 |
]
|
| 625 |
},
|
| 626 |
{
|
| 627 |
+
"model_name": "mistralai/Mistral-Small-Instruct-2501",
|
| 628 |
"judge_model_name": "gpt-4o-2024-08-06",
|
| 629 |
"u_math": [
|
| 630 |
34.81818181818182,
|
|
|
|
| 975 |
]
|
| 976 |
},
|
| 977 |
{
|
| 978 |
+
"model_name": "nvidia/Llama-3.1-Nemotron-70B",
|
| 979 |
"judge_model_name": "gpt-4o-2024-08-06",
|
| 980 |
"u_math": [
|
| 981 |
42.54545454545455,
|
|
|
|
| 1170 |
]
|
| 1171 |
},
|
| 1172 |
{
|
| 1173 |
+
"model_name": "mistralai/Pixtral-Large-Instruct-2411",
|
| 1174 |
"judge_model_name": "gpt-4o-2024-08-06",
|
| 1175 |
"u_math": [
|
| 1176 |
47.81818181818182,
|