cogwheelhead committed on
Commit
11fdbf3
·
verified ·
1 Parent(s): bf42c1e

chore: fix model repo names

Browse files
Files changed (1) hide show
  1. data/u_math_eval_results.json +13 -13
data/u_math_eval_results.json CHANGED
@@ -1,6 +1,6 @@
1
  [
2
  {
3
- "model_name": "meta-llama/Llama-3.1-8B",
4
  "judge_model_name": "gpt-4o-2024-08-06",
5
  "u_math": [
6
  29.545454545454547,
@@ -39,7 +39,7 @@
39
  ]
40
  },
41
  {
42
- "model_name": "Qwen/Qwen2.5-7B",
43
  "judge_model_name": "gpt-4o-2024-08-06",
44
  "u_math": [
45
  43.27272727272727,
@@ -78,7 +78,7 @@
78
  ]
79
  },
80
  {
81
- "model_name": "Qwen/Qwen2.5-72B",
82
  "judge_model_name": "gpt-4o-2024-08-06",
83
  "u_math": [
84
  51.18181818181819,
@@ -117,7 +117,7 @@
117
  ]
118
  },
119
  {
120
- "model_name": "Qwen/Qwen2.5-Math-7B",
121
  "judge_model_name": "gpt-4o-2024-08-06",
122
  "u_math": [
123
  45.45454545454545,
@@ -156,7 +156,7 @@
156
  ]
157
  },
158
  {
159
- "model_name": "Qwen/Qwen2.5-Math-72B",
160
  "judge_model_name": "gpt-4o-2024-08-06",
161
  "u_math": [
162
  59.45454545454546,
@@ -546,7 +546,7 @@
546
  ]
547
  },
548
  {
549
- "model_name": "meta-llama/Llama-3.2-11B-Vision",
550
  "judge_model_name": "gpt-4o-2024-08-06",
551
  "u_math": [
552
  20.363636363636363,
@@ -663,7 +663,7 @@
663
  ]
664
  },
665
  {
666
- "model_name": "Qwen/Qwen2.5-32B",
667
  "judge_model_name": "gpt-4o-2024-08-06",
668
  "u_math": [
669
  52.36363636363637,
@@ -936,7 +936,7 @@
936
  ]
937
  },
938
  {
939
- "model_name": "meta-llama/Llama-3.1-70B",
940
  "judge_model_name": "gpt-4o-2024-08-06",
941
  "u_math": [
942
  34.27272727272727,
@@ -975,7 +975,7 @@
975
  ]
976
  },
977
  {
978
- "model_name": "nvidia/Llama-3.1-Nemotron-70B",
979
  "judge_model_name": "gpt-4o-2024-08-06",
980
  "u_math": [
981
  42.54545454545455,
@@ -1014,7 +1014,7 @@
1014
  ]
1015
  },
1016
  {
1017
- "model_name": "meta-llama/Llama-3.3-70B",
1018
  "judge_model_name": "gpt-4o-2024-08-06",
1019
  "u_math": [
1020
  44.72727272727273,
@@ -1053,7 +1053,7 @@
1053
  ]
1054
  },
1055
  {
1056
- "model_name": "meta-llama/Llama-3.2-90B-Vision",
1057
  "judge_model_name": "gpt-4o-2024-08-06",
1058
  "u_math": [
1059
  37.18181818181818,
@@ -1092,7 +1092,7 @@
1092
  ]
1093
  },
1094
  {
1095
- "model_name": "Qwen/Qwen2-VL-7B",
1096
  "judge_model_name": "gpt-4o-2024-08-06",
1097
  "u_math": [
1098
  26.272727272727277,
@@ -1131,7 +1131,7 @@
1131
  ]
1132
  },
1133
  {
1134
- "model_name": "Qwen/Qwen2-VL-72B",
1135
  "judge_model_name": "gpt-4o-2024-08-06",
1136
  "u_math": [
1137
  41.81818181818181,
 
1
  [
2
  {
3
+ "model_name": "meta-llama/Llama-3.1-8B-Instruct",
4
  "judge_model_name": "gpt-4o-2024-08-06",
5
  "u_math": [
6
  29.545454545454547,
 
39
  ]
40
  },
41
  {
42
+ "model_name": "Qwen/Qwen2.5-7B-Instruct",
43
  "judge_model_name": "gpt-4o-2024-08-06",
44
  "u_math": [
45
  43.27272727272727,
 
78
  ]
79
  },
80
  {
81
+ "model_name": "Qwen/Qwen2.5-72B-Instruct",
82
  "judge_model_name": "gpt-4o-2024-08-06",
83
  "u_math": [
84
  51.18181818181819,
 
117
  ]
118
  },
119
  {
120
+ "model_name": "Qwen/Qwen2.5-Math-7B-Instruct",
121
  "judge_model_name": "gpt-4o-2024-08-06",
122
  "u_math": [
123
  45.45454545454545,
 
156
  ]
157
  },
158
  {
159
+ "model_name": "Qwen/Qwen2.5-Math-72B-Instruct",
160
  "judge_model_name": "gpt-4o-2024-08-06",
161
  "u_math": [
162
  59.45454545454546,
 
546
  ]
547
  },
548
  {
549
+ "model_name": "meta-llama/Llama-3.2-11B-Vision-Instruct",
550
  "judge_model_name": "gpt-4o-2024-08-06",
551
  "u_math": [
552
  20.363636363636363,
 
663
  ]
664
  },
665
  {
666
+ "model_name": "Qwen/Qwen2.5-32B-Instruct",
667
  "judge_model_name": "gpt-4o-2024-08-06",
668
  "u_math": [
669
  52.36363636363637,
 
936
  ]
937
  },
938
  {
939
+ "model_name": "meta-llama/Llama-3.1-70B-Instruct",
940
  "judge_model_name": "gpt-4o-2024-08-06",
941
  "u_math": [
942
  34.27272727272727,
 
975
  ]
976
  },
977
  {
978
+ "model_name": "nvidia/Llama-3.1-Nemotron-70B-Instruct",
979
  "judge_model_name": "gpt-4o-2024-08-06",
980
  "u_math": [
981
  42.54545454545455,
 
1014
  ]
1015
  },
1016
  {
1017
+ "model_name": "meta-llama/Llama-3.3-70B-Instruct",
1018
  "judge_model_name": "gpt-4o-2024-08-06",
1019
  "u_math": [
1020
  44.72727272727273,
 
1053
  ]
1054
  },
1055
  {
1056
+ "model_name": "meta-llama/Llama-3.2-90B-Vision-Instruct",
1057
  "judge_model_name": "gpt-4o-2024-08-06",
1058
  "u_math": [
1059
  37.18181818181818,
 
1092
  ]
1093
  },
1094
  {
1095
+ "model_name": "Qwen/Qwen2-VL-7B-Instruct",
1096
  "judge_model_name": "gpt-4o-2024-08-06",
1097
  "u_math": [
1098
  26.272727272727277,
 
1131
  ]
1132
  },
1133
  {
1134
+ "model_name": "Qwen/Qwen2-VL-72B-Instruct",
1135
  "judge_model_name": "gpt-4o-2024-08-06",
1136
  "u_math": [
1137
  41.81818181818181,