sukrucildirr commited on
Commit
c00b5fd
·
verified ·
1 Parent(s): 52a205a

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 2.727621313169948e-06,
4
- "train_runtime": 377.7644,
5
- "train_samples": 160,
6
- "train_samples_per_second": 0.847,
7
- "train_steps_per_second": 0.053
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 1.7470321199652972e-05,
4
+ "train_runtime": 777.5233,
5
+ "train_samples": 316,
6
+ "train_samples_per_second": 0.412,
7
+ "train_steps_per_second": 0.026
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b98f651468c2d65adf2a8906702d308cef3335895e599b86b8b670cb4be53ca0
3
  size 1976163472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c98923f941bb1c89087d2d86c413aa24697fbf13481a19214e276a473ca53cc
3
  size 1976163472
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 2.727621313169948e-06,
4
- "train_runtime": 377.7644,
5
- "train_samples": 160,
6
- "train_samples_per_second": 0.847,
7
- "train_steps_per_second": 0.053
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 1.7470321199652972e-05,
4
+ "train_runtime": 777.5233,
5
+ "train_samples": 316,
6
+ "train_samples_per_second": 0.412,
7
+ "train_steps_per_second": 0.026
8
  }
trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.0,
6
  "eval_steps": 500,
7
  "global_step": 20,
8
  "is_hyper_param_search": false,
@@ -10,203 +10,203 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "completion_length": 405.0,
14
- "epoch": 0.1,
15
- "grad_norm": 3.542961359024048,
16
  "kl": 0.0,
17
  "learning_rate": 4.965903258506806e-07,
18
- "loss": 0.0,
19
- "reward": 1.4710291721858084,
20
- "reward_std": 1.442350228317082,
21
- "rewards/concensus_correctness_reward_func": 0.6257500000065193,
22
  "rewards/consensus_reward_func": 0.0625,
23
  "rewards/cumulative_reward_2": 0.0,
24
- "rewards/final_correctness_reward_func": 0.125,
25
- "rewards/question_recreation_reward_func": 0.41421666427049786,
26
  "rewards/soft_format_reward_func": 0.0,
27
  "rewards/strict_format_reward_func": 0.0,
28
- "rewards/xmlcount_reward_func": 0.243562500923872,
29
  "step": 2
30
  },
31
  {
32
- "completion_length": 457.65625,
33
- "epoch": 0.2,
34
- "grad_norm": 3.708958625793457,
35
- "kl": 0.0009309585911978502,
36
  "learning_rate": 4.698684378016222e-07,
37
- "loss": 0.0,
38
- "reward": 1.0695764030097052,
39
- "reward_std": 0.8023686404339969,
40
- "rewards/concensus_correctness_reward_func": 0.08512499928474426,
41
- "rewards/consensus_reward_func": 0.125,
42
  "rewards/cumulative_reward_2": 0.0,
43
- "rewards/final_correctness_reward_func": 0.0,
44
- "rewards/question_recreation_reward_func": 0.5073889201739803,
45
  "rewards/soft_format_reward_func": 0.0,
46
  "rewards/strict_format_reward_func": 0.0,
47
- "rewards/xmlcount_reward_func": 0.35206249984912574,
48
  "step": 4
49
  },
50
  {
51
- "completion_length": 452.5625,
52
- "epoch": 0.3,
53
- "grad_norm": 4.777602672576904,
54
- "kl": 0.0013669177387782838,
55
  "learning_rate": 4.193203929064353e-07,
56
  "loss": 0.0,
57
- "reward": 1.1037784151558299,
58
- "reward_std": 0.9202575778181199,
59
- "rewards/concensus_correctness_reward_func": 0.12250000238418579,
60
- "rewards/consensus_reward_func": 0.1875,
61
  "rewards/cumulative_reward_2": 0.0,
62
  "rewards/final_correctness_reward_func": 0.25,
63
- "rewards/question_recreation_reward_func": 0.4263096438080538,
64
  "rewards/soft_format_reward_func": 0.0,
65
  "rewards/strict_format_reward_func": 0.0,
66
- "rewards/xmlcount_reward_func": 0.11746875196695328,
67
  "step": 6
68
  },
69
  {
70
- "completion_length": 460.96875,
71
- "epoch": 0.4,
72
- "grad_norm": 4.447817325592041,
73
- "kl": 0.0016320224494847935,
74
  "learning_rate": 3.5042385616324236e-07,
75
  "loss": 0.0,
76
- "reward": 0.974716882687062,
77
- "reward_std": 1.3214201303198934,
78
- "rewards/concensus_correctness_reward_func": 0.13868750259280205,
79
- "rewards/consensus_reward_func": 0.1875,
80
  "rewards/cumulative_reward_2": 0.0,
81
- "rewards/final_correctness_reward_func": 0.0625,
82
- "rewards/question_recreation_reward_func": 0.46731058473233134,
83
  "rewards/soft_format_reward_func": 0.0,
84
- "rewards/strict_format_reward_func": 0.015625,
85
- "rewards/xmlcount_reward_func": 0.10309376195073128,
86
  "step": 8
87
  },
88
  {
89
- "completion_length": 388.28125,
90
- "epoch": 0.5,
91
- "grad_norm": 5.731485366821289,
92
- "kl": 0.007084455588483252,
93
  "learning_rate": 2.706448363680831e-07,
94
  "loss": 0.0,
95
- "reward": 0.7409542207606137,
96
- "reward_std": 0.8507989638310391,
97
- "rewards/concensus_correctness_reward_func": 0.018812499940395355,
98
- "rewards/consensus_reward_func": 0.125,
99
  "rewards/cumulative_reward_2": 0.0,
100
- "rewards/final_correctness_reward_func": 0.0625,
101
- "rewards/question_recreation_reward_func": 0.4011729843914509,
102
  "rewards/soft_format_reward_func": 0.0,
103
  "rewards/strict_format_reward_func": 0.0,
104
- "rewards/xmlcount_reward_func": 0.13346874713897705,
105
  "step": 10
106
  },
107
  {
108
- "completion_length": 452.84375,
109
- "epoch": 0.6,
110
- "grad_norm": 4.6150994300842285,
111
- "kl": 0.004119118915696163,
112
  "learning_rate": 1.886286282148002e-07,
113
  "loss": 0.0,
114
- "reward": 1.4759507272392511,
115
- "reward_std": 1.8463579853996634,
116
- "rewards/concensus_correctness_reward_func": 0.6328125,
117
- "rewards/consensus_reward_func": 0.0625,
118
  "rewards/cumulative_reward_2": 0.0,
119
- "rewards/final_correctness_reward_func": 0.0625,
120
- "rewards/question_recreation_reward_func": 0.3737632445991039,
121
  "rewards/soft_format_reward_func": 0.0,
122
  "rewards/strict_format_reward_func": 0.0,
123
- "rewards/xmlcount_reward_func": 0.34437499660998583,
124
  "step": 12
125
  },
126
  {
127
- "completion_length": 445.71875,
128
- "epoch": 0.7,
129
- "grad_norm": 4.404483318328857,
130
- "kl": 0.0020462702559598256,
131
  "learning_rate": 1.1326296046939333e-07,
132
  "loss": 0.0,
133
- "reward": 0.8386208694428205,
134
- "reward_std": 0.7303225318901241,
135
  "rewards/concensus_correctness_reward_func": 0.0,
136
- "rewards/consensus_reward_func": 0.0,
137
  "rewards/cumulative_reward_2": 0.0,
138
  "rewards/final_correctness_reward_func": 0.0625,
139
- "rewards/question_recreation_reward_func": 0.4480896024033427,
140
  "rewards/soft_format_reward_func": 0.0,
141
  "rewards/strict_format_reward_func": 0.0,
142
- "rewards/xmlcount_reward_func": 0.3280312493443489,
143
  "step": 14
144
  },
145
  {
146
- "completion_length": 441.46875,
147
- "epoch": 0.8,
148
- "grad_norm": 3.878316879272461,
149
- "kl": 0.002502717266906984,
150
  "learning_rate": 5.271487265090163e-08,
151
- "loss": 0.0,
152
- "reward": 1.5705258045345545,
153
- "reward_std": 0.7504621744155884,
154
- "rewards/concensus_correctness_reward_func": 0.04600000008940697,
155
- "rewards/consensus_reward_func": 0.1875,
156
  "rewards/cumulative_reward_2": 0.0,
157
- "rewards/final_correctness_reward_func": 0.1875,
158
- "rewards/question_recreation_reward_func": 0.5816507926210761,
159
  "rewards/soft_format_reward_func": 0.0,
160
- "rewards/strict_format_reward_func": 0.0,
161
- "rewards/xmlcount_reward_func": 0.5678749927319586,
162
  "step": 16
163
  },
164
  {
165
- "completion_length": 429.53125,
166
- "epoch": 0.9,
167
- "grad_norm": 4.667481899261475,
168
- "kl": 0.0050733851312543266,
169
  "learning_rate": 1.3545689574841341e-08,
170
  "loss": 0.0,
171
- "reward": 1.3714904030784965,
172
- "reward_std": 0.9252548192162067,
173
- "rewards/concensus_correctness_reward_func": 0.20068750344216824,
174
- "rewards/consensus_reward_func": 0.1875,
175
  "rewards/cumulative_reward_2": 0.0,
176
  "rewards/final_correctness_reward_func": 0.0625,
177
- "rewards/question_recreation_reward_func": 0.40092792897485197,
178
  "rewards/soft_format_reward_func": 0.0,
179
  "rewards/strict_format_reward_func": 0.0,
180
- "rewards/xmlcount_reward_func": 0.5198749983683228,
181
  "step": 18
182
  },
183
  {
184
- "completion_length": 426.34375,
185
- "epoch": 1.0,
186
- "grad_norm": 4.335679054260254,
187
- "kl": 0.002546306644944707,
188
  "learning_rate": 0.0,
189
  "loss": 0.0,
190
- "reward": 0.981014184653759,
191
- "reward_std": 0.4036117763584457,
192
- "rewards/concensus_correctness_reward_func": 0.06537499837577343,
193
- "rewards/consensus_reward_func": 0.1875,
194
  "rewards/cumulative_reward_2": 0.0,
195
- "rewards/final_correctness_reward_func": 0.0,
196
- "rewards/question_recreation_reward_func": 0.4608266893774271,
197
  "rewards/soft_format_reward_func": 0.0,
198
  "rewards/strict_format_reward_func": 0.0,
199
- "rewards/xmlcount_reward_func": 0.2673124959692359,
200
  "step": 20
201
  },
202
  {
203
- "epoch": 1.0,
204
  "step": 20,
205
  "total_flos": 0.0,
206
- "train_loss": 2.727621313169948e-06,
207
- "train_runtime": 377.7644,
208
- "train_samples_per_second": 0.847,
209
- "train_steps_per_second": 0.053
210
  }
211
  ],
212
  "logging_steps": 2,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5063291139240507,
6
  "eval_steps": 500,
7
  "global_step": 20,
8
  "is_hyper_param_search": false,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "completion_length": 390.625,
14
+ "epoch": 0.05063291139240506,
15
+ "grad_norm": 19.693897247314453,
16
  "kl": 0.0,
17
  "learning_rate": 4.965903258506806e-07,
18
+ "loss": -0.0,
19
+ "reward": 0.5654189372435212,
20
+ "reward_std": 0.6884031481022248,
21
+ "rewards/concensus_correctness_reward_func": 0.10256250202655792,
22
  "rewards/consensus_reward_func": 0.0625,
23
  "rewards/cumulative_reward_2": 0.0,
24
+ "rewards/final_correctness_reward_func": 0.1875,
25
+ "rewards/question_recreation_reward_func": 0.10379392973845825,
26
  "rewards/soft_format_reward_func": 0.0,
27
  "rewards/strict_format_reward_func": 0.0,
28
+ "rewards/xmlcount_reward_func": 0.10906249936670065,
29
  "step": 2
30
  },
31
  {
32
+ "completion_length": 423.5,
33
+ "epoch": 0.10126582278481013,
34
+ "grad_norm": 15.050867080688477,
35
+ "kl": 0.0015515557388425805,
36
  "learning_rate": 4.698684378016222e-07,
37
+ "loss": -0.0,
38
+ "reward": 0.41121767554432154,
39
+ "reward_std": 0.2975026599524426,
40
+ "rewards/concensus_correctness_reward_func": 0.0,
41
+ "rewards/consensus_reward_func": 0.0,
42
  "rewards/cumulative_reward_2": 0.0,
43
+ "rewards/final_correctness_reward_func": 0.3125,
44
+ "rewards/question_recreation_reward_func": 0.10174894565716386,
45
  "rewards/soft_format_reward_func": 0.0,
46
  "rewards/strict_format_reward_func": 0.0,
47
+ "rewards/xmlcount_reward_func": -0.003031250322237611,
48
  "step": 4
49
  },
50
  {
51
+ "completion_length": 302.0625,
52
+ "epoch": 0.1518987341772152,
53
+ "grad_norm": 105.33124542236328,
54
+ "kl": 0.01090021853451617,
55
  "learning_rate": 4.193203929064353e-07,
56
  "loss": 0.0,
57
+ "reward": 0.016203501319978386,
58
+ "reward_std": 0.8818220303510316,
59
+ "rewards/concensus_correctness_reward_func": 0.0,
60
+ "rewards/consensus_reward_func": 0.0,
61
  "rewards/cumulative_reward_2": 0.0,
62
  "rewards/final_correctness_reward_func": 0.25,
63
+ "rewards/question_recreation_reward_func": 0.10098475415725261,
64
  "rewards/soft_format_reward_func": 0.0,
65
  "rewards/strict_format_reward_func": 0.0,
66
+ "rewards/xmlcount_reward_func": -0.33478125697001815,
67
  "step": 6
68
  },
69
  {
70
+ "completion_length": 376.53125,
71
+ "epoch": 0.20253164556962025,
72
+ "grad_norm": 6.963199138641357,
73
+ "kl": 0.00975047947395069,
74
  "learning_rate": 3.5042385616324236e-07,
75
  "loss": 0.0,
76
+ "reward": 2.377215757209342,
77
+ "reward_std": 1.1443396444774407,
78
+ "rewards/concensus_correctness_reward_func": 1.875,
79
+ "rewards/consensus_reward_func": 0.25,
80
  "rewards/cumulative_reward_2": 0.0,
81
+ "rewards/final_correctness_reward_func": 0.125,
82
+ "rewards/question_recreation_reward_func": 0.15671575430314988,
83
  "rewards/soft_format_reward_func": 0.0,
84
+ "rewards/strict_format_reward_func": 0.0,
85
+ "rewards/xmlcount_reward_func": -0.029499998316168785,
86
  "step": 8
87
  },
88
  {
89
+ "completion_length": 326.84375,
90
+ "epoch": 0.25316455696202533,
91
+ "grad_norm": 13.946148872375488,
92
+ "kl": 0.0013114591529301833,
93
  "learning_rate": 2.706448363680831e-07,
94
  "loss": 0.0,
95
+ "reward": 0.4975748333381489,
96
+ "reward_std": 0.4967209550086409,
97
+ "rewards/concensus_correctness_reward_func": 0.0,
98
+ "rewards/consensus_reward_func": 0.0,
99
  "rewards/cumulative_reward_2": 0.0,
100
+ "rewards/final_correctness_reward_func": 0.125,
101
+ "rewards/question_recreation_reward_func": 0.24107483273837715,
102
  "rewards/soft_format_reward_func": 0.0,
103
  "rewards/strict_format_reward_func": 0.0,
104
+ "rewards/xmlcount_reward_func": 0.13149999920278788,
105
  "step": 10
106
  },
107
  {
108
+ "completion_length": 295.90625,
109
+ "epoch": 0.3037974683544304,
110
+ "grad_norm": 16.219940185546875,
111
+ "kl": 0.0020969353536202107,
112
  "learning_rate": 1.886286282148002e-07,
113
  "loss": 0.0,
114
+ "reward": 1.0173564599826932,
115
+ "reward_std": 1.5090986860057,
116
+ "rewards/concensus_correctness_reward_func": 0.625,
117
+ "rewards/consensus_reward_func": 0.0,
118
  "rewards/cumulative_reward_2": 0.0,
119
+ "rewards/final_correctness_reward_func": 0.3125,
120
+ "rewards/question_recreation_reward_func": 0.19388769299257547,
121
  "rewards/soft_format_reward_func": 0.0,
122
  "rewards/strict_format_reward_func": 0.0,
123
+ "rewards/xmlcount_reward_func": -0.11403124465141445,
124
  "step": 12
125
  },
126
  {
127
+ "completion_length": 333.96875,
128
+ "epoch": 0.35443037974683544,
129
+ "grad_norm": 55.26883316040039,
130
+ "kl": 0.02062846499393345,
131
  "learning_rate": 1.1326296046939333e-07,
132
  "loss": 0.0,
133
+ "reward": 0.4555127089843154,
134
+ "reward_std": 0.49959600171496277,
135
  "rewards/concensus_correctness_reward_func": 0.0,
136
+ "rewards/consensus_reward_func": 0.0625,
137
  "rewards/cumulative_reward_2": 0.0,
138
  "rewards/final_correctness_reward_func": 0.0625,
139
+ "rewards/question_recreation_reward_func": 0.18432520679198205,
140
  "rewards/soft_format_reward_func": 0.0,
141
  "rewards/strict_format_reward_func": 0.0,
142
+ "rewards/xmlcount_reward_func": 0.14618749916553497,
143
  "step": 14
144
  },
145
  {
146
+ "completion_length": 350.71875,
147
+ "epoch": 0.4050632911392405,
148
+ "grad_norm": 131.1157684326172,
149
+ "kl": 0.11864930617957725,
150
  "learning_rate": 5.271487265090163e-08,
151
+ "loss": 0.0001,
152
+ "reward": 0.45018581731710583,
153
+ "reward_std": 0.4574645821703598,
154
+ "rewards/concensus_correctness_reward_func": 0.0,
155
+ "rewards/consensus_reward_func": 0.125,
156
  "rewards/cumulative_reward_2": 0.0,
157
+ "rewards/final_correctness_reward_func": 0.0625,
158
+ "rewards/question_recreation_reward_func": 0.13740457454696298,
159
  "rewards/soft_format_reward_func": 0.0,
160
+ "rewards/strict_format_reward_func": 0.015625,
161
+ "rewards/xmlcount_reward_func": 0.10965625010430813,
162
  "step": 16
163
  },
164
  {
165
+ "completion_length": 311.78125,
166
+ "epoch": 0.45569620253164556,
167
+ "grad_norm": 28.433929443359375,
168
+ "kl": 0.002976935877086362,
169
  "learning_rate": 1.3545689574841341e-08,
170
  "loss": 0.0,
171
+ "reward": 0.34964469261467457,
172
+ "reward_std": 0.34213452675612643,
173
+ "rewards/concensus_correctness_reward_func": 0.0,
174
+ "rewards/consensus_reward_func": 0.0,
175
  "rewards/cumulative_reward_2": 0.0,
176
  "rewards/final_correctness_reward_func": 0.0625,
177
+ "rewards/question_recreation_reward_func": 0.23998843505978584,
178
  "rewards/soft_format_reward_func": 0.0,
179
  "rewards/strict_format_reward_func": 0.0,
180
+ "rewards/xmlcount_reward_func": 0.04715625708922744,
181
  "step": 18
182
  },
183
  {
184
+ "completion_length": 227.90625,
185
+ "epoch": 0.5063291139240507,
186
+ "grad_norm": 9.338957786560059,
187
+ "kl": 0.013950712120276876,
188
  "learning_rate": 0.0,
189
  "loss": 0.0,
190
+ "reward": 0.5095127180684358,
191
+ "reward_std": 0.5383867279160768,
192
+ "rewards/concensus_correctness_reward_func": 0.0,
193
+ "rewards/consensus_reward_func": 0.0625,
194
  "rewards/cumulative_reward_2": 0.0,
195
+ "rewards/final_correctness_reward_func": 0.1875,
196
+ "rewards/question_recreation_reward_func": 0.13626271882094443,
197
  "rewards/soft_format_reward_func": 0.0,
198
  "rewards/strict_format_reward_func": 0.0,
199
+ "rewards/xmlcount_reward_func": 0.12324999878183007,
200
  "step": 20
201
  },
202
  {
203
+ "epoch": 0.5063291139240507,
204
  "step": 20,
205
  "total_flos": 0.0,
206
+ "train_loss": 1.7470321199652972e-05,
207
+ "train_runtime": 777.5233,
208
+ "train_samples_per_second": 0.412,
209
+ "train_steps_per_second": 0.026
210
  }
211
  ],
212
  "logging_steps": 2,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:577f25fa8e3e784bc7ee5e169a012f7020e2d21576732b30eeaf080e28b4ed43
3
  size 6008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce77f6e14977aab39201ed9d80237953c712a0476f7b2159056c099d7befbfa7
3
  size 6008