Shawon16 commited on
Commit
4355ef2
·
verified ·
1 Parent(s): 6bde966

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +7 -7
  2. test_results.json +7 -7
  3. trainer_state.json +1114 -1005
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "accuracy": 0.49224806201550386,
3
- "f1": 0.4904608096468562,
4
- "precision": 0.5755906238464378,
5
- "recall": 0.49224806201550386,
6
- "top_10_accuracy": 0.8682170542635659,
7
- "top_1_accuracy": 0.49224806201550386,
8
- "top_5_accuracy": 0.7558139534883721
9
  }
 
1
  {
2
+ "accuracy": 0.5697674418604651,
3
+ "f1": 0.5385868653310515,
4
+ "precision": 0.5749307862679955,
5
+ "recall": 0.5697674418604651,
6
+ "top_10_accuracy": 0.875968992248062,
7
+ "top_1_accuracy": 0.5697674418604651,
8
+ "top_5_accuracy": 0.8217054263565892
9
  }
test_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "accuracy": 0.49224806201550386,
3
- "f1": 0.4904608096468562,
4
- "precision": 0.5755906238464378,
5
- "recall": 0.49224806201550386,
6
- "top_10_accuracy": 0.8682170542635659,
7
- "top_1_accuracy": 0.49224806201550386,
8
- "top_5_accuracy": 0.7558139534883721
9
  }
 
1
  {
2
+ "accuracy": 0.5697674418604651,
3
+ "f1": 0.5385868653310515,
4
+ "precision": 0.5749307862679955,
5
+ "recall": 0.5697674418604651,
6
+ "top_10_accuracy": 0.875968992248062,
7
+ "top_1_accuracy": 0.5697674418604651,
8
+ "top_5_accuracy": 0.8217054263565892
9
  }
trainer_state.json CHANGED
@@ -1,1539 +1,1648 @@
1
  {
2
- "best_metric": 0.5680473372781065,
3
- "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/VideoMAE_Base_WLASL_100_200_epochs_p20_SR_8/checkpoint-6308",
4
- "epoch": 54.00498611111111,
5
  "eval_steps": 500,
6
- "global_step": 9913,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.002777777777777778,
13
- "grad_norm": 44.761619567871094,
14
  "learning_rate": 1.3333333333333334e-06,
15
- "loss": 18.6841,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.005,
20
  "eval_accuracy": 0.008875739644970414,
21
- "eval_f1": 0.0028090252942323947,
22
- "eval_loss": 4.650303840637207,
23
- "eval_precision": 0.002358710338722453,
24
  "eval_recall": 0.008875739644970414,
25
- "eval_runtime": 64.7947,
26
- "eval_samples_per_second": 5.216,
27
- "eval_steps_per_second": 2.608,
28
- "eval_top_10_accuracy": 0.10946745562130178,
29
  "eval_top_1_accuracy": 0.008875739644970414,
30
- "eval_top_5_accuracy": 0.047337278106508875,
31
  "step": 180
32
  },
33
  {
34
- "epoch": 1.000548611111111,
35
- "grad_norm": 73.21803283691406,
36
  "learning_rate": 2.7222222222222224e-06,
37
- "loss": 18.6074,
38
  "step": 200
39
  },
40
  {
41
- "epoch": 1.0033263888888888,
42
- "grad_norm": 66.99571990966797,
43
  "learning_rate": 4.111111111111112e-06,
44
- "loss": 18.5513,
45
  "step": 300
46
  },
47
  {
48
- "epoch": 1.0049930555555555,
49
- "eval_accuracy": 0.014792899408284023,
50
- "eval_f1": 0.0007428992223267716,
51
- "eval_loss": 4.6342573165893555,
52
- "eval_precision": 0.00038125026745873357,
53
- "eval_recall": 0.014792899408284023,
54
- "eval_runtime": 57.017,
55
- "eval_samples_per_second": 5.928,
56
- "eval_steps_per_second": 2.964,
57
- "eval_top_10_accuracy": 0.13609467455621302,
58
- "eval_top_1_accuracy": 0.014792899408284023,
59
  "eval_top_5_accuracy": 0.0621301775147929,
60
  "step": 360
61
  },
62
  {
63
- "epoch": 2.001097222222222,
64
- "grad_norm": 42.89289474487305,
65
  "learning_rate": 5.500000000000001e-06,
66
- "loss": 18.5436,
67
  "step": 400
68
  },
69
  {
70
- "epoch": 2.003875,
71
- "grad_norm": 36.9390869140625,
72
  "learning_rate": 6.888888888888889e-06,
73
- "loss": 18.4806,
74
  "step": 500
75
  },
76
  {
77
- "epoch": 2.004986111111111,
78
- "eval_accuracy": 0.014792899408284023,
79
- "eval_f1": 0.0008787861034624173,
80
- "eval_loss": 4.611328125,
81
- "eval_precision": 0.00045284385943726607,
82
- "eval_recall": 0.014792899408284023,
83
- "eval_runtime": 60.6335,
84
- "eval_samples_per_second": 5.574,
85
- "eval_steps_per_second": 2.787,
86
- "eval_top_10_accuracy": 0.11834319526627218,
87
- "eval_top_1_accuracy": 0.014792899408284023,
88
- "eval_top_5_accuracy": 0.06804733727810651,
89
- "step": 540
90
  },
91
  {
92
- "epoch": 3.0016458333333333,
93
- "grad_norm": 35.67766189575195,
94
  "learning_rate": 8.27777777777778e-06,
95
- "loss": 18.4495,
96
  "step": 600
97
  },
98
  {
99
- "epoch": 3.004423611111111,
100
- "grad_norm": 35.079776763916016,
101
  "learning_rate": 9.666666666666667e-06,
102
- "loss": 18.434,
103
  "step": 700
104
  },
105
  {
106
- "epoch": 3.0050069444444443,
107
- "eval_accuracy": 0.020710059171597635,
108
- "eval_f1": 0.0028698333117318383,
109
- "eval_loss": 4.623179912567139,
110
- "eval_precision": 0.0015918315646110785,
111
- "eval_recall": 0.020710059171597635,
112
- "eval_runtime": 58.2114,
113
- "eval_samples_per_second": 5.806,
114
- "eval_steps_per_second": 2.903,
115
- "eval_top_10_accuracy": 0.10946745562130178,
116
- "eval_top_1_accuracy": 0.020710059171597635,
117
- "eval_top_5_accuracy": 0.0650887573964497,
118
  "step": 721
119
  },
120
  {
121
- "epoch": 4.002194444444444,
122
- "grad_norm": 32.86574172973633,
123
  "learning_rate": 1.1055555555555556e-05,
124
- "loss": 18.3155,
125
  "step": 800
126
  },
127
  {
128
- "epoch": 4.004972222222222,
129
- "grad_norm": 38.9669075012207,
130
  "learning_rate": 1.2444444444444445e-05,
131
- "loss": 18.4438,
132
  "step": 900
133
  },
134
  {
135
- "epoch": 4.005,
136
- "eval_accuracy": 0.01775147928994083,
137
- "eval_f1": 0.004888222824104215,
138
- "eval_loss": 4.616748332977295,
139
- "eval_precision": 0.00515812106198576,
140
- "eval_recall": 0.01775147928994083,
141
- "eval_runtime": 75.0582,
142
- "eval_samples_per_second": 4.503,
143
- "eval_steps_per_second": 2.252,
144
- "eval_top_10_accuracy": 0.14201183431952663,
145
- "eval_top_1_accuracy": 0.01775147928994083,
146
- "eval_top_5_accuracy": 0.0621301775147929,
147
  "step": 901
148
  },
149
  {
150
- "epoch": 5.002743055555555,
151
- "grad_norm": 30.948198318481445,
152
  "learning_rate": 1.3833333333333334e-05,
153
- "loss": 18.2038,
154
  "step": 1000
155
  },
156
  {
157
- "epoch": 5.0049930555555555,
158
- "eval_accuracy": 0.020710059171597635,
159
- "eval_f1": 0.002628381440247109,
160
- "eval_loss": 4.628068447113037,
161
- "eval_precision": 0.0014603929265178319,
162
- "eval_recall": 0.020710059171597635,
163
- "eval_runtime": 56.6857,
164
- "eval_samples_per_second": 5.963,
165
- "eval_steps_per_second": 2.981,
166
- "eval_top_10_accuracy": 0.13313609467455623,
167
- "eval_top_1_accuracy": 0.020710059171597635,
168
- "eval_top_5_accuracy": 0.08284023668639054,
169
  "step": 1081
170
  },
171
  {
172
- "epoch": 6.0005138888888885,
173
- "grad_norm": 32.544097900390625,
174
  "learning_rate": 1.5222222222222224e-05,
175
- "loss": 18.2819,
176
  "step": 1100
177
  },
178
  {
179
- "epoch": 6.003291666666667,
180
- "grad_norm": 31.63671875,
181
  "learning_rate": 1.661111111111111e-05,
182
- "loss": 18.2475,
183
  "step": 1200
184
  },
185
  {
186
- "epoch": 6.004986111111111,
187
- "eval_accuracy": 0.020710059171597635,
188
- "eval_f1": 0.0026314295054457367,
189
- "eval_loss": 4.627294063568115,
190
- "eval_precision": 0.0014502725830579143,
191
- "eval_recall": 0.020710059171597635,
192
- "eval_runtime": 60.0837,
193
- "eval_samples_per_second": 5.625,
194
- "eval_steps_per_second": 2.813,
195
- "eval_top_10_accuracy": 0.11834319526627218,
196
- "eval_top_1_accuracy": 0.020710059171597635,
197
- "eval_top_5_accuracy": 0.0650887573964497,
198
- "step": 1261
199
  },
200
  {
201
- "epoch": 7.0010625,
202
- "grad_norm": 33.69670486450195,
203
  "learning_rate": 1.8e-05,
204
- "loss": 18.1596,
205
  "step": 1300
206
  },
207
  {
208
- "epoch": 7.003840277777778,
209
- "grad_norm": 30.9010009765625,
210
  "learning_rate": 1.938888888888889e-05,
211
- "loss": 18.1407,
212
  "step": 1400
213
  },
214
  {
215
- "epoch": 7.005006944444444,
216
- "eval_accuracy": 0.014792899408284023,
217
- "eval_f1": 0.010736730310071949,
218
- "eval_loss": 4.602706432342529,
219
- "eval_precision": 0.016151808947041886,
220
- "eval_recall": 0.014792899408284023,
221
- "eval_runtime": 62.4422,
222
- "eval_samples_per_second": 5.413,
223
- "eval_steps_per_second": 2.707,
224
- "eval_top_10_accuracy": 0.15384615384615385,
225
- "eval_top_1_accuracy": 0.014792899408284023,
226
- "eval_top_5_accuracy": 0.07988165680473373,
227
  "step": 1442
228
  },
229
  {
230
- "epoch": 8.001611111111112,
231
- "grad_norm": 38.095611572265625,
232
  "learning_rate": 2.077777777777778e-05,
233
- "loss": 18.0154,
234
  "step": 1500
235
  },
236
  {
237
- "epoch": 8.004388888888888,
238
- "grad_norm": 28.888912200927734,
239
  "learning_rate": 2.216666666666667e-05,
240
- "loss": 17.9877,
241
  "step": 1600
242
  },
243
  {
244
- "epoch": 8.005,
245
- "eval_accuracy": 0.026627218934911243,
246
- "eval_f1": 0.002959754620541719,
247
- "eval_loss": 4.57106351852417,
248
- "eval_precision": 0.001621455467609314,
249
- "eval_recall": 0.026627218934911243,
250
- "eval_runtime": 60.0394,
251
- "eval_samples_per_second": 5.63,
252
- "eval_steps_per_second": 2.815,
253
- "eval_top_10_accuracy": 0.14201183431952663,
254
- "eval_top_1_accuracy": 0.026627218934911243,
255
- "eval_top_5_accuracy": 0.08875739644970414,
256
  "step": 1622
257
  },
258
  {
259
- "epoch": 9.002159722222222,
260
- "grad_norm": 39.636756896972656,
261
  "learning_rate": 2.3555555555555556e-05,
262
- "loss": 17.6586,
263
  "step": 1700
264
  },
265
  {
266
- "epoch": 9.0049375,
267
- "grad_norm": 36.179622650146484,
268
  "learning_rate": 2.4944444444444447e-05,
269
- "loss": 17.6844,
270
  "step": 1800
271
  },
272
  {
273
- "epoch": 9.004993055555556,
274
- "eval_accuracy": 0.038461538461538464,
275
- "eval_f1": 0.014575543949312847,
276
- "eval_loss": 4.48525333404541,
277
- "eval_precision": 0.01083091455885828,
278
- "eval_recall": 0.038461538461538464,
279
- "eval_runtime": 58.3286,
280
- "eval_samples_per_second": 5.795,
281
- "eval_steps_per_second": 2.897,
282
- "eval_top_10_accuracy": 0.1952662721893491,
283
- "eval_top_1_accuracy": 0.038461538461538464,
284
- "eval_top_5_accuracy": 0.11834319526627218,
285
  "step": 1802
286
  },
287
  {
288
- "epoch": 10.002708333333333,
289
- "grad_norm": 33.06723403930664,
290
  "learning_rate": 2.633333333333333e-05,
291
- "loss": 16.9804,
292
  "step": 1900
293
  },
294
  {
295
- "epoch": 10.00498611111111,
296
- "eval_accuracy": 0.038461538461538464,
297
- "eval_f1": 0.007837026266938529,
298
- "eval_loss": 4.283570289611816,
299
- "eval_precision": 0.004664077362357343,
300
- "eval_recall": 0.038461538461538464,
301
- "eval_runtime": 60.5645,
302
- "eval_samples_per_second": 5.581,
303
- "eval_steps_per_second": 2.79,
304
- "eval_top_10_accuracy": 0.2781065088757396,
305
- "eval_top_1_accuracy": 0.038461538461538464,
306
- "eval_top_5_accuracy": 0.15088757396449703,
307
- "step": 1982
308
  },
309
  {
310
- "epoch": 11.000479166666667,
311
- "grad_norm": 37.544349670410156,
312
  "learning_rate": 2.772222222222222e-05,
313
- "loss": 16.683,
314
  "step": 2000
315
  },
316
  {
317
- "epoch": 11.003256944444445,
318
- "grad_norm": 39.149925231933594,
319
  "learning_rate": 2.9111111111111112e-05,
320
- "loss": 15.9246,
321
  "step": 2100
322
  },
323
  {
324
- "epoch": 11.005006944444444,
325
- "eval_accuracy": 0.07396449704142012,
326
- "eval_f1": 0.03186227070036653,
327
- "eval_loss": 3.9420433044433594,
328
- "eval_precision": 0.024074685343264134,
329
- "eval_recall": 0.07396449704142012,
330
- "eval_runtime": 61.6221,
331
- "eval_samples_per_second": 5.485,
332
- "eval_steps_per_second": 2.743,
333
- "eval_top_10_accuracy": 0.4378698224852071,
334
- "eval_top_1_accuracy": 0.07396449704142012,
335
- "eval_top_5_accuracy": 0.27514792899408286,
336
  "step": 2163
337
  },
338
  {
339
- "epoch": 12.001027777777777,
340
- "grad_norm": 33.2915153503418,
341
  "learning_rate": 3.05e-05,
342
- "loss": 14.9644,
343
  "step": 2200
344
  },
345
  {
346
- "epoch": 12.003805555555555,
347
- "grad_norm": 43.72281265258789,
348
  "learning_rate": 3.188888888888889e-05,
349
- "loss": 14.253,
350
  "step": 2300
351
  },
352
  {
353
- "epoch": 12.005,
354
- "eval_accuracy": 0.15384615384615385,
355
- "eval_f1": 0.10824208896259017,
356
- "eval_loss": 3.5585851669311523,
357
- "eval_precision": 0.11833714349597253,
358
- "eval_recall": 0.15384615384615385,
359
- "eval_runtime": 62.5539,
360
- "eval_samples_per_second": 5.403,
361
- "eval_steps_per_second": 2.702,
362
- "eval_top_10_accuracy": 0.6420118343195266,
363
- "eval_top_1_accuracy": 0.15384615384615385,
364
- "eval_top_5_accuracy": 0.47337278106508873,
365
  "step": 2343
366
  },
367
  {
368
- "epoch": 13.00157638888889,
369
- "grad_norm": 53.25724411010742,
370
  "learning_rate": 3.327777777777778e-05,
371
- "loss": 12.9497,
372
  "step": 2400
373
  },
374
  {
375
- "epoch": 13.004354166666667,
376
- "grad_norm": 50.47941207885742,
377
  "learning_rate": 3.466666666666667e-05,
378
- "loss": 12.4027,
379
  "step": 2500
380
  },
381
  {
382
- "epoch": 13.004993055555556,
383
- "eval_accuracy": 0.22781065088757396,
384
- "eval_f1": 0.18300783700191986,
385
- "eval_loss": 3.2830753326416016,
386
- "eval_precision": 0.19724533226557517,
387
- "eval_recall": 0.22781065088757396,
388
- "eval_runtime": 63.6578,
389
- "eval_samples_per_second": 5.31,
390
- "eval_steps_per_second": 2.655,
391
- "eval_top_10_accuracy": 0.7130177514792899,
392
- "eval_top_1_accuracy": 0.22781065088757396,
393
- "eval_top_5_accuracy": 0.5443786982248521,
394
  "step": 2523
395
  },
396
  {
397
- "epoch": 14.002125,
398
- "grad_norm": 45.9249153137207,
399
  "learning_rate": 3.605555555555556e-05,
400
- "loss": 11.0163,
401
  "step": 2600
402
  },
403
  {
404
- "epoch": 14.004902777777778,
405
- "grad_norm": 69.36324310302734,
406
  "learning_rate": 3.7444444444444446e-05,
407
- "loss": 10.6164,
408
  "step": 2700
409
  },
410
  {
411
- "epoch": 14.00498611111111,
412
- "eval_accuracy": 0.30177514792899407,
413
- "eval_f1": 0.24254924983033319,
414
- "eval_loss": 2.9951417446136475,
415
- "eval_precision": 0.25831286563535083,
416
- "eval_recall": 0.30177514792899407,
417
- "eval_runtime": 74.4359,
418
- "eval_samples_per_second": 4.541,
419
- "eval_steps_per_second": 2.27,
420
- "eval_top_10_accuracy": 0.7633136094674556,
421
- "eval_top_1_accuracy": 0.30177514792899407,
422
- "eval_top_5_accuracy": 0.6301775147928994,
423
- "step": 2703
424
  },
425
  {
426
- "epoch": 15.002673611111112,
427
- "grad_norm": 58.75260925292969,
428
  "learning_rate": 3.883333333333333e-05,
429
- "loss": 9.0753,
430
  "step": 2800
431
  },
432
  {
433
- "epoch": 15.005006944444444,
434
- "eval_accuracy": 0.34023668639053256,
435
- "eval_f1": 0.28895195862651485,
436
- "eval_loss": 2.8074893951416016,
437
- "eval_precision": 0.3187842072457457,
438
- "eval_recall": 0.34023668639053256,
439
- "eval_runtime": 60.9495,
440
- "eval_samples_per_second": 5.546,
441
- "eval_steps_per_second": 2.773,
442
- "eval_top_10_accuracy": 0.7988165680473372,
443
- "eval_top_1_accuracy": 0.34023668639053256,
444
- "eval_top_5_accuracy": 0.6597633136094675,
445
  "step": 2884
446
  },
447
  {
448
- "epoch": 16.000444444444444,
449
- "grad_norm": 39.87100601196289,
450
  "learning_rate": 4.022222222222222e-05,
451
- "loss": 8.5838,
452
  "step": 2900
453
  },
454
  {
455
- "epoch": 16.003222222222224,
456
- "grad_norm": 64.38956451416016,
457
  "learning_rate": 4.1611111111111114e-05,
458
- "loss": 7.3114,
459
  "step": 3000
460
  },
461
  {
462
- "epoch": 16.005,
463
- "eval_accuracy": 0.41124260355029585,
464
- "eval_f1": 0.37015384136608054,
465
- "eval_loss": 2.5131821632385254,
466
- "eval_precision": 0.4357026356286711,
467
- "eval_recall": 0.41124260355029585,
468
- "eval_runtime": 58.7645,
469
- "eval_samples_per_second": 5.752,
470
- "eval_steps_per_second": 2.876,
471
- "eval_top_10_accuracy": 0.8668639053254438,
472
- "eval_top_1_accuracy": 0.41124260355029585,
473
- "eval_top_5_accuracy": 0.7337278106508875,
474
  "step": 3064
475
  },
476
  {
477
- "epoch": 17.000993055555554,
478
- "grad_norm": 48.52119445800781,
479
  "learning_rate": 4.3e-05,
480
- "loss": 6.6622,
481
  "step": 3100
482
  },
483
  {
484
- "epoch": 17.003770833333334,
485
- "grad_norm": 67.13871765136719,
486
  "learning_rate": 4.438888888888889e-05,
487
- "loss": 6.0168,
488
  "step": 3200
489
  },
490
  {
491
- "epoch": 17.004993055555556,
492
- "eval_accuracy": 0.4408284023668639,
493
- "eval_f1": 0.41085208348521957,
494
- "eval_loss": 2.3302810192108154,
495
- "eval_precision": 0.45284331053561816,
496
- "eval_recall": 0.4408284023668639,
497
- "eval_runtime": 57.7604,
498
- "eval_samples_per_second": 5.852,
499
- "eval_steps_per_second": 2.926,
500
- "eval_top_10_accuracy": 0.8698224852071006,
501
- "eval_top_1_accuracy": 0.4408284023668639,
502
- "eval_top_5_accuracy": 0.7633136094674556,
503
  "step": 3244
504
  },
505
  {
506
- "epoch": 18.001541666666668,
507
- "grad_norm": 46.7744140625,
508
  "learning_rate": 4.577777777777778e-05,
509
- "loss": 5.1754,
510
  "step": 3300
511
  },
512
  {
513
- "epoch": 18.004319444444445,
514
- "grad_norm": 54.56682205200195,
515
  "learning_rate": 4.716666666666667e-05,
516
- "loss": 4.6862,
517
  "step": 3400
518
  },
519
  {
520
- "epoch": 18.004986111111112,
521
- "eval_accuracy": 0.4467455621301775,
522
- "eval_f1": 0.4292371153230883,
523
- "eval_loss": 2.2995729446411133,
524
- "eval_precision": 0.521234854888701,
525
- "eval_recall": 0.4467455621301775,
526
- "eval_runtime": 58.8868,
527
- "eval_samples_per_second": 5.74,
528
- "eval_steps_per_second": 2.87,
529
- "eval_top_10_accuracy": 0.8579881656804734,
530
- "eval_top_1_accuracy": 0.4467455621301775,
531
- "eval_top_5_accuracy": 0.7514792899408284,
532
- "step": 3424
533
  },
534
  {
535
- "epoch": 19.00209027777778,
536
- "grad_norm": 39.73740768432617,
537
  "learning_rate": 4.855555555555556e-05,
538
- "loss": 4.0003,
539
  "step": 3500
540
  },
541
  {
542
- "epoch": 19.004868055555555,
543
- "grad_norm": 50.9436149597168,
544
  "learning_rate": 4.994444444444445e-05,
545
- "loss": 3.6718,
546
  "step": 3600
547
  },
548
  {
549
- "epoch": 19.005006944444446,
550
- "eval_accuracy": 0.5177514792899408,
551
- "eval_f1": 0.48937878360955284,
552
- "eval_loss": 2.0771615505218506,
553
- "eval_precision": 0.5376796280642435,
554
- "eval_recall": 0.5177514792899408,
555
- "eval_runtime": 60.5121,
556
- "eval_samples_per_second": 5.586,
557
- "eval_steps_per_second": 2.793,
558
- "eval_top_10_accuracy": 0.878698224852071,
559
- "eval_top_1_accuracy": 0.5177514792899408,
560
  "eval_top_5_accuracy": 0.7840236686390533,
561
  "step": 3605
562
  },
563
  {
564
- "epoch": 20.00263888888889,
565
- "grad_norm": 48.8848991394043,
566
  "learning_rate": 4.9851851851851855e-05,
567
- "loss": 2.7961,
568
  "step": 3700
569
  },
570
  {
571
- "epoch": 20.005,
572
- "eval_accuracy": 0.48520710059171596,
573
- "eval_f1": 0.4556548481104694,
574
- "eval_loss": 2.1207311153411865,
575
- "eval_precision": 0.5156628627782474,
576
- "eval_recall": 0.48520710059171596,
577
- "eval_runtime": 57.9018,
578
- "eval_samples_per_second": 5.837,
579
- "eval_steps_per_second": 2.919,
580
- "eval_top_10_accuracy": 0.8579881656804734,
581
- "eval_top_1_accuracy": 0.48520710059171596,
582
- "eval_top_5_accuracy": 0.7662721893491125,
583
  "step": 3785
584
  },
585
  {
586
- "epoch": 21.000409722222223,
587
- "grad_norm": 58.62202072143555,
588
  "learning_rate": 4.969753086419753e-05,
589
- "loss": 2.6912,
590
  "step": 3800
591
  },
592
  {
593
- "epoch": 21.0031875,
594
- "grad_norm": 79.86384582519531,
595
  "learning_rate": 4.954320987654321e-05,
596
- "loss": 2.0751,
597
  "step": 3900
598
  },
599
  {
600
- "epoch": 21.004993055555556,
601
- "eval_accuracy": 0.4881656804733728,
602
- "eval_f1": 0.4816365258672951,
603
- "eval_loss": 2.0227231979370117,
604
- "eval_precision": 0.5489433643279797,
605
- "eval_recall": 0.4881656804733728,
606
- "eval_runtime": 53.8251,
607
- "eval_samples_per_second": 6.28,
608
- "eval_steps_per_second": 3.14,
609
- "eval_top_10_accuracy": 0.8875739644970414,
610
- "eval_top_1_accuracy": 0.4881656804733728,
611
- "eval_top_5_accuracy": 0.7810650887573964,
612
  "step": 3965
613
  },
614
  {
615
- "epoch": 22.000958333333333,
616
- "grad_norm": 37.52458572387695,
617
  "learning_rate": 4.938888888888889e-05,
618
- "loss": 1.9706,
619
  "step": 4000
620
  },
621
  {
622
- "epoch": 22.00373611111111,
623
- "grad_norm": 36.34599304199219,
624
- "learning_rate": 4.9234567901234566e-05,
625
- "loss": 1.7998,
626
  "step": 4100
627
  },
628
  {
629
- "epoch": 22.004986111111112,
630
- "eval_accuracy": 0.4911242603550296,
631
- "eval_f1": 0.4644210408263662,
632
- "eval_loss": 2.0553224086761475,
633
- "eval_precision": 0.5105472272632036,
634
- "eval_recall": 0.4911242603550296,
635
- "eval_runtime": 57.0036,
636
- "eval_samples_per_second": 5.929,
637
- "eval_steps_per_second": 2.965,
638
- "eval_top_10_accuracy": 0.8609467455621301,
639
- "eval_top_1_accuracy": 0.4881656804733728,
640
- "eval_top_5_accuracy": 0.7751479289940828,
641
- "step": 4145
642
  },
643
  {
644
- "epoch": 23.001506944444444,
645
- "grad_norm": 93.64087677001953,
646
  "learning_rate": 4.9081790123456794e-05,
647
- "loss": 1.4459,
648
  "step": 4200
649
  },
650
  {
651
- "epoch": 23.004284722222224,
652
- "grad_norm": 22.98988914489746,
653
  "learning_rate": 4.892746913580247e-05,
654
- "loss": 1.2235,
655
  "step": 4300
656
  },
657
  {
658
- "epoch": 23.005006944444446,
659
- "eval_accuracy": 0.4881656804733728,
660
- "eval_f1": 0.4653609621804887,
661
- "eval_loss": 1.987465739250183,
662
- "eval_precision": 0.5147137871989943,
663
- "eval_recall": 0.4881656804733728,
664
- "eval_runtime": 73.3634,
665
- "eval_samples_per_second": 4.607,
666
- "eval_steps_per_second": 2.304,
667
- "eval_top_10_accuracy": 0.878698224852071,
668
- "eval_top_1_accuracy": 0.4881656804733728,
669
- "eval_top_5_accuracy": 0.8136094674556213,
670
  "step": 4326
671
  },
672
  {
673
- "epoch": 24.002055555555554,
674
- "grad_norm": 14.430959701538086,
675
  "learning_rate": 4.877314814814815e-05,
676
- "loss": 1.0324,
677
  "step": 4400
678
  },
679
  {
680
- "epoch": 24.004833333333334,
681
- "grad_norm": 29.57322120666504,
682
  "learning_rate": 4.861882716049383e-05,
683
- "loss": 1.1509,
684
  "step": 4500
685
  },
686
  {
687
- "epoch": 24.005,
688
- "eval_accuracy": 0.5088757396449705,
689
- "eval_f1": 0.4862273067459284,
690
- "eval_loss": 1.9725861549377441,
691
- "eval_precision": 0.52279579394964,
692
- "eval_recall": 0.5088757396449705,
693
- "eval_runtime": 73.1733,
694
- "eval_samples_per_second": 4.619,
695
- "eval_steps_per_second": 2.31,
696
- "eval_top_10_accuracy": 0.8727810650887574,
697
- "eval_top_1_accuracy": 0.5088757396449705,
698
- "eval_top_5_accuracy": 0.8047337278106509,
699
  "step": 4506
700
  },
701
  {
702
- "epoch": 25.002604166666668,
703
- "grad_norm": 22.385639190673828,
704
  "learning_rate": 4.8464506172839505e-05,
705
- "loss": 0.7166,
706
  "step": 4600
707
  },
708
  {
709
- "epoch": 25.004993055555556,
710
- "eval_accuracy": 0.5118343195266272,
711
- "eval_f1": 0.5096006557545019,
712
- "eval_loss": 1.9579885005950928,
713
- "eval_precision": 0.5969287123133278,
714
- "eval_recall": 0.5118343195266272,
715
- "eval_runtime": 56.9942,
716
- "eval_samples_per_second": 5.93,
717
- "eval_steps_per_second": 2.965,
718
- "eval_top_10_accuracy": 0.8964497041420119,
719
- "eval_top_1_accuracy": 0.5118343195266272,
720
- "eval_top_5_accuracy": 0.8106508875739645,
721
  "step": 4686
722
  },
723
  {
724
- "epoch": 26.000375,
725
- "grad_norm": 7.943602085113525,
726
  "learning_rate": 4.831018518518518e-05,
727
- "loss": 0.8199,
728
  "step": 4700
729
  },
730
  {
731
- "epoch": 26.00315277777778,
732
- "grad_norm": 50.43260192871094,
733
  "learning_rate": 4.815586419753087e-05,
734
- "loss": 0.6218,
735
  "step": 4800
736
  },
737
  {
738
- "epoch": 26.004986111111112,
739
  "eval_accuracy": 0.5384615384615384,
740
- "eval_f1": 0.5059408665743507,
741
- "eval_loss": 1.9028863906860352,
742
- "eval_precision": 0.5594793766391399,
743
  "eval_recall": 0.5384615384615384,
744
- "eval_runtime": 58.8866,
745
- "eval_samples_per_second": 5.74,
746
- "eval_steps_per_second": 2.87,
747
- "eval_top_10_accuracy": 0.8757396449704142,
748
- "eval_top_1_accuracy": 0.5355029585798816,
749
- "eval_top_5_accuracy": 0.8136094674556213,
750
- "step": 4866
751
  },
752
  {
753
- "epoch": 27.000923611111112,
754
- "grad_norm": 11.617535591125488,
755
  "learning_rate": 4.8001543209876545e-05,
756
- "loss": 0.622,
757
  "step": 4900
758
  },
759
  {
760
- "epoch": 27.00370138888889,
761
- "grad_norm": 44.96900939941406,
762
  "learning_rate": 4.784722222222223e-05,
763
- "loss": 0.419,
764
  "step": 5000
765
  },
766
  {
767
- "epoch": 27.005006944444446,
768
- "eval_accuracy": 0.5059171597633136,
769
- "eval_f1": 0.4846225273740067,
770
- "eval_loss": 2.0808358192443848,
771
- "eval_precision": 0.5606762895224434,
772
- "eval_recall": 0.5059171597633136,
773
- "eval_runtime": 65.9507,
774
- "eval_samples_per_second": 5.125,
775
- "eval_steps_per_second": 2.563,
776
- "eval_top_10_accuracy": 0.878698224852071,
777
- "eval_top_1_accuracy": 0.5059171597633136,
778
- "eval_top_5_accuracy": 0.7869822485207101,
779
  "step": 5047
780
  },
781
  {
782
- "epoch": 28.001472222222223,
783
- "grad_norm": 4.978194236755371,
784
  "learning_rate": 4.769290123456791e-05,
785
- "loss": 0.4478,
786
  "step": 5100
787
  },
788
  {
789
- "epoch": 28.00425,
790
- "grad_norm": 14.902109146118164,
791
  "learning_rate": 4.7538580246913585e-05,
792
- "loss": 0.4645,
793
  "step": 5200
794
  },
795
  {
796
- "epoch": 28.005,
797
- "eval_accuracy": 0.5266272189349113,
798
- "eval_f1": 0.518000712305446,
799
- "eval_loss": 1.9836277961730957,
800
- "eval_precision": 0.6049293408760864,
801
- "eval_recall": 0.5266272189349113,
802
- "eval_runtime": 60.1324,
803
- "eval_samples_per_second": 5.621,
804
- "eval_steps_per_second": 2.81,
805
- "eval_top_10_accuracy": 0.878698224852071,
806
- "eval_top_1_accuracy": 0.5266272189349113,
807
- "eval_top_5_accuracy": 0.8017751479289941,
808
  "step": 5227
809
  },
810
  {
811
- "epoch": 29.002020833333333,
812
- "grad_norm": 13.311244010925293,
813
  "learning_rate": 4.738425925925926e-05,
814
- "loss": 0.3302,
815
  "step": 5300
816
  },
817
  {
818
- "epoch": 29.00479861111111,
819
- "grad_norm": 8.797450065612793,
820
  "learning_rate": 4.722993827160494e-05,
821
- "loss": 0.3358,
822
  "step": 5400
823
  },
824
  {
825
- "epoch": 29.004993055555556,
826
- "eval_accuracy": 0.5207100591715976,
827
- "eval_f1": 0.5115516304865416,
828
- "eval_loss": 2.1230368614196777,
829
- "eval_precision": 0.5973255377101531,
830
- "eval_recall": 0.5207100591715976,
831
- "eval_runtime": 59.5159,
832
- "eval_samples_per_second": 5.679,
833
- "eval_steps_per_second": 2.84,
834
- "eval_top_10_accuracy": 0.8875739644970414,
835
- "eval_top_1_accuracy": 0.5236686390532544,
836
- "eval_top_5_accuracy": 0.7988165680473372,
837
  "step": 5407
838
  },
839
  {
840
- "epoch": 30.002569444444443,
841
- "grad_norm": 45.688323974609375,
842
  "learning_rate": 4.707561728395062e-05,
843
- "loss": 0.3703,
844
  "step": 5500
845
  },
846
  {
847
- "epoch": 30.004986111111112,
848
- "eval_accuracy": 0.47928994082840237,
849
- "eval_f1": 0.4653445995161971,
850
- "eval_loss": 2.4011411666870117,
851
- "eval_precision": 0.5361116447654909,
852
- "eval_recall": 0.47928994082840237,
853
- "eval_runtime": 61.5315,
854
- "eval_samples_per_second": 5.493,
855
- "eval_steps_per_second": 2.747,
856
- "eval_top_10_accuracy": 0.8668639053254438,
857
- "eval_top_1_accuracy": 0.47928994082840237,
858
- "eval_top_5_accuracy": 0.7840236686390533,
859
- "step": 5587
860
- },
861
- {
862
- "epoch": 31.000340277777777,
863
- "grad_norm": 19.079675674438477,
864
- "learning_rate": 4.6921296296296296e-05,
865
- "loss": 0.3028,
866
  "step": 5600
867
  },
868
  {
869
- "epoch": 31.003118055555557,
870
- "grad_norm": 1.1767876148223877,
871
- "learning_rate": 4.676697530864197e-05,
872
- "loss": 0.2019,
873
  "step": 5700
874
  },
875
  {
876
- "epoch": 31.005006944444446,
877
- "eval_accuracy": 0.514792899408284,
878
- "eval_f1": 0.5004872346588323,
879
- "eval_loss": 2.358884572982788,
880
- "eval_precision": 0.5748994595148442,
881
- "eval_recall": 0.514792899408284,
882
- "eval_runtime": 56.0547,
883
- "eval_samples_per_second": 6.03,
884
- "eval_steps_per_second": 3.015,
885
- "eval_top_10_accuracy": 0.8727810650887574,
886
- "eval_top_1_accuracy": 0.514792899408284,
887
- "eval_top_5_accuracy": 0.7928994082840237,
888
  "step": 5768
889
  },
890
  {
891
- "epoch": 32.00088888888889,
892
- "grad_norm": 1.4734101295471191,
893
- "learning_rate": 4.661265432098766e-05,
894
- "loss": 0.2676,
895
  "step": 5800
896
  },
897
  {
898
- "epoch": 32.00366666666667,
899
- "grad_norm": 18.391063690185547,
900
- "learning_rate": 4.6458333333333335e-05,
901
- "loss": 0.1428,
902
  "step": 5900
903
  },
904
  {
905
- "epoch": 32.005,
906
- "eval_accuracy": 0.5325443786982249,
907
- "eval_f1": 0.5082406416725944,
908
- "eval_loss": 2.2091736793518066,
909
- "eval_precision": 0.564035643843336,
910
- "eval_recall": 0.5325443786982249,
911
- "eval_runtime": 58.8513,
912
- "eval_samples_per_second": 5.743,
913
- "eval_steps_per_second": 2.872,
914
- "eval_top_10_accuracy": 0.878698224852071,
915
- "eval_top_1_accuracy": 0.5325443786982249,
916
  "eval_top_5_accuracy": 0.8136094674556213,
917
  "step": 5948
918
  },
919
  {
920
- "epoch": 33.0014375,
921
- "grad_norm": 21.9639949798584,
922
- "learning_rate": 4.630401234567901e-05,
923
- "loss": 0.134,
924
  "step": 6000
925
  },
926
  {
927
- "epoch": 33.004215277777774,
928
- "grad_norm": 12.84200668334961,
929
- "learning_rate": 4.614969135802469e-05,
930
- "loss": 0.099,
931
  "step": 6100
932
  },
933
  {
934
- "epoch": 33.00499305555556,
935
- "eval_accuracy": 0.5443786982248521,
936
- "eval_f1": 0.5305546524481435,
937
- "eval_loss": 2.2472469806671143,
938
- "eval_precision": 0.5975943927867006,
939
- "eval_recall": 0.5443786982248521,
940
- "eval_runtime": 65.5901,
941
- "eval_samples_per_second": 5.153,
942
- "eval_steps_per_second": 2.577,
943
- "eval_top_10_accuracy": 0.8727810650887574,
944
- "eval_top_1_accuracy": 0.5443786982248521,
945
- "eval_top_5_accuracy": 0.8224852071005917,
946
  "step": 6128
947
  },
948
  {
949
- "epoch": 34.00198611111111,
950
- "grad_norm": 7.78519868850708,
951
  "learning_rate": 4.599691358024691e-05,
952
- "loss": 0.2075,
953
  "step": 6200
954
  },
955
  {
956
- "epoch": 34.00476388888889,
957
- "grad_norm": 9.09951400756836,
958
  "learning_rate": 4.584259259259259e-05,
959
- "loss": 0.1691,
960
  "step": 6300
961
  },
962
  {
963
- "epoch": 34.00498611111111,
964
- "eval_accuracy": 0.5680473372781065,
965
- "eval_f1": 0.5641834825266777,
966
- "eval_loss": 2.2171597480773926,
967
- "eval_precision": 0.6236840082993929,
968
- "eval_recall": 0.5680473372781065,
969
- "eval_runtime": 56.9955,
970
- "eval_samples_per_second": 5.93,
971
- "eval_steps_per_second": 2.965,
972
- "eval_top_10_accuracy": 0.8846153846153846,
973
- "eval_top_1_accuracy": 0.5680473372781065,
974
- "eval_top_5_accuracy": 0.8076923076923077,
975
- "step": 6308
976
  },
977
  {
978
- "epoch": 35.00253472222222,
979
- "grad_norm": 42.78754425048828,
980
  "learning_rate": 4.5688271604938275e-05,
981
- "loss": 0.2057,
982
  "step": 6400
983
  },
984
  {
985
- "epoch": 35.005006944444446,
986
- "eval_accuracy": 0.4822485207100592,
987
- "eval_f1": 0.45554867551908973,
988
- "eval_loss": 2.816495895385742,
989
- "eval_precision": 0.5523162734701196,
990
- "eval_recall": 0.4822485207100592,
991
- "eval_runtime": 58.7504,
992
- "eval_samples_per_second": 5.753,
993
- "eval_steps_per_second": 2.877,
994
- "eval_top_10_accuracy": 0.849112426035503,
995
- "eval_top_1_accuracy": 0.4822485207100592,
996
- "eval_top_5_accuracy": 0.7603550295857988,
997
  "step": 6489
998
  },
999
  {
1000
- "epoch": 36.000305555555556,
1001
- "grad_norm": 0.602039098739624,
1002
- "learning_rate": 4.5535493827160496e-05,
1003
- "loss": 0.1708,
1004
  "step": 6500
1005
  },
1006
  {
1007
- "epoch": 36.003083333333336,
1008
- "grad_norm": 2.1404974460601807,
1009
- "learning_rate": 4.5381172839506174e-05,
1010
- "loss": 0.107,
1011
  "step": 6600
1012
  },
1013
  {
1014
- "epoch": 36.005,
1015
- "eval_accuracy": 0.5295857988165681,
1016
- "eval_f1": 0.5226000063122455,
1017
- "eval_loss": 2.5726656913757324,
1018
- "eval_precision": 0.5998878340594317,
1019
- "eval_recall": 0.5295857988165681,
1020
- "eval_runtime": 62.9629,
1021
- "eval_samples_per_second": 5.368,
1022
- "eval_steps_per_second": 2.684,
1023
- "eval_top_10_accuracy": 0.8668639053254438,
1024
- "eval_top_1_accuracy": 0.5295857988165681,
1025
- "eval_top_5_accuracy": 0.7958579881656804,
1026
  "step": 6669
1027
  },
1028
  {
1029
- "epoch": 37.00085416666667,
1030
- "grad_norm": 0.16459858417510986,
1031
- "learning_rate": 4.522685185185185e-05,
1032
- "loss": 0.2533,
1033
  "step": 6700
1034
  },
1035
  {
1036
- "epoch": 37.00363194444444,
1037
- "grad_norm": 0.40061742067337036,
1038
- "learning_rate": 4.507253086419753e-05,
1039
- "loss": 0.076,
1040
  "step": 6800
1041
  },
1042
  {
1043
- "epoch": 37.00499305555556,
1044
- "eval_accuracy": 0.5473372781065089,
1045
- "eval_f1": 0.5397134249901392,
1046
- "eval_loss": 2.3755838871002197,
1047
- "eval_precision": 0.6036862709939633,
1048
- "eval_recall": 0.5473372781065089,
1049
- "eval_runtime": 63.34,
1050
- "eval_samples_per_second": 5.336,
1051
- "eval_steps_per_second": 2.668,
1052
- "eval_top_10_accuracy": 0.8609467455621301,
1053
- "eval_top_1_accuracy": 0.5473372781065089,
1054
- "eval_top_5_accuracy": 0.8017751479289941,
1055
  "step": 6849
1056
  },
1057
  {
1058
- "epoch": 38.00140277777778,
1059
- "grad_norm": 68.52092742919922,
1060
- "learning_rate": 4.4918209876543214e-05,
1061
- "loss": 0.1643,
1062
  "step": 6900
1063
  },
1064
  {
1065
- "epoch": 38.00418055555556,
1066
- "grad_norm": 0.26428428292274475,
1067
- "learning_rate": 4.476388888888889e-05,
1068
- "loss": 0.1524,
1069
  "step": 7000
1070
  },
1071
  {
1072
- "epoch": 38.00498611111111,
1073
- "eval_accuracy": 0.5591715976331361,
1074
- "eval_f1": 0.5585875662798738,
1075
- "eval_loss": 2.3974249362945557,
1076
- "eval_precision": 0.6428844659613892,
1077
- "eval_recall": 0.5591715976331361,
1078
- "eval_runtime": 57.95,
1079
- "eval_samples_per_second": 5.833,
1080
- "eval_steps_per_second": 2.916,
1081
- "eval_top_10_accuracy": 0.8698224852071006,
1082
- "eval_top_1_accuracy": 0.5591715976331361,
1083
- "eval_top_5_accuracy": 0.8047337278106509,
1084
- "step": 7029
1085
- },
1086
- {
1087
- "epoch": 39.00195138888889,
1088
- "grad_norm": 1.915513038635254,
1089
- "learning_rate": 4.4609567901234576e-05,
1090
- "loss": 0.1378,
1091
  "step": 7100
1092
  },
1093
  {
1094
- "epoch": 39.004729166666664,
1095
- "grad_norm": 63.36919021606445,
1096
- "learning_rate": 4.4455246913580253e-05,
1097
- "loss": 0.1064,
1098
  "step": 7200
1099
  },
1100
  {
1101
- "epoch": 39.005006944444446,
1102
- "eval_accuracy": 0.5562130177514792,
1103
- "eval_f1": 0.5445995490374188,
1104
- "eval_loss": 2.524350881576538,
1105
- "eval_precision": 0.6056530008453086,
1106
- "eval_recall": 0.5562130177514792,
1107
- "eval_runtime": 57.3136,
1108
- "eval_samples_per_second": 5.897,
1109
- "eval_steps_per_second": 2.949,
1110
- "eval_top_10_accuracy": 0.8757396449704142,
1111
- "eval_top_1_accuracy": 0.5562130177514792,
1112
- "eval_top_5_accuracy": 0.8047337278106509,
1113
  "step": 7210
1114
  },
1115
  {
1116
- "epoch": 40.0025,
1117
- "grad_norm": 0.10949143022298813,
1118
- "learning_rate": 4.430092592592593e-05,
1119
- "loss": 0.1598,
1120
  "step": 7300
1121
  },
1122
  {
1123
- "epoch": 40.005,
1124
- "eval_accuracy": 0.5355029585798816,
1125
- "eval_f1": 0.514647958878728,
1126
- "eval_loss": 2.4414660930633545,
1127
- "eval_precision": 0.5692061143984222,
1128
- "eval_recall": 0.5355029585798816,
1129
- "eval_runtime": 65.9765,
1130
- "eval_samples_per_second": 5.123,
1131
- "eval_steps_per_second": 2.562,
1132
- "eval_top_10_accuracy": 0.8727810650887574,
1133
- "eval_top_1_accuracy": 0.5355029585798816,
1134
- "eval_top_5_accuracy": 0.8195266272189349,
1135
  "step": 7390
1136
  },
1137
  {
1138
- "epoch": 41.00027083333333,
1139
- "grad_norm": 0.9827488660812378,
1140
- "learning_rate": 4.414660493827161e-05,
1141
- "loss": 0.1946,
1142
  "step": 7400
1143
  },
1144
  {
1145
- "epoch": 41.00304861111111,
1146
- "grad_norm": 25.53615951538086,
1147
- "learning_rate": 4.3992283950617286e-05,
1148
- "loss": 0.0981,
1149
  "step": 7500
1150
  },
1151
  {
1152
- "epoch": 41.00499305555556,
1153
- "eval_accuracy": 0.5059171597633136,
1154
- "eval_f1": 0.4826208356751343,
1155
- "eval_loss": 2.6215736865997314,
1156
- "eval_precision": 0.5420139689370459,
1157
- "eval_recall": 0.5059171597633136,
1158
- "eval_runtime": 58.6534,
1159
- "eval_samples_per_second": 5.763,
1160
- "eval_steps_per_second": 2.881,
1161
- "eval_top_10_accuracy": 0.8609467455621301,
1162
- "eval_top_1_accuracy": 0.5059171597633136,
1163
- "eval_top_5_accuracy": 0.7751479289940828,
1164
  "step": 7570
1165
  },
1166
  {
1167
- "epoch": 42.000819444444446,
1168
- "grad_norm": 2.552562713623047,
1169
- "learning_rate": 4.3837962962962964e-05,
1170
- "loss": 0.3576,
1171
  "step": 7600
1172
  },
1173
  {
1174
- "epoch": 42.003597222222226,
1175
- "grad_norm": 0.08632488548755646,
1176
- "learning_rate": 4.368364197530864e-05,
1177
- "loss": 0.1254,
1178
  "step": 7700
1179
  },
1180
  {
1181
- "epoch": 42.00498611111111,
1182
- "eval_accuracy": 0.5059171597633136,
1183
- "eval_f1": 0.4900915700689456,
1184
- "eval_loss": 2.794423818588257,
1185
- "eval_precision": 0.5575904864366402,
1186
- "eval_recall": 0.5059171597633136,
1187
- "eval_runtime": 64.2698,
1188
- "eval_samples_per_second": 5.259,
1189
- "eval_steps_per_second": 2.63,
1190
- "eval_top_10_accuracy": 0.8550295857988166,
1191
- "eval_top_1_accuracy": 0.5059171597633136,
1192
- "eval_top_5_accuracy": 0.7662721893491125,
1193
- "step": 7750
1194
- },
1195
- {
1196
- "epoch": 43.00136805555555,
1197
- "grad_norm": 0.17759816348552704,
1198
- "learning_rate": 4.352932098765432e-05,
1199
- "loss": 0.1162,
1200
  "step": 7800
1201
  },
1202
  {
1203
- "epoch": 43.00414583333333,
1204
- "grad_norm": 0.07255858927965164,
1205
- "learning_rate": 4.3375000000000004e-05,
1206
- "loss": 0.1749,
1207
  "step": 7900
1208
  },
1209
  {
1210
- "epoch": 43.005006944444446,
1211
- "eval_accuracy": 0.5355029585798816,
1212
- "eval_f1": 0.5193510467179105,
1213
- "eval_loss": 2.5535783767700195,
1214
- "eval_precision": 0.5675824175824176,
1215
- "eval_recall": 0.5355029585798816,
1216
- "eval_runtime": 71.3588,
1217
- "eval_samples_per_second": 4.737,
1218
- "eval_steps_per_second": 2.368,
1219
- "eval_top_10_accuracy": 0.8668639053254438,
1220
- "eval_top_1_accuracy": 0.5355029585798816,
1221
- "eval_top_5_accuracy": 0.7928994082840237,
1222
  "step": 7931
1223
  },
1224
  {
1225
- "epoch": 44.001916666666666,
1226
- "grad_norm": 1.3160659074783325,
1227
  "learning_rate": 4.322067901234568e-05,
1228
- "loss": 0.1926,
1229
  "step": 8000
1230
  },
1231
  {
1232
- "epoch": 44.004694444444446,
1233
- "grad_norm": 0.07331795245409012,
1234
  "learning_rate": 4.306635802469136e-05,
1235
- "loss": 0.1931,
1236
  "step": 8100
1237
  },
1238
  {
1239
- "epoch": 44.005,
1240
- "eval_accuracy": 0.4970414201183432,
1241
- "eval_f1": 0.47531933878087734,
1242
- "eval_loss": 2.8650310039520264,
1243
- "eval_precision": 0.5347263847263848,
1244
- "eval_recall": 0.4970414201183432,
1245
- "eval_runtime": 62.3948,
1246
- "eval_samples_per_second": 5.417,
1247
- "eval_steps_per_second": 2.709,
1248
- "eval_top_10_accuracy": 0.8609467455621301,
1249
- "eval_top_1_accuracy": 0.4970414201183432,
1250
- "eval_top_5_accuracy": 0.7899408284023669,
1251
  "step": 8111
1252
  },
1253
  {
1254
- "epoch": 45.00246527777778,
1255
- "grad_norm": 1.3005619049072266,
1256
  "learning_rate": 4.291203703703704e-05,
1257
- "loss": 0.1515,
1258
  "step": 8200
1259
  },
1260
  {
1261
- "epoch": 45.00499305555556,
1262
- "eval_accuracy": 0.5591715976331361,
1263
- "eval_f1": 0.54668814091891,
1264
- "eval_loss": 2.482553005218506,
1265
- "eval_precision": 0.6056072132995209,
1266
- "eval_recall": 0.5591715976331361,
1267
- "eval_runtime": 58.2674,
1268
- "eval_samples_per_second": 5.801,
1269
- "eval_steps_per_second": 2.9,
1270
- "eval_top_10_accuracy": 0.863905325443787,
1271
- "eval_top_1_accuracy": 0.5591715976331361,
1272
- "eval_top_5_accuracy": 0.8106508875739645,
1273
  "step": 8291
1274
  },
1275
  {
1276
- "epoch": 46.000236111111114,
1277
- "grad_norm": 16.438777923583984,
1278
  "learning_rate": 4.2757716049382715e-05,
1279
- "loss": 0.1387,
1280
  "step": 8300
1281
  },
1282
  {
1283
- "epoch": 46.00301388888889,
1284
- "grad_norm": 0.10719487816095352,
1285
- "learning_rate": 4.26033950617284e-05,
1286
- "loss": 0.1895,
1287
  "step": 8400
1288
  },
1289
  {
1290
- "epoch": 46.00498611111111,
1291
- "eval_accuracy": 0.5414201183431953,
1292
- "eval_f1": 0.519332771107919,
1293
- "eval_loss": 2.8682615756988525,
1294
- "eval_precision": 0.5870526908988447,
1295
- "eval_recall": 0.5414201183431953,
1296
- "eval_runtime": 57.5393,
1297
- "eval_samples_per_second": 5.874,
1298
- "eval_steps_per_second": 2.937,
1299
- "eval_top_10_accuracy": 0.8579881656804734,
1300
- "eval_top_1_accuracy": 0.5414201183431953,
1301
- "eval_top_5_accuracy": 0.8017751479289941,
1302
- "step": 8471
1303
  },
1304
  {
1305
- "epoch": 47.00078472222222,
1306
- "grad_norm": 0.019902631640434265,
1307
- "learning_rate": 4.244907407407408e-05,
1308
- "loss": 0.2449,
1309
  "step": 8500
1310
  },
1311
  {
1312
- "epoch": 47.0035625,
1313
- "grad_norm": 0.10022391378879547,
1314
- "learning_rate": 4.2294753086419755e-05,
1315
- "loss": 0.1179,
1316
  "step": 8600
1317
  },
1318
  {
1319
- "epoch": 47.005006944444446,
1320
- "eval_accuracy": 0.4940828402366864,
1321
- "eval_f1": 0.49023220369374215,
1322
- "eval_loss": 2.936835765838623,
1323
- "eval_precision": 0.5903516732136642,
1324
- "eval_recall": 0.4940828402366864,
1325
- "eval_runtime": 53.3024,
1326
- "eval_samples_per_second": 6.341,
1327
- "eval_steps_per_second": 3.171,
1328
- "eval_top_10_accuracy": 0.8727810650887574,
1329
- "eval_top_1_accuracy": 0.4940828402366864,
1330
- "eval_top_5_accuracy": 0.7869822485207101,
1331
  "step": 8652
1332
  },
1333
  {
1334
- "epoch": 48.001333333333335,
1335
- "grad_norm": 0.11750177294015884,
1336
- "learning_rate": 4.214043209876544e-05,
1337
- "loss": 0.162,
1338
  "step": 8700
1339
  },
1340
  {
1341
- "epoch": 48.00411111111111,
1342
- "grad_norm": 14.434788703918457,
1343
- "learning_rate": 4.198611111111112e-05,
1344
- "loss": 0.1028,
1345
  "step": 8800
1346
  },
1347
  {
1348
- "epoch": 48.005,
1349
- "eval_accuracy": 0.5562130177514792,
1350
- "eval_f1": 0.5491296671770045,
1351
- "eval_loss": 2.8997340202331543,
1352
- "eval_precision": 0.634636517328825,
1353
- "eval_recall": 0.5562130177514792,
1354
- "eval_runtime": 58.0314,
1355
- "eval_samples_per_second": 5.824,
1356
- "eval_steps_per_second": 2.912,
1357
- "eval_top_10_accuracy": 0.8402366863905325,
1358
- "eval_top_1_accuracy": 0.5562130177514792,
1359
- "eval_top_5_accuracy": 0.7810650887573964,
1360
  "step": 8832
1361
  },
1362
  {
1363
- "epoch": 49.00188194444444,
1364
- "grad_norm": 14.458342552185059,
1365
  "learning_rate": 4.183333333333334e-05,
1366
- "loss": 0.2122,
1367
  "step": 8900
1368
  },
1369
  {
1370
- "epoch": 49.00465972222222,
1371
- "grad_norm": 0.6533581614494324,
1372
  "learning_rate": 4.1679012345679016e-05,
1373
- "loss": 0.2336,
1374
  "step": 9000
1375
  },
1376
  {
1377
- "epoch": 49.00499305555556,
1378
- "eval_accuracy": 0.5295857988165681,
1379
- "eval_f1": 0.5090540623085001,
1380
- "eval_loss": 2.839053153991699,
1381
- "eval_precision": 0.5684427751735445,
1382
- "eval_recall": 0.5295857988165681,
1383
- "eval_runtime": 62.4599,
1384
- "eval_samples_per_second": 5.411,
1385
- "eval_steps_per_second": 2.706,
1386
- "eval_top_10_accuracy": 0.8284023668639053,
1387
- "eval_top_1_accuracy": 0.5295857988165681,
1388
- "eval_top_5_accuracy": 0.7633136094674556,
1389
  "step": 9012
1390
  },
1391
  {
1392
- "epoch": 50.002430555555556,
1393
- "grad_norm": 0.03816818445920944,
1394
  "learning_rate": 4.1524691358024694e-05,
1395
- "loss": 0.1344,
1396
  "step": 9100
1397
  },
1398
  {
1399
- "epoch": 50.00498611111111,
1400
- "eval_accuracy": 0.5266272189349113,
1401
- "eval_f1": 0.5092456524409186,
1402
- "eval_loss": 2.980726957321167,
1403
- "eval_precision": 0.5702895395203087,
1404
- "eval_recall": 0.5266272189349113,
1405
- "eval_runtime": 55.9563,
1406
- "eval_samples_per_second": 6.04,
1407
- "eval_steps_per_second": 3.02,
1408
- "eval_top_10_accuracy": 0.849112426035503,
1409
- "eval_top_1_accuracy": 0.5266272189349113,
1410
- "eval_top_5_accuracy": 0.7544378698224852,
1411
- "step": 9192
1412
  },
1413
  {
1414
- "epoch": 51.00020138888889,
1415
- "grad_norm": 0.027867300435900688,
1416
  "learning_rate": 4.137037037037037e-05,
1417
- "loss": 0.2343,
1418
  "step": 9200
1419
  },
1420
  {
1421
- "epoch": 51.00297916666667,
1422
- "grad_norm": 0.946140468120575,
1423
  "learning_rate": 4.121604938271605e-05,
1424
- "loss": 0.0973,
1425
  "step": 9300
1426
  },
1427
  {
1428
- "epoch": 51.005006944444446,
1429
- "eval_accuracy": 0.4970414201183432,
1430
- "eval_f1": 0.4804763284053225,
1431
- "eval_loss": 3.074336051940918,
1432
- "eval_precision": 0.5363200901662439,
1433
- "eval_recall": 0.4970414201183432,
1434
- "eval_runtime": 58.4314,
1435
- "eval_samples_per_second": 5.785,
1436
- "eval_steps_per_second": 2.892,
1437
- "eval_top_10_accuracy": 0.849112426035503,
1438
- "eval_top_1_accuracy": 0.4970414201183432,
1439
- "eval_top_5_accuracy": 0.7662721893491125,
1440
  "step": 9373
1441
  },
1442
  {
1443
- "epoch": 52.00075,
1444
- "grad_norm": 0.018307339400053024,
1445
  "learning_rate": 4.1061728395061733e-05,
1446
- "loss": 0.2218,
1447
  "step": 9400
1448
  },
1449
  {
1450
- "epoch": 52.00352777777778,
1451
- "grad_norm": 0.04463661462068558,
1452
  "learning_rate": 4.090740740740741e-05,
1453
- "loss": 0.1436,
1454
  "step": 9500
1455
  },
1456
  {
1457
- "epoch": 52.005,
1458
- "eval_accuracy": 0.5088757396449705,
1459
- "eval_f1": 0.501149902037476,
1460
- "eval_loss": 2.6934008598327637,
1461
- "eval_precision": 0.5551845590307128,
1462
- "eval_recall": 0.5088757396449705,
1463
- "eval_runtime": 72.3552,
1464
- "eval_samples_per_second": 4.671,
1465
- "eval_steps_per_second": 2.336,
1466
- "eval_top_10_accuracy": 0.8846153846153846,
1467
- "eval_top_1_accuracy": 0.5088757396449705,
1468
- "eval_top_5_accuracy": 0.7988165680473372,
1469
  "step": 9553
1470
  },
1471
  {
1472
- "epoch": 53.00129861111111,
1473
- "grad_norm": 0.04878831282258034,
1474
  "learning_rate": 4.075308641975309e-05,
1475
- "loss": 0.1846,
1476
  "step": 9600
1477
  },
1478
  {
1479
- "epoch": 53.00407638888889,
1480
- "grad_norm": 0.009379712864756584,
1481
  "learning_rate": 4.0598765432098767e-05,
1482
- "loss": 0.153,
1483
  "step": 9700
1484
  },
1485
  {
1486
- "epoch": 53.00499305555556,
1487
- "eval_accuracy": 0.5118343195266272,
1488
- "eval_f1": 0.491153718076795,
1489
- "eval_loss": 3.0898663997650146,
1490
- "eval_precision": 0.530217218678757,
1491
- "eval_recall": 0.5118343195266272,
1492
- "eval_runtime": 75.7894,
1493
- "eval_samples_per_second": 4.46,
1494
- "eval_steps_per_second": 2.23,
1495
- "eval_top_10_accuracy": 0.8461538461538461,
1496
- "eval_top_1_accuracy": 0.5118343195266272,
1497
- "eval_top_5_accuracy": 0.7928994082840237,
1498
  "step": 9733
1499
  },
1500
  {
1501
- "epoch": 54.001847222222224,
1502
- "grad_norm": 25.075729370117188,
1503
  "learning_rate": 4.0444444444444444e-05,
1504
- "loss": 0.1801,
1505
  "step": 9800
1506
  },
1507
  {
1508
- "epoch": 54.004625,
1509
- "grad_norm": 0.09503714740276337,
1510
  "learning_rate": 4.029012345679012e-05,
1511
- "loss": 0.1402,
1512
  "step": 9900
1513
  },
1514
  {
1515
- "epoch": 54.00498611111111,
1516
- "eval_accuracy": 0.5562130177514792,
1517
- "eval_f1": 0.5358985032061955,
1518
- "eval_loss": 2.773455858230591,
1519
- "eval_precision": 0.5991582135812905,
1520
- "eval_recall": 0.5562130177514792,
1521
- "eval_runtime": 58.9397,
1522
- "eval_samples_per_second": 5.735,
1523
- "eval_steps_per_second": 2.867,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1524
  "eval_top_10_accuracy": 0.8698224852071006,
1525
- "eval_top_1_accuracy": 0.5562130177514792,
1526
- "eval_top_5_accuracy": 0.7899408284023669,
1527
- "step": 9913
1528
  },
1529
  {
1530
- "epoch": 54.00498611111111,
1531
- "step": 9913,
1532
- "total_flos": 9.891175377833165e+19,
1533
- "train_loss": 5.434827623268219,
1534
- "train_runtime": 15289.1083,
1535
- "train_samples_per_second": 18.837,
1536
- "train_steps_per_second": 2.355
1537
  }
1538
  ],
1539
  "logging_steps": 100,
@@ -1562,7 +1671,7 @@
1562
  "attributes": {}
1563
  }
1564
  },
1565
- "total_flos": 9.891175377833165e+19,
1566
  "train_batch_size": 2,
1567
  "trial_name": null,
1568
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6301775147928994,
3
+ "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/VideoMAE_Base_WLASL_100_200_epochs_p20_SR_8/checkpoint-7030",
4
+ "epoch": 58.005006944444446,
5
  "eval_steps": 500,
6
+ "global_step": 10635,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.002777777777777778,
13
+ "grad_norm": 52.335811614990234,
14
  "learning_rate": 1.3333333333333334e-06,
15
+ "loss": 18.5755,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.005,
20
  "eval_accuracy": 0.008875739644970414,
21
+ "eval_f1": 0.0009576475529349738,
22
+ "eval_loss": 4.641324043273926,
23
+ "eval_precision": 0.0005193342938593639,
24
  "eval_recall": 0.008875739644970414,
25
+ "eval_runtime": 181.5799,
26
+ "eval_samples_per_second": 1.861,
27
+ "eval_steps_per_second": 0.931,
28
+ "eval_top_10_accuracy": 0.10059171597633136,
29
  "eval_top_1_accuracy": 0.008875739644970414,
30
+ "eval_top_5_accuracy": 0.04437869822485207,
31
  "step": 180
32
  },
33
  {
34
+ "epoch": 1.0005416666666667,
35
+ "grad_norm": 59.252769470214844,
36
  "learning_rate": 2.7222222222222224e-06,
37
+ "loss": 18.6055,
38
  "step": 200
39
  },
40
  {
41
+ "epoch": 1.0033194444444444,
42
+ "grad_norm": 52.176109313964844,
43
  "learning_rate": 4.111111111111112e-06,
44
+ "loss": 18.4843,
45
  "step": 300
46
  },
47
  {
48
+ "epoch": 1.0049861111111111,
49
+ "eval_accuracy": 0.008875739644970414,
50
+ "eval_f1": 0.0011768857922704076,
51
+ "eval_loss": 4.630766868591309,
52
+ "eval_precision": 0.0006326211365999165,
53
+ "eval_recall": 0.008875739644970414,
54
+ "eval_runtime": 60.2416,
55
+ "eval_samples_per_second": 5.611,
56
+ "eval_steps_per_second": 2.805,
57
+ "eval_top_10_accuracy": 0.10059171597633136,
58
+ "eval_top_1_accuracy": 0.008875739644970414,
59
  "eval_top_5_accuracy": 0.0621301775147929,
60
  "step": 360
61
  },
62
  {
63
+ "epoch": 2.0010902777777777,
64
+ "grad_norm": 37.923606872558594,
65
  "learning_rate": 5.500000000000001e-06,
66
+ "loss": 18.5315,
67
  "step": 400
68
  },
69
  {
70
+ "epoch": 2.0038680555555555,
71
+ "grad_norm": 38.569297790527344,
72
  "learning_rate": 6.888888888888889e-06,
73
+ "loss": 18.4592,
74
  "step": 500
75
  },
76
  {
77
+ "epoch": 2.0050069444444443,
78
+ "eval_accuracy": 0.020710059171597635,
79
+ "eval_f1": 0.002684633160978603,
80
+ "eval_loss": 4.6208624839782715,
81
+ "eval_precision": 0.0016963969626691518,
82
+ "eval_recall": 0.020710059171597635,
83
+ "eval_runtime": 59.58,
84
+ "eval_samples_per_second": 5.673,
85
+ "eval_steps_per_second": 2.837,
86
+ "eval_top_10_accuracy": 0.10650887573964497,
87
+ "eval_top_1_accuracy": 0.020710059171597635,
88
+ "eval_top_5_accuracy": 0.07100591715976332,
89
+ "step": 541
90
  },
91
  {
92
+ "epoch": 3.001638888888889,
93
+ "grad_norm": 41.79819107055664,
94
  "learning_rate": 8.27777777777778e-06,
95
+ "loss": 18.3856,
96
  "step": 600
97
  },
98
  {
99
+ "epoch": 3.0044166666666667,
100
+ "grad_norm": 31.180952072143555,
101
  "learning_rate": 9.666666666666667e-06,
102
+ "loss": 18.4604,
103
  "step": 700
104
  },
105
  {
106
+ "epoch": 3.005,
107
+ "eval_accuracy": 0.01775147928994083,
108
+ "eval_f1": 0.0010059742201274729,
109
+ "eval_loss": 4.617129802703857,
110
+ "eval_precision": 0.0005190329473889052,
111
+ "eval_recall": 0.01775147928994083,
112
+ "eval_runtime": 58.1107,
113
+ "eval_samples_per_second": 5.816,
114
+ "eval_steps_per_second": 2.908,
115
+ "eval_top_10_accuracy": 0.12130177514792899,
116
+ "eval_top_1_accuracy": 0.01775147928994083,
117
+ "eval_top_5_accuracy": 0.06804733727810651,
118
  "step": 721
119
  },
120
  {
121
+ "epoch": 4.0021875,
122
+ "grad_norm": 31.53841781616211,
123
  "learning_rate": 1.1055555555555556e-05,
124
+ "loss": 18.3655,
125
  "step": 800
126
  },
127
  {
128
+ "epoch": 4.004965277777778,
129
+ "grad_norm": 29.355567932128906,
130
  "learning_rate": 1.2444444444444445e-05,
131
+ "loss": 18.4543,
132
  "step": 900
133
  },
134
  {
135
+ "epoch": 4.0049930555555555,
136
+ "eval_accuracy": 0.023668639053254437,
137
+ "eval_f1": 0.0015770532334260577,
138
+ "eval_loss": 4.6106462478637695,
139
+ "eval_precision": 0.0008262008711267436,
140
+ "eval_recall": 0.023668639053254437,
141
+ "eval_runtime": 60.8324,
142
+ "eval_samples_per_second": 5.556,
143
+ "eval_steps_per_second": 2.778,
144
+ "eval_top_10_accuracy": 0.14497041420118342,
145
+ "eval_top_1_accuracy": 0.023668639053254437,
146
+ "eval_top_5_accuracy": 0.08579881656804733,
147
  "step": 901
148
  },
149
  {
150
+ "epoch": 5.002736111111111,
151
+ "grad_norm": 29.539840698242188,
152
  "learning_rate": 1.3833333333333334e-05,
153
+ "loss": 18.3384,
154
  "step": 1000
155
  },
156
  {
157
+ "epoch": 5.004986111111111,
158
+ "eval_accuracy": 0.01775147928994083,
159
+ "eval_f1": 0.000667767246016583,
160
+ "eval_loss": 4.619048118591309,
161
+ "eval_precision": 0.00034028394804998393,
162
+ "eval_recall": 0.01775147928994083,
163
+ "eval_runtime": 55.7817,
164
+ "eval_samples_per_second": 6.059,
165
+ "eval_steps_per_second": 3.03,
166
+ "eval_top_10_accuracy": 0.10946745562130178,
167
+ "eval_top_1_accuracy": 0.01775147928994083,
168
+ "eval_top_5_accuracy": 0.06804733727810651,
169
  "step": 1081
170
  },
171
  {
172
+ "epoch": 6.000506944444444,
173
+ "grad_norm": 31.13288116455078,
174
  "learning_rate": 1.5222222222222224e-05,
175
+ "loss": 18.3755,
176
  "step": 1100
177
  },
178
  {
179
+ "epoch": 6.003284722222222,
180
+ "grad_norm": 37.07182312011719,
181
  "learning_rate": 1.661111111111111e-05,
182
+ "loss": 18.1947,
183
  "step": 1200
184
  },
185
  {
186
+ "epoch": 6.005006944444444,
187
+ "eval_accuracy": 0.029585798816568046,
188
+ "eval_f1": 0.005653493576969065,
189
+ "eval_loss": 4.60960054397583,
190
+ "eval_precision": 0.0037760128257865815,
191
+ "eval_recall": 0.029585798816568046,
192
+ "eval_runtime": 73.4506,
193
+ "eval_samples_per_second": 4.602,
194
+ "eval_steps_per_second": 2.301,
195
+ "eval_top_10_accuracy": 0.13609467455621302,
196
+ "eval_top_1_accuracy": 0.029585798816568046,
197
+ "eval_top_5_accuracy": 0.09171597633136094,
198
+ "step": 1262
199
  },
200
  {
201
+ "epoch": 7.001055555555555,
202
+ "grad_norm": 32.610511779785156,
203
  "learning_rate": 1.8e-05,
204
+ "loss": 18.3084,
205
  "step": 1300
206
  },
207
  {
208
+ "epoch": 7.003833333333334,
209
+ "grad_norm": 31.119285583496094,
210
  "learning_rate": 1.938888888888889e-05,
211
+ "loss": 18.1104,
212
  "step": 1400
213
  },
214
  {
215
+ "epoch": 7.005,
216
+ "eval_accuracy": 0.03550295857988166,
217
+ "eval_f1": 0.008167727362359187,
218
+ "eval_loss": 4.599233627319336,
219
+ "eval_precision": 0.006001785543098896,
220
+ "eval_recall": 0.03550295857988166,
221
+ "eval_runtime": 68.5452,
222
+ "eval_samples_per_second": 4.931,
223
+ "eval_steps_per_second": 2.466,
224
+ "eval_top_10_accuracy": 0.14497041420118342,
225
+ "eval_top_1_accuracy": 0.03550295857988166,
226
+ "eval_top_5_accuracy": 0.09467455621301775,
227
  "step": 1442
228
  },
229
  {
230
+ "epoch": 8.001604166666667,
231
+ "grad_norm": 29.667177200317383,
232
  "learning_rate": 2.077777777777778e-05,
233
+ "loss": 18.0862,
234
  "step": 1500
235
  },
236
  {
237
+ "epoch": 8.004381944444445,
238
+ "grad_norm": 31.353796005249023,
239
  "learning_rate": 2.216666666666667e-05,
240
+ "loss": 18.0825,
241
  "step": 1600
242
  },
243
  {
244
+ "epoch": 8.004993055555556,
245
+ "eval_accuracy": 0.01775147928994083,
246
+ "eval_f1": 0.008027781632833297,
247
+ "eval_loss": 4.593963623046875,
248
+ "eval_precision": 0.005914603105136552,
249
+ "eval_recall": 0.01775147928994083,
250
+ "eval_runtime": 63.1593,
251
+ "eval_samples_per_second": 5.352,
252
+ "eval_steps_per_second": 2.676,
253
+ "eval_top_10_accuracy": 0.15680473372781065,
254
+ "eval_top_1_accuracy": 0.01775147928994083,
255
+ "eval_top_5_accuracy": 0.09763313609467456,
256
  "step": 1622
257
  },
258
  {
259
+ "epoch": 9.002152777777777,
260
+ "grad_norm": 29.327001571655273,
261
  "learning_rate": 2.3555555555555556e-05,
262
+ "loss": 17.8731,
263
  "step": 1700
264
  },
265
  {
266
+ "epoch": 9.004930555555555,
267
+ "grad_norm": 32.24522399902344,
268
  "learning_rate": 2.4944444444444447e-05,
269
+ "loss": 17.9081,
270
  "step": 1800
271
  },
272
  {
273
+ "epoch": 9.00498611111111,
274
+ "eval_accuracy": 0.03254437869822485,
275
+ "eval_f1": 0.010324695547367611,
276
+ "eval_loss": 4.562719345092773,
277
+ "eval_precision": 0.007627220692831552,
278
+ "eval_recall": 0.03254437869822485,
279
+ "eval_runtime": 60.367,
280
+ "eval_samples_per_second": 5.599,
281
+ "eval_steps_per_second": 2.8,
282
+ "eval_top_10_accuracy": 0.16863905325443787,
283
+ "eval_top_1_accuracy": 0.03254437869822485,
284
+ "eval_top_5_accuracy": 0.10650887573964497,
285
  "step": 1802
286
  },
287
  {
288
+ "epoch": 10.00270138888889,
289
+ "grad_norm": 35.42505645751953,
290
  "learning_rate": 2.633333333333333e-05,
291
+ "loss": 17.4986,
292
  "step": 1900
293
  },
294
  {
295
+ "epoch": 10.005006944444444,
296
+ "eval_accuracy": 0.03550295857988166,
297
+ "eval_f1": 0.012635094540038232,
298
+ "eval_loss": 4.428702354431152,
299
+ "eval_precision": 0.008686492137909141,
300
+ "eval_recall": 0.03550295857988166,
301
+ "eval_runtime": 65.5677,
302
+ "eval_samples_per_second": 5.155,
303
+ "eval_steps_per_second": 2.577,
304
+ "eval_top_10_accuracy": 0.20118343195266272,
305
+ "eval_top_1_accuracy": 0.03550295857988166,
306
+ "eval_top_5_accuracy": 0.12130177514792899,
307
+ "step": 1983
308
  },
309
  {
310
+ "epoch": 11.000472222222223,
311
+ "grad_norm": 33.5329475402832,
312
  "learning_rate": 2.772222222222222e-05,
313
+ "loss": 17.2661,
314
  "step": 2000
315
  },
316
  {
317
+ "epoch": 11.00325,
318
+ "grad_norm": 34.00961685180664,
319
  "learning_rate": 2.9111111111111112e-05,
320
+ "loss": 16.3869,
321
  "step": 2100
322
  },
323
  {
324
+ "epoch": 11.005,
325
+ "eval_accuracy": 0.0650887573964497,
326
+ "eval_f1": 0.028937908468581415,
327
+ "eval_loss": 4.136640548706055,
328
+ "eval_precision": 0.022820963480770305,
329
+ "eval_recall": 0.0650887573964497,
330
+ "eval_runtime": 69.729,
331
+ "eval_samples_per_second": 4.847,
332
+ "eval_steps_per_second": 2.424,
333
+ "eval_top_10_accuracy": 0.3757396449704142,
334
+ "eval_top_1_accuracy": 0.0650887573964497,
335
+ "eval_top_5_accuracy": 0.23372781065088757,
336
  "step": 2163
337
  },
338
  {
339
+ "epoch": 12.001020833333333,
340
+ "grad_norm": 31.025253295898438,
341
  "learning_rate": 3.05e-05,
342
+ "loss": 15.7548,
343
  "step": 2200
344
  },
345
  {
346
+ "epoch": 12.003798611111112,
347
+ "grad_norm": 38.748043060302734,
348
  "learning_rate": 3.188888888888889e-05,
349
+ "loss": 15.0278,
350
  "step": 2300
351
  },
352
  {
353
+ "epoch": 12.004993055555556,
354
+ "eval_accuracy": 0.10650887573964497,
355
+ "eval_f1": 0.0571553229183928,
356
+ "eval_loss": 3.7821457386016846,
357
+ "eval_precision": 0.060074719736818764,
358
+ "eval_recall": 0.10650887573964497,
359
+ "eval_runtime": 57.4783,
360
+ "eval_samples_per_second": 5.88,
361
+ "eval_steps_per_second": 2.94,
362
+ "eval_top_10_accuracy": 0.5177514792899408,
363
+ "eval_top_1_accuracy": 0.10650887573964497,
364
+ "eval_top_5_accuracy": 0.35798816568047337,
365
  "step": 2343
366
  },
367
  {
368
+ "epoch": 13.001569444444444,
369
+ "grad_norm": 42.372962951660156,
370
  "learning_rate": 3.327777777777778e-05,
371
+ "loss": 14.1209,
372
  "step": 2400
373
  },
374
  {
375
+ "epoch": 13.004347222222222,
376
+ "grad_norm": 53.95140075683594,
377
  "learning_rate": 3.466666666666667e-05,
378
+ "loss": 13.4974,
379
  "step": 2500
380
  },
381
  {
382
+ "epoch": 13.00498611111111,
383
+ "eval_accuracy": 0.20118343195266272,
384
+ "eval_f1": 0.1544137887289645,
385
+ "eval_loss": 3.441917657852173,
386
+ "eval_precision": 0.17607648509523535,
387
+ "eval_recall": 0.20118343195266272,
388
+ "eval_runtime": 61.9222,
389
+ "eval_samples_per_second": 5.458,
390
+ "eval_steps_per_second": 2.729,
391
+ "eval_top_10_accuracy": 0.6686390532544378,
392
+ "eval_top_1_accuracy": 0.20118343195266272,
393
+ "eval_top_5_accuracy": 0.5029585798816568,
394
  "step": 2523
395
  },
396
  {
397
+ "epoch": 14.002118055555556,
398
+ "grad_norm": 47.6016731262207,
399
  "learning_rate": 3.605555555555556e-05,
400
+ "loss": 12.1829,
401
  "step": 2600
402
  },
403
  {
404
+ "epoch": 14.004895833333334,
405
+ "grad_norm": 35.87943649291992,
406
  "learning_rate": 3.7444444444444446e-05,
407
+ "loss": 11.5774,
408
  "step": 2700
409
  },
410
  {
411
+ "epoch": 14.005006944444444,
412
+ "eval_accuracy": 0.21893491124260356,
413
+ "eval_f1": 0.17250136680337552,
414
+ "eval_loss": 3.2604691982269287,
415
+ "eval_precision": 0.20685830348019885,
416
+ "eval_recall": 0.21893491124260356,
417
+ "eval_runtime": 58.8349,
418
+ "eval_samples_per_second": 5.745,
419
+ "eval_steps_per_second": 2.872,
420
+ "eval_top_10_accuracy": 0.6982248520710059,
421
+ "eval_top_1_accuracy": 0.21893491124260356,
422
+ "eval_top_5_accuracy": 0.5355029585798816,
423
+ "step": 2704
424
  },
425
  {
426
+ "epoch": 15.002666666666666,
427
+ "grad_norm": 50.407718658447266,
428
  "learning_rate": 3.883333333333333e-05,
429
+ "loss": 10.263,
430
  "step": 2800
431
  },
432
  {
433
+ "epoch": 15.005,
434
+ "eval_accuracy": 0.31952662721893493,
435
+ "eval_f1": 0.29516694216805966,
436
+ "eval_loss": 2.8508408069610596,
437
+ "eval_precision": 0.3730181143642682,
438
+ "eval_recall": 0.31952662721893493,
439
+ "eval_runtime": 228.2279,
440
+ "eval_samples_per_second": 1.481,
441
+ "eval_steps_per_second": 0.74,
442
+ "eval_top_10_accuracy": 0.8076923076923077,
443
+ "eval_top_1_accuracy": 0.31952662721893493,
444
+ "eval_top_5_accuracy": 0.6627218934911243,
445
  "step": 2884
446
  },
447
  {
448
+ "epoch": 16.0004375,
449
+ "grad_norm": 51.00344467163086,
450
  "learning_rate": 4.022222222222222e-05,
451
+ "loss": 9.6519,
452
  "step": 2900
453
  },
454
  {
455
+ "epoch": 16.003215277777777,
456
+ "grad_norm": 55.5887451171875,
457
  "learning_rate": 4.1611111111111114e-05,
458
+ "loss": 8.1589,
459
  "step": 3000
460
  },
461
  {
462
+ "epoch": 16.004993055555556,
463
+ "eval_accuracy": 0.3905325443786982,
464
+ "eval_f1": 0.3592464386756068,
465
+ "eval_loss": 2.594480514526367,
466
+ "eval_precision": 0.41684405338251496,
467
+ "eval_recall": 0.3905325443786982,
468
+ "eval_runtime": 190.8374,
469
+ "eval_samples_per_second": 1.771,
470
+ "eval_steps_per_second": 0.886,
471
+ "eval_top_10_accuracy": 0.8461538461538461,
472
+ "eval_top_1_accuracy": 0.3905325443786982,
473
+ "eval_top_5_accuracy": 0.7307692307692307,
474
  "step": 3064
475
  },
476
  {
477
+ "epoch": 17.00098611111111,
478
+ "grad_norm": 54.826148986816406,
479
  "learning_rate": 4.3e-05,
480
+ "loss": 7.9513,
481
  "step": 3100
482
  },
483
  {
484
+ "epoch": 17.003763888888887,
485
+ "grad_norm": 76.26374053955078,
486
  "learning_rate": 4.438888888888889e-05,
487
+ "loss": 6.8221,
488
  "step": 3200
489
  },
490
  {
491
+ "epoch": 17.004986111111112,
492
+ "eval_accuracy": 0.3994082840236686,
493
+ "eval_f1": 0.3639671675936601,
494
+ "eval_loss": 2.4310550689697266,
495
+ "eval_precision": 0.3999949948836589,
496
+ "eval_recall": 0.3994082840236686,
497
+ "eval_runtime": 187.0322,
498
+ "eval_samples_per_second": 1.807,
499
+ "eval_steps_per_second": 0.904,
500
+ "eval_top_10_accuracy": 0.8727810650887574,
501
+ "eval_top_1_accuracy": 0.3994082840236686,
502
+ "eval_top_5_accuracy": 0.7337278106508875,
503
  "step": 3244
504
  },
505
  {
506
+ "epoch": 18.00153472222222,
507
+ "grad_norm": 55.72072219848633,
508
  "learning_rate": 4.577777777777778e-05,
509
+ "loss": 5.8926,
510
  "step": 3300
511
  },
512
  {
513
+ "epoch": 18.0043125,
514
+ "grad_norm": 42.606868743896484,
515
  "learning_rate": 4.716666666666667e-05,
516
+ "loss": 5.4923,
517
  "step": 3400
518
  },
519
  {
520
+ "epoch": 18.005006944444446,
521
+ "eval_accuracy": 0.46449704142011833,
522
+ "eval_f1": 0.43497012092329823,
523
+ "eval_loss": 2.213894844055176,
524
+ "eval_precision": 0.48856049681841535,
525
+ "eval_recall": 0.46449704142011833,
526
+ "eval_runtime": 171.9348,
527
+ "eval_samples_per_second": 1.966,
528
+ "eval_steps_per_second": 0.983,
529
+ "eval_top_10_accuracy": 0.8668639053254438,
530
+ "eval_top_1_accuracy": 0.46153846153846156,
531
+ "eval_top_5_accuracy": 0.7928994082840237,
532
+ "step": 3425
533
  },
534
  {
535
+ "epoch": 19.002083333333335,
536
+ "grad_norm": 66.9966812133789,
537
  "learning_rate": 4.855555555555556e-05,
538
+ "loss": 4.6739,
539
  "step": 3500
540
  },
541
  {
542
+ "epoch": 19.00486111111111,
543
+ "grad_norm": 29.572689056396484,
544
  "learning_rate": 4.994444444444445e-05,
545
+ "loss": 4.1619,
546
  "step": 3600
547
  },
548
  {
549
+ "epoch": 19.005,
550
+ "eval_accuracy": 0.47337278106508873,
551
+ "eval_f1": 0.4502898182762436,
552
+ "eval_loss": 2.1384310722351074,
553
+ "eval_precision": 0.5276356764827005,
554
+ "eval_recall": 0.47337278106508873,
555
+ "eval_runtime": 180.7053,
556
+ "eval_samples_per_second": 1.87,
557
+ "eval_steps_per_second": 0.935,
558
+ "eval_top_10_accuracy": 0.8816568047337278,
559
+ "eval_top_1_accuracy": 0.47337278106508873,
560
  "eval_top_5_accuracy": 0.7840236686390533,
561
  "step": 3605
562
  },
563
  {
564
+ "epoch": 20.002631944444445,
565
+ "grad_norm": 62.397911071777344,
566
  "learning_rate": 4.9851851851851855e-05,
567
+ "loss": 3.3413,
568
  "step": 3700
569
  },
570
  {
571
+ "epoch": 20.004993055555556,
572
+ "eval_accuracy": 0.5118343195266272,
573
+ "eval_f1": 0.49080667210330176,
574
+ "eval_loss": 1.958280324935913,
575
+ "eval_precision": 0.5484620698689525,
576
+ "eval_recall": 0.5118343195266272,
577
+ "eval_runtime": 173.8049,
578
+ "eval_samples_per_second": 1.945,
579
+ "eval_steps_per_second": 0.972,
580
+ "eval_top_10_accuracy": 0.9053254437869822,
581
+ "eval_top_1_accuracy": 0.5118343195266272,
582
+ "eval_top_5_accuracy": 0.8106508875739645,
583
  "step": 3785
584
  },
585
  {
586
+ "epoch": 21.00040277777778,
587
+ "grad_norm": 40.85105514526367,
588
  "learning_rate": 4.969753086419753e-05,
589
+ "loss": 3.4134,
590
  "step": 3800
591
  },
592
  {
593
+ "epoch": 21.003180555555556,
594
+ "grad_norm": 32.64407730102539,
595
  "learning_rate": 4.954320987654321e-05,
596
+ "loss": 2.5832,
597
  "step": 3900
598
  },
599
  {
600
+ "epoch": 21.004986111111112,
601
+ "eval_accuracy": 0.5,
602
+ "eval_f1": 0.47126995292675755,
603
+ "eval_loss": 1.86040198802948,
604
+ "eval_precision": 0.49999156826079894,
605
+ "eval_recall": 0.5,
606
+ "eval_runtime": 182.2543,
607
+ "eval_samples_per_second": 1.855,
608
+ "eval_steps_per_second": 0.927,
609
+ "eval_top_10_accuracy": 0.9201183431952663,
610
+ "eval_top_1_accuracy": 0.5,
611
+ "eval_top_5_accuracy": 0.8284023668639053,
612
  "step": 3965
613
  },
614
  {
615
+ "epoch": 22.00095138888889,
616
+ "grad_norm": 15.257333755493164,
617
  "learning_rate": 4.938888888888889e-05,
618
+ "loss": 2.2189,
619
  "step": 4000
620
  },
621
  {
622
+ "epoch": 22.003729166666666,
623
+ "grad_norm": 26.528512954711914,
624
+ "learning_rate": 4.923611111111112e-05,
625
+ "loss": 1.9003,
626
  "step": 4100
627
  },
628
  {
629
+ "epoch": 22.005006944444446,
630
+ "eval_accuracy": 0.5295857988165681,
631
+ "eval_f1": 0.5139563428024967,
632
+ "eval_loss": 1.9389936923980713,
633
+ "eval_precision": 0.5860205028757064,
634
+ "eval_recall": 0.5295857988165681,
635
+ "eval_runtime": 176.2308,
636
+ "eval_samples_per_second": 1.918,
637
+ "eval_steps_per_second": 0.959,
638
+ "eval_top_10_accuracy": 0.8905325443786982,
639
+ "eval_top_1_accuracy": 0.5295857988165681,
640
+ "eval_top_5_accuracy": 0.8195266272189349,
641
+ "step": 4146
642
  },
643
  {
644
+ "epoch": 23.0015,
645
+ "grad_norm": 42.9820556640625,
646
  "learning_rate": 4.9081790123456794e-05,
647
+ "loss": 1.5994,
648
  "step": 4200
649
  },
650
  {
651
+ "epoch": 23.004277777777776,
652
+ "grad_norm": 44.47490692138672,
653
  "learning_rate": 4.892746913580247e-05,
654
+ "loss": 1.4226,
655
  "step": 4300
656
  },
657
  {
658
+ "epoch": 23.005,
659
+ "eval_accuracy": 0.5266272189349113,
660
+ "eval_f1": 0.5303364916400071,
661
+ "eval_loss": 1.9288358688354492,
662
+ "eval_precision": 0.6340754166937599,
663
+ "eval_recall": 0.5266272189349113,
664
+ "eval_runtime": 175.095,
665
+ "eval_samples_per_second": 1.93,
666
+ "eval_steps_per_second": 0.965,
667
+ "eval_top_10_accuracy": 0.908284023668639,
668
+ "eval_top_1_accuracy": 0.5266272189349113,
669
+ "eval_top_5_accuracy": 0.8076923076923077,
670
  "step": 4326
671
  },
672
  {
673
+ "epoch": 24.00204861111111,
674
+ "grad_norm": 43.410343170166016,
675
  "learning_rate": 4.877314814814815e-05,
676
+ "loss": 1.1426,
677
  "step": 4400
678
  },
679
  {
680
+ "epoch": 24.00482638888889,
681
+ "grad_norm": 89.68692016601562,
682
  "learning_rate": 4.861882716049383e-05,
683
+ "loss": 1.1341,
684
  "step": 4500
685
  },
686
  {
687
+ "epoch": 24.004993055555556,
688
+ "eval_accuracy": 0.5266272189349113,
689
+ "eval_f1": 0.5078898808033033,
690
+ "eval_loss": 1.8854230642318726,
691
+ "eval_precision": 0.5719492128515797,
692
+ "eval_recall": 0.5266272189349113,
693
+ "eval_runtime": 184.1675,
694
+ "eval_samples_per_second": 1.835,
695
+ "eval_steps_per_second": 0.918,
696
+ "eval_top_10_accuracy": 0.8964497041420119,
697
+ "eval_top_1_accuracy": 0.5266272189349113,
698
+ "eval_top_5_accuracy": 0.7899408284023669,
699
  "step": 4506
700
  },
701
  {
702
+ "epoch": 25.00259722222222,
703
+ "grad_norm": 7.714802265167236,
704
  "learning_rate": 4.8464506172839505e-05,
705
+ "loss": 0.9315,
706
  "step": 4600
707
  },
708
  {
709
+ "epoch": 25.004986111111112,
710
+ "eval_accuracy": 0.5769230769230769,
711
+ "eval_f1": 0.5564708795478026,
712
+ "eval_loss": 1.7328224182128906,
713
+ "eval_precision": 0.6260812905043673,
714
+ "eval_recall": 0.5769230769230769,
715
+ "eval_runtime": 67.8286,
716
+ "eval_samples_per_second": 4.983,
717
+ "eval_steps_per_second": 2.492,
718
+ "eval_top_10_accuracy": 0.8994082840236687,
719
+ "eval_top_1_accuracy": 0.5769230769230769,
720
+ "eval_top_5_accuracy": 0.8461538461538461,
721
  "step": 4686
722
  },
723
  {
724
+ "epoch": 26.000368055555555,
725
+ "grad_norm": 11.666605949401855,
726
  "learning_rate": 4.831018518518518e-05,
727
+ "loss": 0.7589,
728
  "step": 4700
729
  },
730
  {
731
+ "epoch": 26.003145833333335,
732
+ "grad_norm": 80.17562866210938,
733
  "learning_rate": 4.815586419753087e-05,
734
+ "loss": 0.7536,
735
  "step": 4800
736
  },
737
  {
738
+ "epoch": 26.005006944444446,
739
  "eval_accuracy": 0.5384615384615384,
740
+ "eval_f1": 0.5296188525478466,
741
+ "eval_loss": 1.8349210023880005,
742
+ "eval_precision": 0.6288461538461538,
743
  "eval_recall": 0.5384615384615384,
744
+ "eval_runtime": 58.1307,
745
+ "eval_samples_per_second": 5.814,
746
+ "eval_steps_per_second": 2.907,
747
+ "eval_top_10_accuracy": 0.8964497041420119,
748
+ "eval_top_1_accuracy": 0.5384615384615384,
749
+ "eval_top_5_accuracy": 0.8195266272189349,
750
+ "step": 4867
751
  },
752
  {
753
+ "epoch": 27.000916666666665,
754
+ "grad_norm": 11.256963729858398,
755
  "learning_rate": 4.8001543209876545e-05,
756
+ "loss": 0.6656,
757
  "step": 4900
758
  },
759
  {
760
+ "epoch": 27.003694444444445,
761
+ "grad_norm": 13.579202651977539,
762
  "learning_rate": 4.784722222222223e-05,
763
+ "loss": 0.4518,
764
  "step": 5000
765
  },
766
  {
767
+ "epoch": 27.005,
768
+ "eval_accuracy": 0.5532544378698225,
769
+ "eval_f1": 0.5436848818505623,
770
+ "eval_loss": 1.7999422550201416,
771
+ "eval_precision": 0.6275922795153565,
772
+ "eval_recall": 0.5532544378698225,
773
+ "eval_runtime": 56.1604,
774
+ "eval_samples_per_second": 6.018,
775
+ "eval_steps_per_second": 3.009,
776
+ "eval_top_10_accuracy": 0.9171597633136095,
777
+ "eval_top_1_accuracy": 0.5532544378698225,
778
+ "eval_top_5_accuracy": 0.8550295857988166,
779
  "step": 5047
780
  },
781
  {
782
+ "epoch": 28.00146527777778,
783
+ "grad_norm": 18.927217483520508,
784
  "learning_rate": 4.769290123456791e-05,
785
+ "loss": 0.3555,
786
  "step": 5100
787
  },
788
  {
789
+ "epoch": 28.004243055555555,
790
+ "grad_norm": 7.268669605255127,
791
  "learning_rate": 4.7538580246913585e-05,
792
+ "loss": 0.3322,
793
  "step": 5200
794
  },
795
  {
796
+ "epoch": 28.004993055555556,
797
+ "eval_accuracy": 0.6005917159763313,
798
+ "eval_f1": 0.5895704722627799,
799
+ "eval_loss": 1.693082571029663,
800
+ "eval_precision": 0.6577521837137221,
801
+ "eval_recall": 0.6005917159763313,
802
+ "eval_runtime": 60.4433,
803
+ "eval_samples_per_second": 5.592,
804
+ "eval_steps_per_second": 2.796,
805
+ "eval_top_10_accuracy": 0.9260355029585798,
806
+ "eval_top_1_accuracy": 0.6005917159763313,
807
+ "eval_top_5_accuracy": 0.849112426035503,
808
  "step": 5227
809
  },
810
  {
811
+ "epoch": 29.00201388888889,
812
+ "grad_norm": 17.36412239074707,
813
  "learning_rate": 4.738425925925926e-05,
814
+ "loss": 0.2749,
815
  "step": 5300
816
  },
817
  {
818
+ "epoch": 29.004791666666666,
819
+ "grad_norm": 1.4117032289505005,
820
  "learning_rate": 4.722993827160494e-05,
821
+ "loss": 0.403,
822
  "step": 5400
823
  },
824
  {
825
+ "epoch": 29.004986111111112,
826
+ "eval_accuracy": 0.5739644970414202,
827
+ "eval_f1": 0.5584189150757197,
828
+ "eval_loss": 1.7999780178070068,
829
+ "eval_precision": 0.6239610652332546,
830
+ "eval_recall": 0.5739644970414202,
831
+ "eval_runtime": 57.9837,
832
+ "eval_samples_per_second": 5.829,
833
+ "eval_steps_per_second": 2.915,
834
+ "eval_top_10_accuracy": 0.9142011834319527,
835
+ "eval_top_1_accuracy": 0.5739644970414202,
836
+ "eval_top_5_accuracy": 0.8461538461538461,
837
  "step": 5407
838
  },
839
  {
840
+ "epoch": 30.0025625,
841
+ "grad_norm": 0.5923660397529602,
842
  "learning_rate": 4.707561728395062e-05,
843
+ "loss": 0.1837,
844
  "step": 5500
845
  },
846
  {
847
+ "epoch": 30.005006944444446,
848
+ "eval_accuracy": 0.5769230769230769,
849
+ "eval_f1": 0.5664270128680152,
850
+ "eval_loss": 1.876481056213379,
851
+ "eval_precision": 0.6391236968160044,
852
+ "eval_recall": 0.5769230769230769,
853
+ "eval_runtime": 60.7728,
854
+ "eval_samples_per_second": 5.562,
855
+ "eval_steps_per_second": 2.781,
856
+ "eval_top_10_accuracy": 0.893491124260355,
857
+ "eval_top_1_accuracy": 0.5769230769230769,
858
+ "eval_top_5_accuracy": 0.8372781065088757,
859
+ "step": 5588
860
+ },
861
+ {
862
+ "epoch": 31.000333333333334,
863
+ "grad_norm": 20.429960250854492,
864
+ "learning_rate": 4.6922839506172846e-05,
865
+ "loss": 0.3429,
866
  "step": 5600
867
  },
868
  {
869
+ "epoch": 31.00311111111111,
870
+ "grad_norm": 11.760103225708008,
871
+ "learning_rate": 4.6768518518518524e-05,
872
+ "loss": 0.1579,
873
  "step": 5700
874
  },
875
  {
876
+ "epoch": 31.005,
877
+ "eval_accuracy": 0.5473372781065089,
878
+ "eval_f1": 0.5426257667381927,
879
+ "eval_loss": 2.075249195098877,
880
+ "eval_precision": 0.6336519815026602,
881
+ "eval_recall": 0.5473372781065089,
882
+ "eval_runtime": 60.9135,
883
+ "eval_samples_per_second": 5.549,
884
+ "eval_steps_per_second": 2.774,
885
+ "eval_top_10_accuracy": 0.8994082840236687,
886
+ "eval_top_1_accuracy": 0.5473372781065089,
887
+ "eval_top_5_accuracy": 0.8431952662721893,
888
  "step": 5768
889
  },
890
  {
891
+ "epoch": 32.000881944444444,
892
+ "grad_norm": 1.1906695365905762,
893
+ "learning_rate": 4.66141975308642e-05,
894
+ "loss": 0.3029,
895
  "step": 5800
896
  },
897
  {
898
+ "epoch": 32.003659722222224,
899
+ "grad_norm": 75.9498519897461,
900
+ "learning_rate": 4.645987654320988e-05,
901
+ "loss": 0.2079,
902
  "step": 5900
903
  },
904
  {
905
+ "epoch": 32.00499305555556,
906
+ "eval_accuracy": 0.5946745562130178,
907
+ "eval_f1": 0.5791488754506505,
908
+ "eval_loss": 1.9234323501586914,
909
+ "eval_precision": 0.6524024052870208,
910
+ "eval_recall": 0.5946745562130178,
911
+ "eval_runtime": 57.5153,
912
+ "eval_samples_per_second": 5.877,
913
+ "eval_steps_per_second": 2.938,
914
+ "eval_top_10_accuracy": 0.8964497041420119,
915
+ "eval_top_1_accuracy": 0.5946745562130178,
916
  "eval_top_5_accuracy": 0.8136094674556213,
917
  "step": 5948
918
  },
919
  {
920
+ "epoch": 33.00143055555556,
921
+ "grad_norm": 0.5180000066757202,
922
+ "learning_rate": 4.630555555555556e-05,
923
+ "loss": 0.1503,
924
  "step": 6000
925
  },
926
  {
927
+ "epoch": 33.00420833333333,
928
+ "grad_norm": 0.7615421414375305,
929
+ "learning_rate": 4.6151234567901235e-05,
930
+ "loss": 0.2738,
931
  "step": 6100
932
  },
933
  {
934
+ "epoch": 33.00498611111111,
935
+ "eval_accuracy": 0.606508875739645,
936
+ "eval_f1": 0.5892127021564892,
937
+ "eval_loss": 1.8528887033462524,
938
+ "eval_precision": 0.6621978202599504,
939
+ "eval_recall": 0.606508875739645,
940
+ "eval_runtime": 61.6919,
941
+ "eval_samples_per_second": 5.479,
942
+ "eval_steps_per_second": 2.739,
943
+ "eval_top_10_accuracy": 0.9023668639053254,
944
+ "eval_top_1_accuracy": 0.6035502958579881,
945
+ "eval_top_5_accuracy": 0.849112426035503,
946
  "step": 6128
947
  },
948
  {
949
+ "epoch": 34.001979166666665,
950
+ "grad_norm": 0.34027883410453796,
951
  "learning_rate": 4.599691358024691e-05,
952
+ "loss": 0.0955,
953
  "step": 6200
954
  },
955
  {
956
+ "epoch": 34.004756944444445,
957
+ "grad_norm": 0.2066214680671692,
958
  "learning_rate": 4.584259259259259e-05,
959
+ "loss": 0.2621,
960
  "step": 6300
961
  },
962
  {
963
+ "epoch": 34.005006944444446,
964
+ "eval_accuracy": 0.5739644970414202,
965
+ "eval_f1": 0.5639503658734427,
966
+ "eval_loss": 1.9905767440795898,
967
+ "eval_precision": 0.6191074950690336,
968
+ "eval_recall": 0.5739644970414202,
969
+ "eval_runtime": 75.9577,
970
+ "eval_samples_per_second": 4.45,
971
+ "eval_steps_per_second": 2.225,
972
+ "eval_top_10_accuracy": 0.9171597633136095,
973
+ "eval_top_1_accuracy": 0.5739644970414202,
974
+ "eval_top_5_accuracy": 0.8284023668639053,
975
+ "step": 6309
976
  },
977
  {
978
+ "epoch": 35.00252777777778,
979
+ "grad_norm": 0.1237727478146553,
980
  "learning_rate": 4.5688271604938275e-05,
981
+ "loss": 0.2024,
982
  "step": 6400
983
  },
984
  {
985
+ "epoch": 35.005,
986
+ "eval_accuracy": 0.5976331360946746,
987
+ "eval_f1": 0.5885829981983829,
988
+ "eval_loss": 1.8942253589630127,
989
+ "eval_precision": 0.6614584133814904,
990
+ "eval_recall": 0.5976331360946746,
991
+ "eval_runtime": 60.0376,
992
+ "eval_samples_per_second": 5.63,
993
+ "eval_steps_per_second": 2.815,
994
+ "eval_top_10_accuracy": 0.9260355029585798,
995
+ "eval_top_1_accuracy": 0.5976331360946746,
996
+ "eval_top_5_accuracy": 0.863905325443787,
997
  "step": 6489
998
  },
999
  {
1000
+ "epoch": 36.00029861111111,
1001
+ "grad_norm": 0.21603639423847198,
1002
+ "learning_rate": 4.553395061728395e-05,
1003
+ "loss": 0.1411,
1004
  "step": 6500
1005
  },
1006
  {
1007
+ "epoch": 36.003076388888886,
1008
+ "grad_norm": 0.11866825819015503,
1009
+ "learning_rate": 4.537962962962963e-05,
1010
+ "loss": 0.0983,
1011
  "step": 6600
1012
  },
1013
  {
1014
+ "epoch": 36.00499305555556,
1015
+ "eval_accuracy": 0.5857988165680473,
1016
+ "eval_f1": 0.5728611953464025,
1017
+ "eval_loss": 2.0340006351470947,
1018
+ "eval_precision": 0.6499939163400703,
1019
+ "eval_recall": 0.5857988165680473,
1020
+ "eval_runtime": 58.3576,
1021
+ "eval_samples_per_second": 5.792,
1022
+ "eval_steps_per_second": 2.896,
1023
+ "eval_top_10_accuracy": 0.8846153846153846,
1024
+ "eval_top_1_accuracy": 0.5857988165680473,
1025
+ "eval_top_5_accuracy": 0.8254437869822485,
1026
  "step": 6669
1027
  },
1028
  {
1029
+ "epoch": 37.00084722222222,
1030
+ "grad_norm": 8.090852737426758,
1031
+ "learning_rate": 4.5225308641975314e-05,
1032
+ "loss": 0.13,
1033
  "step": 6700
1034
  },
1035
  {
1036
+ "epoch": 37.003625,
1037
+ "grad_norm": 0.11927696317434311,
1038
+ "learning_rate": 4.507098765432099e-05,
1039
+ "loss": 0.0592,
1040
  "step": 6800
1041
  },
1042
  {
1043
+ "epoch": 37.00498611111111,
1044
+ "eval_accuracy": 0.6094674556213018,
1045
+ "eval_f1": 0.5985522367179171,
1046
+ "eval_loss": 1.8492796421051025,
1047
+ "eval_precision": 0.6775016650016649,
1048
+ "eval_recall": 0.6094674556213018,
1049
+ "eval_runtime": 58.2549,
1050
+ "eval_samples_per_second": 5.802,
1051
+ "eval_steps_per_second": 2.901,
1052
+ "eval_top_10_accuracy": 0.9230769230769231,
1053
+ "eval_top_1_accuracy": 0.6094674556213018,
1054
+ "eval_top_5_accuracy": 0.8609467455621301,
1055
  "step": 6849
1056
  },
1057
  {
1058
+ "epoch": 38.00139583333333,
1059
+ "grad_norm": 0.092709481716156,
1060
+ "learning_rate": 4.491666666666667e-05,
1061
+ "loss": 0.1859,
1062
  "step": 6900
1063
  },
1064
  {
1065
+ "epoch": 38.00417361111111,
1066
+ "grad_norm": 1.1074143648147583,
1067
+ "learning_rate": 4.476234567901235e-05,
1068
+ "loss": 0.0922,
1069
  "step": 7000
1070
  },
1071
  {
1072
+ "epoch": 38.005006944444446,
1073
+ "eval_accuracy": 0.6301775147928994,
1074
+ "eval_f1": 0.6124992036234641,
1075
+ "eval_loss": 1.9035998582839966,
1076
+ "eval_precision": 0.6824995517303208,
1077
+ "eval_recall": 0.6301775147928994,
1078
+ "eval_runtime": 55.9719,
1079
+ "eval_samples_per_second": 6.039,
1080
+ "eval_steps_per_second": 3.019,
1081
+ "eval_top_10_accuracy": 0.9260355029585798,
1082
+ "eval_top_1_accuracy": 0.6301775147928994,
1083
+ "eval_top_5_accuracy": 0.8668639053254438,
1084
+ "step": 7030
1085
+ },
1086
+ {
1087
+ "epoch": 39.00194444444445,
1088
+ "grad_norm": 25.988325119018555,
1089
+ "learning_rate": 4.4608024691358025e-05,
1090
+ "loss": 0.1847,
1091
  "step": 7100
1092
  },
1093
  {
1094
+ "epoch": 39.00472222222222,
1095
+ "grad_norm": 22.964984893798828,
1096
+ "learning_rate": 4.445370370370371e-05,
1097
+ "loss": 0.1547,
1098
  "step": 7200
1099
  },
1100
  {
1101
+ "epoch": 39.005,
1102
+ "eval_accuracy": 0.6035502958579881,
1103
+ "eval_f1": 0.5948209446121037,
1104
+ "eval_loss": 1.989665150642395,
1105
+ "eval_precision": 0.6725563391243864,
1106
+ "eval_recall": 0.6035502958579881,
1107
+ "eval_runtime": 61.9464,
1108
+ "eval_samples_per_second": 5.456,
1109
+ "eval_steps_per_second": 2.728,
1110
+ "eval_top_10_accuracy": 0.9053254437869822,
1111
+ "eval_top_1_accuracy": 0.6035502958579881,
1112
+ "eval_top_5_accuracy": 0.8431952662721893,
1113
  "step": 7210
1114
  },
1115
  {
1116
+ "epoch": 40.002493055555554,
1117
+ "grad_norm": 0.05825158208608627,
1118
+ "learning_rate": 4.429938271604939e-05,
1119
+ "loss": 0.1162,
1120
  "step": 7300
1121
  },
1122
  {
1123
+ "epoch": 40.00499305555556,
1124
+ "eval_accuracy": 0.5828402366863905,
1125
+ "eval_f1": 0.5674965566977401,
1126
+ "eval_loss": 2.305551767349243,
1127
+ "eval_precision": 0.651775147928994,
1128
+ "eval_recall": 0.5828402366863905,
1129
+ "eval_runtime": 63.1766,
1130
+ "eval_samples_per_second": 5.35,
1131
+ "eval_steps_per_second": 2.675,
1132
+ "eval_top_10_accuracy": 0.8875739644970414,
1133
+ "eval_top_1_accuracy": 0.5828402366863905,
1134
+ "eval_top_5_accuracy": 0.8284023668639053,
1135
  "step": 7390
1136
  },
1137
  {
1138
+ "epoch": 41.00026388888889,
1139
+ "grad_norm": 0.08664705604314804,
1140
+ "learning_rate": 4.4145061728395065e-05,
1141
+ "loss": 0.1941,
1142
  "step": 7400
1143
  },
1144
  {
1145
+ "epoch": 41.00304166666667,
1146
+ "grad_norm": 1.6665265560150146,
1147
+ "learning_rate": 4.399074074074074e-05,
1148
+ "loss": 0.0514,
1149
  "step": 7500
1150
  },
1151
  {
1152
+ "epoch": 41.00498611111111,
1153
+ "eval_accuracy": 0.5887573964497042,
1154
+ "eval_f1": 0.5767313882698497,
1155
+ "eval_loss": 2.3211424350738525,
1156
+ "eval_precision": 0.6510214144829529,
1157
+ "eval_recall": 0.5887573964497042,
1158
+ "eval_runtime": 55.5478,
1159
+ "eval_samples_per_second": 6.085,
1160
+ "eval_steps_per_second": 3.042,
1161
+ "eval_top_10_accuracy": 0.8816568047337278,
1162
+ "eval_top_1_accuracy": 0.5887573964497042,
1163
+ "eval_top_5_accuracy": 0.7988165680473372,
1164
  "step": 7570
1165
  },
1166
  {
1167
+ "epoch": 42.0008125,
1168
+ "grad_norm": 0.07863179594278336,
1169
+ "learning_rate": 4.383641975308642e-05,
1170
+ "loss": 0.1709,
1171
  "step": 7600
1172
  },
1173
  {
1174
+ "epoch": 42.003590277777775,
1175
+ "grad_norm": 0.14477179944515228,
1176
+ "learning_rate": 4.36820987654321e-05,
1177
+ "loss": 0.1138,
1178
  "step": 7700
1179
  },
1180
  {
1181
+ "epoch": 42.005006944444446,
1182
+ "eval_accuracy": 0.5739644970414202,
1183
+ "eval_f1": 0.562847029406375,
1184
+ "eval_loss": 2.3148906230926514,
1185
+ "eval_precision": 0.6284772919388303,
1186
+ "eval_recall": 0.5739644970414202,
1187
+ "eval_runtime": 59.6045,
1188
+ "eval_samples_per_second": 5.671,
1189
+ "eval_steps_per_second": 2.835,
1190
+ "eval_top_10_accuracy": 0.9053254437869822,
1191
+ "eval_top_1_accuracy": 0.5739644970414202,
1192
+ "eval_top_5_accuracy": 0.849112426035503,
1193
+ "step": 7751
1194
+ },
1195
+ {
1196
+ "epoch": 43.00136111111111,
1197
+ "grad_norm": 0.37179991602897644,
1198
+ "learning_rate": 4.3527777777777776e-05,
1199
+ "loss": 0.1083,
1200
  "step": 7800
1201
  },
1202
  {
1203
+ "epoch": 43.00413888888889,
1204
+ "grad_norm": 0.11481554806232452,
1205
+ "learning_rate": 4.337345679012345e-05,
1206
+ "loss": 0.1197,
1207
  "step": 7900
1208
  },
1209
  {
1210
+ "epoch": 43.005,
1211
+ "eval_accuracy": 0.6124260355029586,
1212
+ "eval_f1": 0.6072961965713446,
1213
+ "eval_loss": 2.1155619621276855,
1214
+ "eval_precision": 0.6907422459197607,
1215
+ "eval_recall": 0.6124260355029586,
1216
+ "eval_runtime": 73.3826,
1217
+ "eval_samples_per_second": 4.606,
1218
+ "eval_steps_per_second": 2.303,
1219
+ "eval_top_10_accuracy": 0.9171597633136095,
1220
+ "eval_top_1_accuracy": 0.6124260355029586,
1221
+ "eval_top_5_accuracy": 0.8668639053254438,
1222
  "step": 7931
1223
  },
1224
  {
1225
+ "epoch": 44.00190972222222,
1226
+ "grad_norm": 0.03723059222102165,
1227
  "learning_rate": 4.322067901234568e-05,
1228
+ "loss": 0.1633,
1229
  "step": 8000
1230
  },
1231
  {
1232
+ "epoch": 44.0046875,
1233
+ "grad_norm": 2.34633207321167,
1234
  "learning_rate": 4.306635802469136e-05,
1235
+ "loss": 0.0673,
1236
  "step": 8100
1237
  },
1238
  {
1239
+ "epoch": 44.00499305555556,
1240
+ "eval_accuracy": 0.5828402366863905,
1241
+ "eval_f1": 0.5640534640360605,
1242
+ "eval_loss": 2.2834787368774414,
1243
+ "eval_precision": 0.6274122984182156,
1244
+ "eval_recall": 0.5828402366863905,
1245
+ "eval_runtime": 72.2307,
1246
+ "eval_samples_per_second": 4.679,
1247
+ "eval_steps_per_second": 2.34,
1248
+ "eval_top_10_accuracy": 0.908284023668639,
1249
+ "eval_top_1_accuracy": 0.5828402366863905,
1250
+ "eval_top_5_accuracy": 0.8579881656804734,
1251
  "step": 8111
1252
  },
1253
  {
1254
+ "epoch": 45.00245833333334,
1255
+ "grad_norm": 0.08984989672899246,
1256
  "learning_rate": 4.291203703703704e-05,
1257
+ "loss": 0.1501,
1258
  "step": 8200
1259
  },
1260
  {
1261
+ "epoch": 45.00498611111111,
1262
+ "eval_accuracy": 0.591715976331361,
1263
+ "eval_f1": 0.5756663372787633,
1264
+ "eval_loss": 2.271914482116699,
1265
+ "eval_precision": 0.6418798903266358,
1266
+ "eval_recall": 0.591715976331361,
1267
+ "eval_runtime": 57.8957,
1268
+ "eval_samples_per_second": 5.838,
1269
+ "eval_steps_per_second": 2.919,
1270
+ "eval_top_10_accuracy": 0.8905325443786982,
1271
+ "eval_top_1_accuracy": 0.591715976331361,
1272
+ "eval_top_5_accuracy": 0.8520710059171598,
1273
  "step": 8291
1274
  },
1275
  {
1276
+ "epoch": 46.000229166666664,
1277
+ "grad_norm": 0.03222246095538139,
1278
  "learning_rate": 4.2757716049382715e-05,
1279
+ "loss": 0.059,
1280
  "step": 8300
1281
  },
1282
  {
1283
+ "epoch": 46.003006944444444,
1284
+ "grad_norm": 0.024875333532691002,
1285
+ "learning_rate": 4.2604938271604936e-05,
1286
+ "loss": 0.2022,
1287
  "step": 8400
1288
  },
1289
  {
1290
+ "epoch": 46.005006944444446,
1291
+ "eval_accuracy": 0.5562130177514792,
1292
+ "eval_f1": 0.5402277209969517,
1293
+ "eval_loss": 2.3422443866729736,
1294
+ "eval_precision": 0.6034023668639052,
1295
+ "eval_recall": 0.5562130177514792,
1296
+ "eval_runtime": 58.5821,
1297
+ "eval_samples_per_second": 5.77,
1298
+ "eval_steps_per_second": 2.885,
1299
+ "eval_top_10_accuracy": 0.9053254437869822,
1300
+ "eval_top_1_accuracy": 0.5562130177514792,
1301
+ "eval_top_5_accuracy": 0.849112426035503,
1302
+ "step": 8472
1303
  },
1304
  {
1305
+ "epoch": 47.00077777777778,
1306
+ "grad_norm": 1.308508038520813,
1307
+ "learning_rate": 4.2450617283950614e-05,
1308
+ "loss": 0.1197,
1309
  "step": 8500
1310
  },
1311
  {
1312
+ "epoch": 47.00355555555556,
1313
+ "grad_norm": 4.812650203704834,
1314
+ "learning_rate": 4.22962962962963e-05,
1315
+ "loss": 0.2185,
1316
  "step": 8600
1317
  },
1318
  {
1319
+ "epoch": 47.005,
1320
+ "eval_accuracy": 0.5236686390532544,
1321
+ "eval_f1": 0.5125209685120928,
1322
+ "eval_loss": 2.64306378364563,
1323
+ "eval_precision": 0.580839847007616,
1324
+ "eval_recall": 0.5236686390532544,
1325
+ "eval_runtime": 57.4714,
1326
+ "eval_samples_per_second": 5.881,
1327
+ "eval_steps_per_second": 2.941,
1328
+ "eval_top_10_accuracy": 0.8816568047337278,
1329
+ "eval_top_1_accuracy": 0.5236686390532544,
1330
+ "eval_top_5_accuracy": 0.8284023668639053,
1331
  "step": 8652
1332
  },
1333
  {
1334
+ "epoch": 48.00132638888889,
1335
+ "grad_norm": 0.9780556559562683,
1336
+ "learning_rate": 4.2141975308641976e-05,
1337
+ "loss": 0.1822,
1338
  "step": 8700
1339
  },
1340
  {
1341
+ "epoch": 48.004104166666664,
1342
+ "grad_norm": 0.10755691677331924,
1343
+ "learning_rate": 4.1987654320987654e-05,
1344
+ "loss": 0.2385,
1345
  "step": 8800
1346
  },
1347
  {
1348
+ "epoch": 48.00499305555556,
1349
+ "eval_accuracy": 0.5798816568047337,
1350
+ "eval_f1": 0.5622720967395524,
1351
+ "eval_loss": 2.3147056102752686,
1352
+ "eval_precision": 0.6323823612285151,
1353
+ "eval_recall": 0.5798816568047337,
1354
+ "eval_runtime": 61.6966,
1355
+ "eval_samples_per_second": 5.478,
1356
+ "eval_steps_per_second": 2.739,
1357
+ "eval_top_10_accuracy": 0.9053254437869822,
1358
+ "eval_top_1_accuracy": 0.5798816568047337,
1359
+ "eval_top_5_accuracy": 0.8520710059171598,
1360
  "step": 8832
1361
  },
1362
  {
1363
+ "epoch": 49.001875,
1364
+ "grad_norm": 0.048398274928331375,
1365
  "learning_rate": 4.183333333333334e-05,
1366
+ "loss": 0.1209,
1367
  "step": 8900
1368
  },
1369
  {
1370
+ "epoch": 49.00465277777778,
1371
+ "grad_norm": 116.13093566894531,
1372
  "learning_rate": 4.1679012345679016e-05,
1373
+ "loss": 0.1769,
1374
  "step": 9000
1375
  },
1376
  {
1377
+ "epoch": 49.00498611111111,
1378
+ "eval_accuracy": 0.5769230769230769,
1379
+ "eval_f1": 0.5621830092983939,
1380
+ "eval_loss": 2.345111131668091,
1381
+ "eval_precision": 0.6246256701878005,
1382
+ "eval_recall": 0.5769230769230769,
1383
+ "eval_runtime": 56.4804,
1384
+ "eval_samples_per_second": 5.984,
1385
+ "eval_steps_per_second": 2.992,
1386
+ "eval_top_10_accuracy": 0.8875739644970414,
1387
+ "eval_top_1_accuracy": 0.5769230769230769,
1388
+ "eval_top_5_accuracy": 0.8372781065088757,
1389
  "step": 9012
1390
  },
1391
  {
1392
+ "epoch": 50.00242361111111,
1393
+ "grad_norm": 6.833434104919434,
1394
  "learning_rate": 4.1524691358024694e-05,
1395
+ "loss": 0.1927,
1396
  "step": 9100
1397
  },
1398
  {
1399
+ "epoch": 50.005006944444446,
1400
+ "eval_accuracy": 0.5562130177514792,
1401
+ "eval_f1": 0.5347152354548804,
1402
+ "eval_loss": 2.7140073776245117,
1403
+ "eval_precision": 0.6024619611158072,
1404
+ "eval_recall": 0.5562130177514792,
1405
+ "eval_runtime": 57.0126,
1406
+ "eval_samples_per_second": 5.929,
1407
+ "eval_steps_per_second": 2.964,
1408
+ "eval_top_10_accuracy": 0.8727810650887574,
1409
+ "eval_top_1_accuracy": 0.5562130177514792,
1410
+ "eval_top_5_accuracy": 0.8017751479289941,
1411
+ "step": 9193
1412
  },
1413
  {
1414
+ "epoch": 51.000194444444446,
1415
+ "grad_norm": 169.2981719970703,
1416
  "learning_rate": 4.137037037037037e-05,
1417
+ "loss": 0.302,
1418
  "step": 9200
1419
  },
1420
  {
1421
+ "epoch": 51.00297222222222,
1422
+ "grad_norm": 0.07411950081586838,
1423
  "learning_rate": 4.121604938271605e-05,
1424
+ "loss": 0.2048,
1425
  "step": 9300
1426
  },
1427
  {
1428
+ "epoch": 51.005,
1429
+ "eval_accuracy": 0.591715976331361,
1430
+ "eval_f1": 0.5748348298644157,
1431
+ "eval_loss": 2.3875668048858643,
1432
+ "eval_precision": 0.6367497886728656,
1433
+ "eval_recall": 0.591715976331361,
1434
+ "eval_runtime": 57.5583,
1435
+ "eval_samples_per_second": 5.872,
1436
+ "eval_steps_per_second": 2.936,
1437
+ "eval_top_10_accuracy": 0.893491124260355,
1438
+ "eval_top_1_accuracy": 0.591715976331361,
1439
+ "eval_top_5_accuracy": 0.8224852071005917,
1440
  "step": 9373
1441
  },
1442
  {
1443
+ "epoch": 52.00074305555555,
1444
+ "grad_norm": 80.62044525146484,
1445
  "learning_rate": 4.1061728395061733e-05,
1446
+ "loss": 0.2414,
1447
  "step": 9400
1448
  },
1449
  {
1450
+ "epoch": 52.00352083333333,
1451
+ "grad_norm": 2.4975171089172363,
1452
  "learning_rate": 4.090740740740741e-05,
1453
+ "loss": 0.1608,
1454
  "step": 9500
1455
  },
1456
  {
1457
+ "epoch": 52.00499305555556,
1458
+ "eval_accuracy": 0.5266272189349113,
1459
+ "eval_f1": 0.5013487498694599,
1460
+ "eval_loss": 2.6982638835906982,
1461
+ "eval_precision": 0.5645437895437895,
1462
+ "eval_recall": 0.5266272189349113,
1463
+ "eval_runtime": 59.8704,
1464
+ "eval_samples_per_second": 5.646,
1465
+ "eval_steps_per_second": 2.823,
1466
+ "eval_top_10_accuracy": 0.8579881656804734,
1467
+ "eval_top_1_accuracy": 0.5266272189349113,
1468
+ "eval_top_5_accuracy": 0.8076923076923077,
1469
  "step": 9553
1470
  },
1471
  {
1472
+ "epoch": 53.00129166666667,
1473
+ "grad_norm": 0.140806183218956,
1474
  "learning_rate": 4.075308641975309e-05,
1475
+ "loss": 0.1078,
1476
  "step": 9600
1477
  },
1478
  {
1479
+ "epoch": 53.00406944444445,
1480
+ "grad_norm": 0.03946617618203163,
1481
  "learning_rate": 4.0598765432098767e-05,
1482
+ "loss": 0.1256,
1483
  "step": 9700
1484
  },
1485
  {
1486
+ "epoch": 53.00498611111111,
1487
+ "eval_accuracy": 0.5384615384615384,
1488
+ "eval_f1": 0.5257081839922076,
1489
+ "eval_loss": 2.7464194297790527,
1490
+ "eval_precision": 0.5773428494582341,
1491
+ "eval_recall": 0.5384615384615384,
1492
+ "eval_runtime": 79.9762,
1493
+ "eval_samples_per_second": 4.226,
1494
+ "eval_steps_per_second": 2.113,
1495
+ "eval_top_10_accuracy": 0.8905325443786982,
1496
+ "eval_top_1_accuracy": 0.5384615384615384,
1497
+ "eval_top_5_accuracy": 0.8017751479289941,
1498
  "step": 9733
1499
  },
1500
  {
1501
+ "epoch": 54.00184027777778,
1502
+ "grad_norm": 0.018318645656108856,
1503
  "learning_rate": 4.0444444444444444e-05,
1504
+ "loss": 0.1146,
1505
  "step": 9800
1506
  },
1507
  {
1508
+ "epoch": 54.004618055555554,
1509
+ "grad_norm": 107.91517639160156,
1510
  "learning_rate": 4.029012345679012e-05,
1511
+ "loss": 0.1327,
1512
  "step": 9900
1513
  },
1514
  {
1515
+ "epoch": 54.005006944444446,
1516
+ "eval_accuracy": 0.5650887573964497,
1517
+ "eval_f1": 0.5342032063925555,
1518
+ "eval_loss": 2.513326644897461,
1519
+ "eval_precision": 0.5812130177514792,
1520
+ "eval_recall": 0.5650887573964497,
1521
+ "eval_runtime": 57.9139,
1522
+ "eval_samples_per_second": 5.836,
1523
+ "eval_steps_per_second": 2.918,
1524
+ "eval_top_10_accuracy": 0.8846153846153846,
1525
+ "eval_top_1_accuracy": 0.5650887573964497,
1526
+ "eval_top_5_accuracy": 0.7988165680473372,
1527
+ "step": 9914
1528
+ },
1529
+ {
1530
+ "epoch": 55.00238888888889,
1531
+ "grad_norm": 0.045430343598127365,
1532
+ "learning_rate": 4.01358024691358e-05,
1533
+ "loss": 0.0503,
1534
+ "step": 10000
1535
+ },
1536
+ {
1537
+ "epoch": 55.005,
1538
+ "eval_accuracy": 0.5769230769230769,
1539
+ "eval_f1": 0.5635061355475556,
1540
+ "eval_loss": 2.5686514377593994,
1541
+ "eval_precision": 0.6335951441720672,
1542
+ "eval_recall": 0.5769230769230769,
1543
+ "eval_runtime": 62.2713,
1544
+ "eval_samples_per_second": 5.428,
1545
+ "eval_steps_per_second": 2.714,
1546
+ "eval_top_10_accuracy": 0.8964497041420119,
1547
+ "eval_top_1_accuracy": 0.5769230769230769,
1548
+ "eval_top_5_accuracy": 0.8313609467455622,
1549
+ "step": 10094
1550
+ },
1551
+ {
1552
+ "epoch": 56.00015972222222,
1553
+ "grad_norm": 0.011242308653891087,
1554
+ "learning_rate": 3.9981481481481484e-05,
1555
+ "loss": 0.1127,
1556
+ "step": 10100
1557
+ },
1558
+ {
1559
+ "epoch": 56.0029375,
1560
+ "grad_norm": 0.01931876875460148,
1561
+ "learning_rate": 3.982716049382716e-05,
1562
+ "loss": 0.0841,
1563
+ "step": 10200
1564
+ },
1565
+ {
1566
+ "epoch": 56.00499305555556,
1567
+ "eval_accuracy": 0.5502958579881657,
1568
+ "eval_f1": 0.53286526097177,
1569
+ "eval_loss": 2.731083869934082,
1570
+ "eval_precision": 0.6024830938292477,
1571
+ "eval_recall": 0.5502958579881657,
1572
+ "eval_runtime": 64.4351,
1573
+ "eval_samples_per_second": 5.246,
1574
+ "eval_steps_per_second": 2.623,
1575
+ "eval_top_10_accuracy": 0.8905325443786982,
1576
+ "eval_top_1_accuracy": 0.5502958579881657,
1577
+ "eval_top_5_accuracy": 0.8284023668639053,
1578
+ "step": 10274
1579
+ },
1580
+ {
1581
+ "epoch": 57.000708333333336,
1582
+ "grad_norm": 0.7857475280761719,
1583
+ "learning_rate": 3.9672839506172846e-05,
1584
+ "loss": 0.2066,
1585
+ "step": 10300
1586
+ },
1587
+ {
1588
+ "epoch": 57.00348611111111,
1589
+ "grad_norm": 0.0037257361691445112,
1590
+ "learning_rate": 3.9518518518518524e-05,
1591
+ "loss": 0.0888,
1592
+ "step": 10400
1593
+ },
1594
+ {
1595
+ "epoch": 57.00498611111111,
1596
+ "eval_accuracy": 0.5591715976331361,
1597
+ "eval_f1": 0.5473162081759367,
1598
+ "eval_loss": 2.6771368980407715,
1599
+ "eval_precision": 0.6141800507185123,
1600
+ "eval_recall": 0.5591715976331361,
1601
+ "eval_runtime": 66.5741,
1602
+ "eval_samples_per_second": 5.077,
1603
+ "eval_steps_per_second": 2.539,
1604
+ "eval_top_10_accuracy": 0.893491124260355,
1605
+ "eval_top_1_accuracy": 0.5591715976331361,
1606
+ "eval_top_5_accuracy": 0.8195266272189349,
1607
+ "step": 10454
1608
+ },
1609
+ {
1610
+ "epoch": 58.00125694444444,
1611
+ "grad_norm": 217.6994171142578,
1612
+ "learning_rate": 3.93641975308642e-05,
1613
+ "loss": 0.0478,
1614
+ "step": 10500
1615
+ },
1616
+ {
1617
+ "epoch": 58.00403472222222,
1618
+ "grad_norm": 0.19375790655612946,
1619
+ "learning_rate": 3.920987654320988e-05,
1620
+ "loss": 0.0629,
1621
+ "step": 10600
1622
+ },
1623
+ {
1624
+ "epoch": 58.005006944444446,
1625
+ "eval_accuracy": 0.5295857988165681,
1626
+ "eval_f1": 0.5132937382067211,
1627
+ "eval_loss": 2.8300716876983643,
1628
+ "eval_precision": 0.5767704517704518,
1629
+ "eval_recall": 0.5295857988165681,
1630
+ "eval_runtime": 66.7729,
1631
+ "eval_samples_per_second": 5.062,
1632
+ "eval_steps_per_second": 2.531,
1633
  "eval_top_10_accuracy": 0.8698224852071006,
1634
+ "eval_top_1_accuracy": 0.5295857988165681,
1635
+ "eval_top_5_accuracy": 0.7988165680473372,
1636
+ "step": 10635
1637
  },
1638
  {
1639
+ "epoch": 58.005006944444446,
1640
+ "step": 10635,
1641
+ "total_flos": 1.0610658302492344e+20,
1642
+ "train_loss": 5.244687236773334,
1643
+ "train_runtime": 25999.5358,
1644
+ "train_samples_per_second": 11.077,
1645
+ "train_steps_per_second": 1.385
1646
  }
1647
  ],
1648
  "logging_steps": 100,
 
1671
  "attributes": {}
1672
  }
1673
  },
1674
+ "total_flos": 1.0610658302492344e+20,
1675
  "train_batch_size": 2,
1676
  "trial_name": null,
1677
  "trial_params": null