dyang39 commited on
Commit
0e1cc45
·
verified ·
1 Parent(s): a3caedc

Initial commit

Browse files
Files changed (1) hide show
  1. trainer_state.json +708 -0
trainer_state.json ADDED
@@ -0,0 +1,708 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 46266,
3
+ "best_metric": 0.838254282517001,
4
+ "best_model_checkpoint": "dm_training/finetuned_checkpoints/SIM-RAG-full/checkpoint-46266",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 46266,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.02161414429602732,
14
+ "grad_norm": 0.7032617926597595,
15
+ "learning_rate": 2.994e-05,
16
+ "loss": 0.3292,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.04322828859205464,
21
+ "grad_norm": 8.765708923339844,
22
+ "learning_rate": 2.9782725438685614e-05,
23
+ "loss": 0.3537,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.06484243288808196,
28
+ "grad_norm": 3.9439210891723633,
29
+ "learning_rate": 2.956501545740867e-05,
30
+ "loss": 0.3206,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.08645657718410928,
35
+ "grad_norm": 12.651052474975586,
36
+ "learning_rate": 2.934730547613173e-05,
37
+ "loss": 0.3505,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.1080707214801366,
42
+ "grad_norm": 0.14961493015289307,
43
+ "learning_rate": 2.9129595494854787e-05,
44
+ "loss": 0.335,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.12968486577616392,
49
+ "grad_norm": 2.6468780040740967,
50
+ "learning_rate": 2.8911885513577847e-05,
51
+ "loss": 0.3601,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.15129901007219124,
56
+ "grad_norm": 27.052431106567383,
57
+ "learning_rate": 2.8694175532300907e-05,
58
+ "loss": 0.3292,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.17291315436821855,
63
+ "grad_norm": 0.17826782166957855,
64
+ "learning_rate": 2.8476465551023964e-05,
65
+ "loss": 0.3218,
66
+ "step": 4000
67
+ },
68
+ {
69
+ "epoch": 0.1945272986642459,
70
+ "grad_norm": 0.4498291015625,
71
+ "learning_rate": 2.825875556974702e-05,
72
+ "loss": 0.2925,
73
+ "step": 4500
74
+ },
75
+ {
76
+ "epoch": 0.2161414429602732,
77
+ "grad_norm": 0.27510523796081543,
78
+ "learning_rate": 2.804104558847008e-05,
79
+ "loss": 0.3428,
80
+ "step": 5000
81
+ },
82
+ {
83
+ "epoch": 0.23775558725630053,
84
+ "grad_norm": 38.518646240234375,
85
+ "learning_rate": 2.782333560719314e-05,
86
+ "loss": 0.2883,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 0.25936973155232784,
91
+ "grad_norm": 0.18676885962486267,
92
+ "learning_rate": 2.7605625625916196e-05,
93
+ "loss": 0.3015,
94
+ "step": 6000
95
+ },
96
+ {
97
+ "epoch": 0.28098387584835516,
98
+ "grad_norm": 0.5126625895500183,
99
+ "learning_rate": 2.7387915644639256e-05,
100
+ "loss": 0.2988,
101
+ "step": 6500
102
+ },
103
+ {
104
+ "epoch": 0.3025980201443825,
105
+ "grad_norm": 36.30085754394531,
106
+ "learning_rate": 2.7170205663362313e-05,
107
+ "loss": 0.2914,
108
+ "step": 7000
109
+ },
110
+ {
111
+ "epoch": 0.3242121644404098,
112
+ "grad_norm": 5.256961345672607,
113
+ "learning_rate": 2.695249568208537e-05,
114
+ "loss": 0.3128,
115
+ "step": 7500
116
+ },
117
+ {
118
+ "epoch": 0.3458263087364371,
119
+ "grad_norm": 0.18740588426589966,
120
+ "learning_rate": 2.6734785700808432e-05,
121
+ "loss": 0.2849,
122
+ "step": 8000
123
+ },
124
+ {
125
+ "epoch": 0.3674404530324644,
126
+ "grad_norm": 33.268550872802734,
127
+ "learning_rate": 2.651707571953149e-05,
128
+ "loss": 0.3023,
129
+ "step": 8500
130
+ },
131
+ {
132
+ "epoch": 0.3890545973284918,
133
+ "grad_norm": 0.0934978798031807,
134
+ "learning_rate": 2.629936573825455e-05,
135
+ "loss": 0.2882,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 0.4106687416245191,
140
+ "grad_norm": 17.90414047241211,
141
+ "learning_rate": 2.6081655756977605e-05,
142
+ "loss": 0.2699,
143
+ "step": 9500
144
+ },
145
+ {
146
+ "epoch": 0.4322828859205464,
147
+ "grad_norm": 0.9339249134063721,
148
+ "learning_rate": 2.5863945775700662e-05,
149
+ "loss": 0.3039,
150
+ "step": 10000
151
+ },
152
+ {
153
+ "epoch": 0.45389703021657374,
154
+ "grad_norm": 0.04919858276844025,
155
+ "learning_rate": 2.5646235794423725e-05,
156
+ "loss": 0.2661,
157
+ "step": 10500
158
+ },
159
+ {
160
+ "epoch": 0.47551117451260105,
161
+ "grad_norm": 0.06951851397752762,
162
+ "learning_rate": 2.542852581314678e-05,
163
+ "loss": 0.2478,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 0.49712531880862837,
168
+ "grad_norm": 0.058041177690029144,
169
+ "learning_rate": 2.5210815831869838e-05,
170
+ "loss": 0.2489,
171
+ "step": 11500
172
+ },
173
+ {
174
+ "epoch": 0.5187394631046557,
175
+ "grad_norm": 22.77214813232422,
176
+ "learning_rate": 2.4993105850592898e-05,
177
+ "loss": 0.2744,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 0.5403536074006831,
182
+ "grad_norm": 26.296802520751953,
183
+ "learning_rate": 2.4775395869315954e-05,
184
+ "loss": 0.2627,
185
+ "step": 12500
186
+ },
187
+ {
188
+ "epoch": 0.5619677516967103,
189
+ "grad_norm": 18.95213508605957,
190
+ "learning_rate": 2.4557685888039014e-05,
191
+ "loss": 0.2405,
192
+ "step": 13000
193
+ },
194
+ {
195
+ "epoch": 0.5835818959927377,
196
+ "grad_norm": 10.959939002990723,
197
+ "learning_rate": 2.4339975906762074e-05,
198
+ "loss": 0.2472,
199
+ "step": 13500
200
+ },
201
+ {
202
+ "epoch": 0.605196040288765,
203
+ "grad_norm": 0.1388530284166336,
204
+ "learning_rate": 2.412226592548513e-05,
205
+ "loss": 0.2532,
206
+ "step": 14000
207
+ },
208
+ {
209
+ "epoch": 0.6268101845847923,
210
+ "grad_norm": 0.28232917189598083,
211
+ "learning_rate": 2.3904555944208187e-05,
212
+ "loss": 0.2727,
213
+ "step": 14500
214
+ },
215
+ {
216
+ "epoch": 0.6484243288808196,
217
+ "grad_norm": 0.17236585915088654,
218
+ "learning_rate": 2.3686845962931247e-05,
219
+ "loss": 0.2593,
220
+ "step": 15000
221
+ },
222
+ {
223
+ "epoch": 0.670038473176847,
224
+ "grad_norm": 0.013697458431124687,
225
+ "learning_rate": 2.3469135981654307e-05,
226
+ "loss": 0.2163,
227
+ "step": 15500
228
+ },
229
+ {
230
+ "epoch": 0.6916526174728742,
231
+ "grad_norm": 0.04355601221323013,
232
+ "learning_rate": 2.3251426000377363e-05,
233
+ "loss": 0.2418,
234
+ "step": 16000
235
+ },
236
+ {
237
+ "epoch": 0.7132667617689016,
238
+ "grad_norm": 0.15288621187210083,
239
+ "learning_rate": 2.3033716019100423e-05,
240
+ "loss": 0.2458,
241
+ "step": 16500
242
+ },
243
+ {
244
+ "epoch": 0.7348809060649288,
245
+ "grad_norm": 0.11191035062074661,
246
+ "learning_rate": 2.281600603782348e-05,
247
+ "loss": 0.2299,
248
+ "step": 17000
249
+ },
250
+ {
251
+ "epoch": 0.7564950503609562,
252
+ "grad_norm": 0.06535373628139496,
253
+ "learning_rate": 2.259829605654654e-05,
254
+ "loss": 0.2299,
255
+ "step": 17500
256
+ },
257
+ {
258
+ "epoch": 0.7781091946569836,
259
+ "grad_norm": 0.10419075191020966,
260
+ "learning_rate": 2.23805860752696e-05,
261
+ "loss": 0.2606,
262
+ "step": 18000
263
+ },
264
+ {
265
+ "epoch": 0.7997233389530108,
266
+ "grad_norm": 25.499109268188477,
267
+ "learning_rate": 2.2162876093992656e-05,
268
+ "loss": 0.2452,
269
+ "step": 18500
270
+ },
271
+ {
272
+ "epoch": 0.8213374832490382,
273
+ "grad_norm": 7.74832010269165,
274
+ "learning_rate": 2.1945166112715716e-05,
275
+ "loss": 0.2125,
276
+ "step": 19000
277
+ },
278
+ {
279
+ "epoch": 0.8429516275450655,
280
+ "grad_norm": 0.14268887042999268,
281
+ "learning_rate": 2.1727456131438773e-05,
282
+ "loss": 0.2155,
283
+ "step": 19500
284
+ },
285
+ {
286
+ "epoch": 0.8645657718410928,
287
+ "grad_norm": 0.7247521877288818,
288
+ "learning_rate": 2.1509746150161832e-05,
289
+ "loss": 0.2234,
290
+ "step": 20000
291
+ },
292
+ {
293
+ "epoch": 0.8861799161371201,
294
+ "grad_norm": 14.978250503540039,
295
+ "learning_rate": 2.1292036168884892e-05,
296
+ "loss": 0.2129,
297
+ "step": 20500
298
+ },
299
+ {
300
+ "epoch": 0.9077940604331475,
301
+ "grad_norm": 0.235914945602417,
302
+ "learning_rate": 2.107432618760795e-05,
303
+ "loss": 0.2189,
304
+ "step": 21000
305
+ },
306
+ {
307
+ "epoch": 0.9294082047291747,
308
+ "grad_norm": 20.6143798828125,
309
+ "learning_rate": 2.0856616206331005e-05,
310
+ "loss": 0.2051,
311
+ "step": 21500
312
+ },
313
+ {
314
+ "epoch": 0.9510223490252021,
315
+ "grad_norm": 67.3742904663086,
316
+ "learning_rate": 2.0638906225054065e-05,
317
+ "loss": 0.2045,
318
+ "step": 22000
319
+ },
320
+ {
321
+ "epoch": 0.9726364933212294,
322
+ "grad_norm": 0.51312255859375,
323
+ "learning_rate": 2.0421196243777125e-05,
324
+ "loss": 0.2013,
325
+ "step": 22500
326
+ },
327
+ {
328
+ "epoch": 0.9942506376172567,
329
+ "grad_norm": 0.40760791301727295,
330
+ "learning_rate": 2.020348626250018e-05,
331
+ "loss": 0.2329,
332
+ "step": 23000
333
+ },
334
+ {
335
+ "epoch": 1.0,
336
+ "eval_accuracy": 0.8315399845054662,
337
+ "eval_f1_0": 0.8379296066252587,
338
+ "eval_f1_1": 0.8246258625324849,
339
+ "eval_loss": 0.27029550075531006,
340
+ "eval_precision_0": 0.8346807457515262,
341
+ "eval_precision_1": 0.828113750899928,
342
+ "eval_recall_0": 0.8412038576654473,
343
+ "eval_recall_1": 0.8211672318400857,
344
+ "eval_runtime": 6851.1436,
345
+ "eval_samples_per_second": 1.696,
346
+ "eval_steps_per_second": 0.848,
347
+ "step": 23133
348
+ },
349
+ {
350
+ "epoch": 1.015864781913284,
351
+ "grad_norm": 0.1842740774154663,
352
+ "learning_rate": 1.998577628122324e-05,
353
+ "loss": 0.1819,
354
+ "step": 23500
355
+ },
356
+ {
357
+ "epoch": 1.0374789262093114,
358
+ "grad_norm": 18.2067928314209,
359
+ "learning_rate": 1.9768066299946298e-05,
360
+ "loss": 0.1862,
361
+ "step": 24000
362
+ },
363
+ {
364
+ "epoch": 1.0590930705053387,
365
+ "grad_norm": 56.502193450927734,
366
+ "learning_rate": 1.9550356318669354e-05,
367
+ "loss": 0.1461,
368
+ "step": 24500
369
+ },
370
+ {
371
+ "epoch": 1.0807072148013661,
372
+ "grad_norm": 0.013557116501033306,
373
+ "learning_rate": 1.9332646337392418e-05,
374
+ "loss": 0.1521,
375
+ "step": 25000
376
+ },
377
+ {
378
+ "epoch": 1.1023213590973933,
379
+ "grad_norm": 0.05783897638320923,
380
+ "learning_rate": 1.9114936356115474e-05,
381
+ "loss": 0.1428,
382
+ "step": 25500
383
+ },
384
+ {
385
+ "epoch": 1.1239355033934206,
386
+ "grad_norm": 0.026215313002467155,
387
+ "learning_rate": 1.889722637483853e-05,
388
+ "loss": 0.1491,
389
+ "step": 26000
390
+ },
391
+ {
392
+ "epoch": 1.145549647689448,
393
+ "grad_norm": 0.0602092407643795,
394
+ "learning_rate": 1.867951639356159e-05,
395
+ "loss": 0.1442,
396
+ "step": 26500
397
+ },
398
+ {
399
+ "epoch": 1.1671637919854754,
400
+ "grad_norm": 0.03851708024740219,
401
+ "learning_rate": 1.8461806412284647e-05,
402
+ "loss": 0.1511,
403
+ "step": 27000
404
+ },
405
+ {
406
+ "epoch": 1.1887779362815025,
407
+ "grad_norm": 0.029633022844791412,
408
+ "learning_rate": 1.8244096431007707e-05,
409
+ "loss": 0.1438,
410
+ "step": 27500
411
+ },
412
+ {
413
+ "epoch": 1.21039208057753,
414
+ "grad_norm": 4.901973247528076,
415
+ "learning_rate": 1.8026386449730767e-05,
416
+ "loss": 0.1541,
417
+ "step": 28000
418
+ },
419
+ {
420
+ "epoch": 1.2320062248735573,
421
+ "grad_norm": 0.05654510483145714,
422
+ "learning_rate": 1.7808676468453823e-05,
423
+ "loss": 0.1537,
424
+ "step": 28500
425
+ },
426
+ {
427
+ "epoch": 1.2536203691695846,
428
+ "grad_norm": 0.0678759291768074,
429
+ "learning_rate": 1.7590966487176883e-05,
430
+ "loss": 0.1362,
431
+ "step": 29000
432
+ },
433
+ {
434
+ "epoch": 1.275234513465612,
435
+ "grad_norm": 14.40847110748291,
436
+ "learning_rate": 1.737325650589994e-05,
437
+ "loss": 0.1611,
438
+ "step": 29500
439
+ },
440
+ {
441
+ "epoch": 1.2968486577616392,
442
+ "grad_norm": 0.13242945075035095,
443
+ "learning_rate": 1.7155546524623e-05,
444
+ "loss": 0.1743,
445
+ "step": 30000
446
+ },
447
+ {
448
+ "epoch": 1.3184628020576665,
449
+ "grad_norm": 0.05444726720452309,
450
+ "learning_rate": 1.693783654334606e-05,
451
+ "loss": 0.1427,
452
+ "step": 30500
453
+ },
454
+ {
455
+ "epoch": 1.340076946353694,
456
+ "grad_norm": 28.47621726989746,
457
+ "learning_rate": 1.6720126562069116e-05,
458
+ "loss": 0.1485,
459
+ "step": 31000
460
+ },
461
+ {
462
+ "epoch": 1.361691090649721,
463
+ "grad_norm": 0.04358465224504471,
464
+ "learning_rate": 1.6502416580792172e-05,
465
+ "loss": 0.1422,
466
+ "step": 31500
467
+ },
468
+ {
469
+ "epoch": 1.3833052349457484,
470
+ "grad_norm": 0.024943144991993904,
471
+ "learning_rate": 1.6284706599515236e-05,
472
+ "loss": 0.1386,
473
+ "step": 32000
474
+ },
475
+ {
476
+ "epoch": 1.4049193792417758,
477
+ "grad_norm": 0.1587284654378891,
478
+ "learning_rate": 1.6066996618238292e-05,
479
+ "loss": 0.1252,
480
+ "step": 32500
481
+ },
482
+ {
483
+ "epoch": 1.4265335235378032,
484
+ "grad_norm": 0.054883528500795364,
485
+ "learning_rate": 1.584928663696135e-05,
486
+ "loss": 0.1429,
487
+ "step": 33000
488
+ },
489
+ {
490
+ "epoch": 1.4481476678338305,
491
+ "grad_norm": 0.21283945441246033,
492
+ "learning_rate": 1.563157665568441e-05,
493
+ "loss": 0.1465,
494
+ "step": 33500
495
+ },
496
+ {
497
+ "epoch": 1.4697618121298577,
498
+ "grad_norm": 63.26069259643555,
499
+ "learning_rate": 1.5413866674407465e-05,
500
+ "loss": 0.1157,
501
+ "step": 34000
502
+ },
503
+ {
504
+ "epoch": 1.491375956425885,
505
+ "grad_norm": 20.822044372558594,
506
+ "learning_rate": 1.5196156693130527e-05,
507
+ "loss": 0.1505,
508
+ "step": 34500
509
+ },
510
+ {
511
+ "epoch": 1.5129901007219124,
512
+ "grad_norm": 0.2792131006717682,
513
+ "learning_rate": 1.4978446711853583e-05,
514
+ "loss": 0.1205,
515
+ "step": 35000
516
+ },
517
+ {
518
+ "epoch": 1.5346042450179398,
519
+ "grad_norm": 0.04913631081581116,
520
+ "learning_rate": 1.4760736730576641e-05,
521
+ "loss": 0.124,
522
+ "step": 35500
523
+ },
524
+ {
525
+ "epoch": 1.5562183893139672,
526
+ "grad_norm": 0.007014845497906208,
527
+ "learning_rate": 1.45430267492997e-05,
528
+ "loss": 0.1404,
529
+ "step": 36000
530
+ },
531
+ {
532
+ "epoch": 1.5778325336099943,
533
+ "grad_norm": 0.03032066859304905,
534
+ "learning_rate": 1.432531676802276e-05,
535
+ "loss": 0.1345,
536
+ "step": 36500
537
+ },
538
+ {
539
+ "epoch": 1.5994466779060217,
540
+ "grad_norm": 0.02347446419298649,
541
+ "learning_rate": 1.4107606786745816e-05,
542
+ "loss": 0.141,
543
+ "step": 37000
544
+ },
545
+ {
546
+ "epoch": 1.621060822202049,
547
+ "grad_norm": 0.03960123285651207,
548
+ "learning_rate": 1.3889896805468876e-05,
549
+ "loss": 0.1399,
550
+ "step": 37500
551
+ },
552
+ {
553
+ "epoch": 1.6426749664980762,
554
+ "grad_norm": 0.0023522686678916216,
555
+ "learning_rate": 1.3672186824191934e-05,
556
+ "loss": 0.1301,
557
+ "step": 38000
558
+ },
559
+ {
560
+ "epoch": 1.6642891107941038,
561
+ "grad_norm": 0.10293618589639664,
562
+ "learning_rate": 1.345447684291499e-05,
563
+ "loss": 0.1504,
564
+ "step": 38500
565
+ },
566
+ {
567
+ "epoch": 1.685903255090131,
568
+ "grad_norm": 0.6177674531936646,
569
+ "learning_rate": 1.323676686163805e-05,
570
+ "loss": 0.1244,
571
+ "step": 39000
572
+ },
573
+ {
574
+ "epoch": 1.7075173993861583,
575
+ "grad_norm": 0.2255789041519165,
576
+ "learning_rate": 1.3019056880361109e-05,
577
+ "loss": 0.1229,
578
+ "step": 39500
579
+ },
580
+ {
581
+ "epoch": 1.7291315436821857,
582
+ "grad_norm": 0.01264307089149952,
583
+ "learning_rate": 1.2801346899084167e-05,
584
+ "loss": 0.1199,
585
+ "step": 40000
586
+ },
587
+ {
588
+ "epoch": 1.7507456879782128,
589
+ "grad_norm": 21.45499610900879,
590
+ "learning_rate": 1.2583636917807225e-05,
591
+ "loss": 0.1234,
592
+ "step": 40500
593
+ },
594
+ {
595
+ "epoch": 1.7723598322742402,
596
+ "grad_norm": 0.02090781182050705,
597
+ "learning_rate": 1.2365926936530283e-05,
598
+ "loss": 0.121,
599
+ "step": 41000
600
+ },
601
+ {
602
+ "epoch": 1.7939739765702676,
603
+ "grad_norm": 0.054082971066236496,
604
+ "learning_rate": 1.2148216955253343e-05,
605
+ "loss": 0.1158,
606
+ "step": 41500
607
+ },
608
+ {
609
+ "epoch": 1.815588120866295,
610
+ "grad_norm": 0.04447195306420326,
611
+ "learning_rate": 1.19305069739764e-05,
612
+ "loss": 0.1183,
613
+ "step": 42000
614
+ },
615
+ {
616
+ "epoch": 1.8372022651623223,
617
+ "grad_norm": 38.29100799560547,
618
+ "learning_rate": 1.171279699269946e-05,
619
+ "loss": 0.1302,
620
+ "step": 42500
621
+ },
622
+ {
623
+ "epoch": 1.8588164094583495,
624
+ "grad_norm": 0.014447253197431564,
625
+ "learning_rate": 1.1495087011422518e-05,
626
+ "loss": 0.1011,
627
+ "step": 43000
628
+ },
629
+ {
630
+ "epoch": 1.8804305537543768,
631
+ "grad_norm": 0.0001848287502070889,
632
+ "learning_rate": 1.1277377030145576e-05,
633
+ "loss": 0.1161,
634
+ "step": 43500
635
+ },
636
+ {
637
+ "epoch": 1.9020446980504042,
638
+ "grad_norm": 0.003150364151224494,
639
+ "learning_rate": 1.1059667048868634e-05,
640
+ "loss": 0.1332,
641
+ "step": 44000
642
+ },
643
+ {
644
+ "epoch": 1.9236588423464314,
645
+ "grad_norm": 0.2078738808631897,
646
+ "learning_rate": 1.0841957067591692e-05,
647
+ "loss": 0.1409,
648
+ "step": 44500
649
+ },
650
+ {
651
+ "epoch": 1.945272986642459,
652
+ "grad_norm": 0.059768468141555786,
653
+ "learning_rate": 1.062424708631475e-05,
654
+ "loss": 0.1326,
655
+ "step": 45000
656
+ },
657
+ {
658
+ "epoch": 1.966887130938486,
659
+ "grad_norm": 0.038765549659729004,
660
+ "learning_rate": 1.0406537105037809e-05,
661
+ "loss": 0.0918,
662
+ "step": 45500
663
+ },
664
+ {
665
+ "epoch": 1.9885012752345135,
666
+ "grad_norm": 18.927526473999023,
667
+ "learning_rate": 1.0188827123760869e-05,
668
+ "loss": 0.1091,
669
+ "step": 46000
670
+ },
671
+ {
672
+ "epoch": 2.0,
673
+ "eval_accuracy": 0.838254282517001,
674
+ "eval_f1_0": 0.8413944458512703,
675
+ "eval_f1_1": 0.8349872661807324,
676
+ "eval_loss": 0.3542537987232208,
677
+ "eval_precision_0": 0.8544488256471798,
678
+ "eval_precision_1": 0.8219225449515906,
679
+ "eval_recall_0": 0.828732956434985,
680
+ "eval_recall_1": 0.8484740317686953,
681
+ "eval_runtime": 6835.7043,
682
+ "eval_samples_per_second": 1.699,
683
+ "eval_steps_per_second": 0.85,
684
+ "step": 46266
685
+ }
686
+ ],
687
+ "logging_steps": 500,
688
+ "max_steps": 69399,
689
+ "num_input_tokens_seen": 0,
690
+ "num_train_epochs": 3,
691
+ "save_steps": 500,
692
+ "stateful_callbacks": {
693
+ "TrainerControl": {
694
+ "args": {
695
+ "should_epoch_stop": false,
696
+ "should_evaluate": false,
697
+ "should_log": false,
698
+ "should_save": true,
699
+ "should_training_stop": false
700
+ },
701
+ "attributes": {}
702
+ }
703
+ },
704
+ "total_flos": 1.5826915766462054e+18,
705
+ "train_batch_size": 2,
706
+ "trial_name": null,
707
+ "trial_params": null
708
+ }