{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.9934640522875817,
  "global_step": 304,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 0.0001125,
      "loss": 3.8539,
      "step": 3
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.000225,
      "loss": 3.5808,
      "step": 6
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00033749999999999996,
      "loss": 3.335,
      "step": 9
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00045,
      "loss": 3.2031,
      "step": 12
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0005625,
      "loss": 3.0706,
      "step": 15
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0005999286081239726,
      "loss": 3.084,
      "step": 18
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0005995538936819289,
      "loss": 2.9744,
      "step": 21
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0005988584094275236,
      "loss": 2.8987,
      "step": 24
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0005978429001027164,
      "loss": 2.8293,
      "step": 27
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0005965084531403281,
      "loss": 2.7609,
      "step": 30
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0005948564974995903,
      "loss": 2.7155,
      "step": 33
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00059288880213598,
      "loss": 2.7184,
      "step": 36
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0005906074741069779,
      "loss": 2.6533,
      "step": 39
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.0005880149563157786,
      "loss": 2.6509,
      "step": 42
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.0005851140248953683,
      "loss": 2.5859,
      "step": 45
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0005819077862357724,
      "loss": 2.5516,
      "step": 48
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.0005783996736576553,
      "loss": 2.5855,
      "step": 51
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0005745934437358341,
      "loss": 2.5306,
      "step": 54
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.0005704931722766448,
      "loss": 2.5159,
      "step": 57
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0005661032499534664,
      "loss": 2.5304,
      "step": 60
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.0005614283776050784,
      "loss": 2.5005,
      "step": 63
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.0005564735612018839,
      "loss": 2.4884,
      "step": 66
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.0005512441064853923,
      "loss": 2.4729,
      "step": 69
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.0005457456132866975,
      "loss": 2.4226,
      "step": 72
    },
    {
      "epoch": 0.98,
      "learning_rate": 0.0005399839695300389,
      "loss": 2.412,
      "step": 75
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.5457690353760842,
      "eval_loss": 2.502744436264038,
      "eval_runtime": 56.9596,
      "eval_samples_per_second": 19.154,
      "eval_steps_per_second": 19.154,
      "step": 76
    },
    {
      "epoch": 1.03,
      "learning_rate": 0.0005339653449278644,
      "loss": 2.6265,
      "step": 78
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.0005276961843741485,
      "loss": 2.148,
      "step": 81
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.0005211832010430372,
      "loss": 2.1056,
      "step": 84
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.0005144333692002139,
      "loss": 2.1074,
      "step": 87
    },
    {
      "epoch": 1.18,
      "learning_rate": 0.0005074539167346808,
      "loss": 2.118,
      "step": 90
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.0005002523174189542,
      "loss": 2.1045,
      "step": 93
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.0004928362829059618,
      "loss": 2.061,
      "step": 96
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.0004852137544712115,
      "loss": 2.0959,
      "step": 99
    },
    {
      "epoch": 1.34,
      "learning_rate": 0.0004773928945090747,
      "loss": 2.0637,
      "step": 102
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.0004693820777922901,
      "loss": 2.004,
      "step": 105
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.00046118988250404714,
      "loss": 2.0516,
      "step": 108
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.00045282508105225254,
      "loss": 2.1182,
      "step": 111
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.00044429663067581626,
      "loss": 2.0648,
      "step": 114
    },
    {
      "epoch": 1.54,
      "learning_rate": 0.0004356136638530159,
      "loss": 2.058,
      "step": 117
    },
    {
      "epoch": 1.58,
      "learning_rate": 0.0004267854785222098,
      "loss": 2.012,
      "step": 120
    },
    {
      "epoch": 1.61,
      "learning_rate": 0.00041782152812537223,
      "loss": 2.0105,
      "step": 123
    },
    {
      "epoch": 1.65,
      "learning_rate": 0.00040873141148511043,
      "loss": 1.9976,
      "step": 126
    },
    {
      "epoch": 1.69,
      "learning_rate": 0.00039952486252600565,
      "loss": 2.0034,
      "step": 129
    },
    {
      "epoch": 1.73,
      "learning_rate": 0.00039021173985128186,
      "loss": 2.0171,
      "step": 132
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.00038080201618596784,
      "loss": 2.0163,
      "step": 135
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.0003713057676978519,
      "loss": 1.9683,
      "step": 138
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.00036173316320767046,
      "loss": 2.0209,
      "step": 141
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.0003520944533000791,
      "loss": 1.9655,
      "step": 144
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.0003423999593470703,
      "loss": 2.0127,
      "step": 147
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.00033266006245558934,
      "loss": 1.9702,
      "step": 150
    },
    {
      "epoch": 1.99,
      "eval_accuracy": 0.5849799255079998,
      "eval_loss": 2.2756919860839844,
      "eval_runtime": 56.7417,
      "eval_samples_per_second": 19.227,
      "eval_steps_per_second": 19.227,
      "step": 152
    },
    {
      "epoch": 2.01,
      "learning_rate": 0.00032288519235118573,
      "loss": 2.1744,
      "step": 153
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.00031308581620960083,
      "loss": 1.6022,
      "step": 156
    },
    {
      "epoch": 2.09,
      "learning_rate": 0.0003032724274482547,
      "loss": 1.6168,
      "step": 159
    },
    {
      "epoch": 2.13,
      "learning_rate": 0.0002934555344896317,
      "loss": 1.5467,
      "step": 162
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.00028364564950859807,
      "loss": 1.6023,
      "step": 165
    },
    {
      "epoch": 2.21,
      "learning_rate": 0.0002738532771757025,
      "loss": 1.4977,
      "step": 168
    },
    {
      "epoch": 2.25,
      "learning_rate": 0.0002640889034085113,
      "loss": 1.5448,
      "step": 171
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.00025436298414302494,
      "loss": 1.5738,
      "step": 174
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.000244685934137201,
      "loss": 1.559,
      "step": 177
    },
    {
      "epoch": 2.37,
      "learning_rate": 0.00023506811581856912,
      "loss": 1.5734,
      "step": 180
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.00022551982818788506,
      "loss": 1.4986,
      "step": 183
    },
    {
      "epoch": 2.44,
      "learning_rate": 0.00021605129579070238,
      "loss": 1.545,
      "step": 186
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.00020667265776867276,
      "loss": 1.5496,
      "step": 189
    },
    {
      "epoch": 2.52,
      "learning_rate": 0.00019739395700229937,
      "loss": 1.5426,
      "step": 192
    },
    {
      "epoch": 2.56,
      "learning_rate": 0.0001882251293567691,
      "loss": 1.4687,
      "step": 195
    },
    {
      "epoch": 2.6,
      "learning_rate": 0.00017917599304237886,
      "loss": 1.5415,
      "step": 198
    },
    {
      "epoch": 2.64,
      "learning_rate": 0.0001702562381009501,
      "loss": 1.5289,
      "step": 201
    },
    {
      "epoch": 2.68,
      "learning_rate": 0.0001614754160294899,
      "loss": 1.5449,
      "step": 204
    },
    {
      "epoch": 2.72,
      "learning_rate": 0.0001528429295522076,
      "loss": 1.5273,
      "step": 207
    },
    {
      "epoch": 2.76,
      "learning_rate": 0.0001443680225518435,
      "loss": 1.5146,
      "step": 210
    },
    {
      "epoch": 2.8,
      "learning_rate": 0.000136059770171087,
      "loss": 1.5164,
      "step": 213
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.00012792706909468623,
      "loss": 1.5239,
      "step": 216
    },
    {
      "epoch": 2.88,
      "learning_rate": 0.00011997862802265573,
      "loss": 1.4905,
      "step": 219
    },
    {
      "epoch": 2.92,
      "learning_rate": 0.00011222295834478227,
      "loss": 1.4968,
      "step": 222
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.0001046683650264153,
      "loss": 1.4934,
      "step": 225
    },
    {
      "epoch": 2.99,
      "learning_rate": 9.732293771530192e-05,
      "loss": 1.4628,
      "step": 228
    },
    {
      "epoch": 2.99,
      "eval_accuracy": 0.6081706452777681,
      "eval_loss": 2.2162108421325684,
      "eval_runtime": 57.053,
      "eval_samples_per_second": 19.123,
      "eval_steps_per_second": 19.123,
      "step": 228
    },
    {
      "epoch": 3.04,
      "learning_rate": 9.019454207898983e-05,
      "loss": 1.4385,
      "step": 231
    },
    {
      "epoch": 3.08,
      "learning_rate": 8.329081138207334e-05,
      "loss": 1.2032,
      "step": 234
    },
    {
      "epoch": 3.12,
      "learning_rate": 7.661913831230212e-05,
      "loss": 1.1659,
      "step": 237
    },
    {
      "epoch": 3.16,
      "learning_rate": 7.018666706430662e-05,
      "loss": 1.1521,
      "step": 240
    },
    {
      "epoch": 3.2,
      "learning_rate": 6.40002856894149e-05,
      "loss": 1.1916,
      "step": 243
    },
    {
      "epoch": 3.24,
      "learning_rate": 5.8066618719755195e-05,
      "loss": 1.163,
      "step": 246
    },
    {
      "epoch": 3.27,
      "learning_rate": 5.239202007454086e-05,
      "loss": 1.1624,
      "step": 249
    },
    {
      "epoch": 3.31,
      "learning_rate": 4.698256625613435e-05,
      "loss": 1.1203,
      "step": 252
    },
    {
      "epoch": 3.35,
      "learning_rate": 4.1844049843176334e-05,
      "loss": 1.1709,
      "step": 255
    },
    {
      "epoch": 3.39,
      "learning_rate": 3.698197328774769e-05,
      "loss": 1.1263,
      "step": 258
    },
    {
      "epoch": 3.43,
      "learning_rate": 3.2401543023205764e-05,
      "loss": 1.1277,
      "step": 261
    },
    {
      "epoch": 3.47,
      "learning_rate": 2.8107663889005016e-05,
      "loss": 1.1759,
      "step": 264
    },
    {
      "epoch": 3.51,
      "learning_rate": 2.410493387847232e-05,
      "loss": 1.157,
      "step": 267
    },
    {
      "epoch": 3.55,
      "learning_rate": 2.0397639215160466e-05,
      "loss": 1.1721,
      "step": 270
    },
    {
      "epoch": 3.59,
      "learning_rate": 1.698974976305243e-05,
      "loss": 1.1577,
      "step": 273
    },
    {
      "epoch": 3.63,
      "learning_rate": 1.3884914775531952e-05,
      "loss": 1.1546,
      "step": 276
    },
    {
      "epoch": 3.67,
      "learning_rate": 1.1086458987671187e-05,
      "loss": 1.1564,
      "step": 279
    },
    {
      "epoch": 3.71,
      "learning_rate": 8.59737905602157e-06,
      "loss": 1.1241,
      "step": 282
    },
    {
      "epoch": 3.75,
      "learning_rate": 6.4203403497185e-06,
      "loss": 1.1605,
      "step": 285
    },
    {
      "epoch": 3.78,
      "learning_rate": 4.557674096337593e-06,
      "loss": 1.1114,
      "step": 288
    },
    {
      "epoch": 3.82,
      "learning_rate": 3.011374885557638e-06,
      "loss": 1.1377,
      "step": 291
    },
    {
      "epoch": 3.86,
      "learning_rate": 1.783098533304106e-06,
      "loss": 1.1541,
      "step": 294
    },
    {
      "epoch": 3.9,
      "learning_rate": 8.741603086600102e-07,
      "loss": 1.174,
      "step": 297
    },
    {
      "epoch": 3.94,
      "learning_rate": 2.855335254426605e-07,
      "loss": 1.1415,
      "step": 300
    },
    {
      "epoch": 3.98,
      "learning_rate": 1.7848499955075423e-08,
      "loss": 1.1662,
      "step": 303
    },
    {
      "epoch": 3.99,
      "eval_accuracy": 0.611281497151223,
      "eval_loss": 2.2855756282806396,
      "eval_runtime": 56.9619,
      "eval_samples_per_second": 19.153,
      "eval_steps_per_second": 19.153,
      "step": 304
    },
    {
      "epoch": 3.99,
      "step": 304,
      "total_flos": 2.9714731304484864e+16,
      "train_loss": 1.8919498779271777,
      "train_runtime": 5682.4938,
      "train_samples_per_second": 6.892,
      "train_steps_per_second": 0.053
    }
  ],
  "max_steps": 304,
  "num_train_epochs": 4,
  "total_flos": 2.9714731304484864e+16,
  "trial_name": null,
  "trial_params": null
}