{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 2485,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04024144869215292,
      "grad_norm": 0.8856053732817273,
      "learning_rate": 4.999278820633416e-05,
      "loss": 0.5826,
      "step": 20
    },
    {
      "epoch": 0.08048289738430583,
      "grad_norm": 0.7385891532669175,
      "learning_rate": 4.996961926996955e-05,
      "loss": 0.5449,
      "step": 40
    },
    {
      "epoch": 0.12072434607645875,
      "grad_norm": 0.72926414566868,
      "learning_rate": 4.9930488023869036e-05,
      "loss": 0.5618,
      "step": 60
    },
    {
      "epoch": 0.16096579476861167,
      "grad_norm": 0.6312617479451788,
      "learning_rate": 4.987541948343489e-05,
      "loss": 0.515,
      "step": 80
    },
    {
      "epoch": 0.2012072434607646,
      "grad_norm": 0.6639335448923683,
      "learning_rate": 4.980444885229155e-05,
      "loss": 0.5328,
      "step": 100
    },
    {
      "epoch": 0.2414486921529175,
      "grad_norm": 0.6557066466913923,
      "learning_rate": 4.9717621499781034e-05,
      "loss": 0.523,
      "step": 120
    },
    {
      "epoch": 0.28169014084507044,
      "grad_norm": 0.646966336694488,
      "learning_rate": 4.961499293195967e-05,
      "loss": 0.5214,
      "step": 140
    },
    {
      "epoch": 0.32193158953722334,
      "grad_norm": 0.6798500591020816,
      "learning_rate": 4.949662875611487e-05,
      "loss": 0.5143,
      "step": 160
    },
    {
      "epoch": 0.36217303822937624,
      "grad_norm": 0.7622327150861464,
      "learning_rate": 4.936260463882431e-05,
      "loss": 0.507,
      "step": 180
    },
    {
      "epoch": 0.4024144869215292,
      "grad_norm": 0.7701558072544397,
      "learning_rate": 4.921300625758468e-05,
      "loss": 0.5098,
      "step": 200
    },
    {
      "epoch": 0.4426559356136821,
      "grad_norm": 0.6721766598194193,
      "learning_rate": 4.9047929246040684e-05,
      "loss": 0.4959,
      "step": 220
    },
    {
      "epoch": 0.482897384305835,
      "grad_norm": 0.5759821922844843,
      "learning_rate": 4.886747913284935e-05,
      "loss": 0.4786,
      "step": 240
    },
    {
      "epoch": 0.5231388329979879,
      "grad_norm": 0.6241582936916275,
      "learning_rate": 4.8671771274218864e-05,
      "loss": 0.4936,
      "step": 260
    },
    {
      "epoch": 0.5633802816901409,
      "grad_norm": 0.6085499176420257,
      "learning_rate": 4.846093078016486e-05,
      "loss": 0.4927,
      "step": 280
    },
    {
      "epoch": 0.6036217303822937,
      "grad_norm": 0.7195639159658046,
      "learning_rate": 4.823509243453144e-05,
      "loss": 0.4921,
      "step": 300
    },
    {
      "epoch": 0.6438631790744467,
      "grad_norm": 0.6346981783065212,
      "learning_rate": 4.7994400608828007e-05,
      "loss": 0.4765,
      "step": 320
    },
    {
      "epoch": 0.6841046277665996,
      "grad_norm": 0.6760210496604738,
      "learning_rate": 4.7739009169937e-05,
      "loss": 0.4694,
      "step": 340
    },
    {
      "epoch": 0.7243460764587525,
      "grad_norm": 0.7309281782293976,
      "learning_rate": 4.7469081381751526e-05,
      "loss": 0.4693,
      "step": 360
    },
    {
      "epoch": 0.7645875251509054,
      "grad_norm": 0.5121852796257732,
      "learning_rate": 4.7184789800805785e-05,
      "loss": 0.4626,
      "step": 380
    },
    {
      "epoch": 0.8048289738430584,
      "grad_norm": 0.5626013205724073,
      "learning_rate": 4.688631616596496e-05,
      "loss": 0.463,
      "step": 400
    },
    {
      "epoch": 0.8450704225352113,
      "grad_norm": 0.6626243373594697,
      "learning_rate": 4.657385128224517e-05,
      "loss": 0.4781,
      "step": 420
    },
    {
      "epoch": 0.8853118712273642,
      "grad_norm": 0.5995753685938809,
      "learning_rate": 4.624759489883771e-05,
      "loss": 0.445,
      "step": 440
    },
    {
      "epoch": 0.9255533199195171,
      "grad_norm": 0.5780765450132732,
      "learning_rate": 4.5907755581415454e-05,
      "loss": 0.4626,
      "step": 460
    },
    {
      "epoch": 0.96579476861167,
      "grad_norm": 0.5478910002284612,
      "learning_rate": 4.555455057880334e-05,
      "loss": 0.4597,
      "step": 480
    },
    {
      "epoch": 1.0060362173038229,
      "grad_norm": 0.48568823231381825,
      "learning_rate": 4.518820568409781e-05,
      "loss": 0.406,
      "step": 500
    },
    {
      "epoch": 1.0462776659959758,
      "grad_norm": 0.5062057380908122,
      "learning_rate": 4.480895509032424e-05,
      "loss": 0.1795,
      "step": 520
    },
    {
      "epoch": 1.0865191146881288,
      "grad_norm": 0.5205860419378047,
      "learning_rate": 4.441704124072455e-05,
      "loss": 0.1902,
      "step": 540
    },
    {
      "epoch": 1.1267605633802817,
      "grad_norm": 0.519552686982609,
      "learning_rate": 4.40127146737707e-05,
      "loss": 0.1751,
      "step": 560
    },
    {
      "epoch": 1.1670020120724347,
      "grad_norm": 0.519651130331484,
      "learning_rate": 4.3596233863003135e-05,
      "loss": 0.1847,
      "step": 580
    },
    {
      "epoch": 1.2072434607645874,
      "grad_norm": 0.48247829692592525,
      "learning_rate": 4.316786505179659e-05,
      "loss": 0.1861,
      "step": 600
    },
    {
      "epoch": 1.2474849094567404,
      "grad_norm": 0.4487348974547772,
      "learning_rate": 4.27278820831589e-05,
      "loss": 0.2026,
      "step": 620
    },
    {
      "epoch": 1.2877263581488934,
      "grad_norm": 0.41599318043186545,
      "learning_rate": 4.227656622467162e-05,
      "loss": 0.2016,
      "step": 640
    },
    {
      "epoch": 1.3279678068410463,
      "grad_norm": 0.4540433485384787,
      "learning_rate": 4.181420598868425e-05,
      "loss": 0.1933,
      "step": 660
    },
    {
      "epoch": 1.3682092555331993,
      "grad_norm": 0.5514617072950766,
      "learning_rate": 4.13410969478772e-05,
      "loss": 0.2015,
      "step": 680
    },
    {
      "epoch": 1.408450704225352,
      "grad_norm": 0.4605005045885439,
      "learning_rate": 4.085754154631125e-05,
      "loss": 0.1957,
      "step": 700
    },
    {
      "epoch": 1.448692152917505,
      "grad_norm": 0.4596046168091651,
      "learning_rate": 4.036384890608438e-05,
      "loss": 0.2088,
      "step": 720
    },
    {
      "epoch": 1.488933601609658,
      "grad_norm": 0.5197294768215174,
      "learning_rate": 3.9860334629719484e-05,
      "loss": 0.208,
      "step": 740
    },
    {
      "epoch": 1.529175050301811,
      "grad_norm": 0.6580375239377614,
      "learning_rate": 3.9347320598409434e-05,
      "loss": 0.2124,
      "step": 760
    },
    {
      "epoch": 1.5694164989939638,
      "grad_norm": 0.4181955378555661,
      "learning_rate": 3.8825134766248266e-05,
      "loss": 0.2032,
      "step": 780
    },
    {
      "epoch": 1.6096579476861166,
      "grad_norm": 0.49205001089005324,
      "learning_rate": 3.829411095058029e-05,
      "loss": 0.2077,
      "step": 800
    },
    {
      "epoch": 1.6498993963782698,
      "grad_norm": 0.4873486109938958,
      "learning_rate": 3.775458861860086e-05,
      "loss": 0.197,
      "step": 820
    },
    {
      "epoch": 1.6901408450704225,
      "grad_norm": 0.5213796288743413,
      "learning_rate": 3.720691267034547e-05,
      "loss": 0.2024,
      "step": 840
    },
    {
      "epoch": 1.7303822937625755,
      "grad_norm": 0.5064104251566499,
      "learning_rate": 3.665143321820576e-05,
      "loss": 0.2046,
      "step": 860
    },
    {
      "epoch": 1.7706237424547284,
      "grad_norm": 0.5249037742075398,
      "learning_rate": 3.6088505363113435e-05,
      "loss": 0.2113,
      "step": 880
    },
    {
      "epoch": 1.8108651911468812,
      "grad_norm": 0.5323135228705763,
      "learning_rate": 3.5518488967535144e-05,
      "loss": 0.2015,
      "step": 900
    },
    {
      "epoch": 1.8511066398390343,
      "grad_norm": 0.45702728996195346,
      "learning_rate": 3.4941748425423506e-05,
      "loss": 0.2091,
      "step": 920
    },
    {
      "epoch": 1.891348088531187,
      "grad_norm": 0.48980668848273645,
      "learning_rate": 3.435865242927119e-05,
      "loss": 0.2089,
      "step": 940
    },
    {
      "epoch": 1.93158953722334,
      "grad_norm": 0.43577327809651956,
      "learning_rate": 3.3769573734417256e-05,
      "loss": 0.2115,
      "step": 960
    },
    {
      "epoch": 1.971830985915493,
      "grad_norm": 0.5094162027924247,
      "learning_rate": 3.317488892075601e-05,
      "loss": 0.2094,
      "step": 980
    },
    {
      "epoch": 2.0120724346076457,
      "grad_norm": 0.3457362763246621,
      "learning_rate": 3.257497815200116e-05,
      "loss": 0.1726,
      "step": 1000
    },
    {
      "epoch": 2.052313883299799,
      "grad_norm": 0.37034508814670536,
      "learning_rate": 3.1970224932658735e-05,
      "loss": 0.0859,
      "step": 1020
    },
    {
      "epoch": 2.0925553319919517,
      "grad_norm": 0.37642286093876015,
      "learning_rate": 3.136101586286457e-05,
      "loss": 0.0862,
      "step": 1040
    },
    {
      "epoch": 2.132796780684105,
      "grad_norm": 0.3194432979842797,
      "learning_rate": 3.0747740391242634e-05,
      "loss": 0.0879,
      "step": 1060
    },
    {
      "epoch": 2.1730382293762576,
      "grad_norm": 0.31686036565206543,
      "learning_rate": 3.0130790565942552e-05,
      "loss": 0.083,
      "step": 1080
    },
    {
      "epoch": 2.2132796780684103,
      "grad_norm": 0.48975203689721203,
      "learning_rate": 2.9510560784015257e-05,
      "loss": 0.0887,
      "step": 1100
    },
    {
      "epoch": 2.2535211267605635,
      "grad_norm": 0.3970890768904178,
      "learning_rate": 2.8887447539287083e-05,
      "loss": 0.0938,
      "step": 1120
    },
    {
      "epoch": 2.2937625754527162,
      "grad_norm": 0.42930287541107787,
      "learning_rate": 2.8261849168893462e-05,
      "loss": 0.0894,
      "step": 1140
    },
    {
      "epoch": 2.3340040241448694,
      "grad_norm": 0.40823847069100183,
      "learning_rate": 2.763416559863425e-05,
      "loss": 0.0868,
      "step": 1160
    },
    {
      "epoch": 2.374245472837022,
      "grad_norm": 0.42121750405319264,
      "learning_rate": 2.7004798087313437e-05,
      "loss": 0.0912,
      "step": 1180
    },
    {
      "epoch": 2.414486921529175,
      "grad_norm": 0.3214982105059483,
      "learning_rate": 2.6374148970226774e-05,
      "loss": 0.0918,
      "step": 1200
    },
    {
      "epoch": 2.454728370221328,
      "grad_norm": 0.34082865221526726,
      "learning_rate": 2.5742621401961143e-05,
      "loss": 0.091,
      "step": 1220
    },
    {
      "epoch": 2.494969818913481,
      "grad_norm": 0.4793154273361008,
      "learning_rate": 2.5110619098670263e-05,
      "loss": 0.0864,
      "step": 1240
    },
    {
      "epoch": 2.535211267605634,
      "grad_norm": 0.39451008511666086,
      "learning_rate": 2.447854607999135e-05,
      "loss": 0.0927,
      "step": 1260
    },
    {
      "epoch": 2.5754527162977867,
      "grad_norm": 0.3656682972169247,
      "learning_rate": 2.38468064107678e-05,
      "loss": 0.0895,
      "step": 1280
    },
    {
      "epoch": 2.6156941649899395,
      "grad_norm": 0.3605202374697158,
      "learning_rate": 2.3215803942742938e-05,
      "loss": 0.0953,
      "step": 1300
    },
    {
      "epoch": 2.6559356136820926,
      "grad_norm": 0.33578430684018057,
      "learning_rate": 2.2585942056390058e-05,
      "loss": 0.0966,
      "step": 1320
    },
    {
      "epoch": 2.6961770623742454,
      "grad_norm": 0.3794779373893201,
      "learning_rate": 2.195762340304364e-05,
      "loss": 0.0915,
      "step": 1340
    },
    {
      "epoch": 2.7364185110663986,
      "grad_norm": 0.33351508610885644,
      "learning_rate": 2.133124964749678e-05,
      "loss": 0.0879,
      "step": 1360
    },
    {
      "epoch": 2.7766599597585513,
      "grad_norm": 0.41067282583568215,
      "learning_rate": 2.0707221211229205e-05,
      "loss": 0.0979,
      "step": 1380
    },
    {
      "epoch": 2.816901408450704,
      "grad_norm": 0.4291530941671277,
      "learning_rate": 2.008593701643017e-05,
      "loss": 0.0946,
      "step": 1400
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 0.3613865415142941,
      "learning_rate": 1.9467794230979712e-05,
      "loss": 0.0957,
      "step": 1420
    },
    {
      "epoch": 2.89738430583501,
      "grad_norm": 0.35323941645271395,
      "learning_rate": 1.8853188014551533e-05,
      "loss": 0.0883,
      "step": 1440
    },
    {
      "epoch": 2.937625754527163,
      "grad_norm": 0.38038366932243284,
      "learning_rate": 1.8242511265999452e-05,
      "loss": 0.0944,
      "step": 1460
    },
    {
      "epoch": 2.977867203219316,
      "grad_norm": 0.4211867067510389,
      "learning_rate": 1.7636154372189363e-05,
      "loss": 0.0859,
      "step": 1480
    },
    {
      "epoch": 3.0181086519114686,
      "grad_norm": 0.23971000463797287,
      "learning_rate": 1.7034504958436843e-05,
      "loss": 0.0686,
      "step": 1500
    },
    {
      "epoch": 3.058350100603622,
      "grad_norm": 0.3934268138965833,
      "learning_rate": 1.643794764071024e-05,
      "loss": 0.0387,
      "step": 1520
    },
    {
      "epoch": 3.0985915492957745,
      "grad_norm": 0.2953008266603751,
      "learning_rate": 1.5846863779757492e-05,
      "loss": 0.0402,
      "step": 1540
    },
    {
      "epoch": 3.1388329979879277,
      "grad_norm": 0.30010306958393923,
      "learning_rate": 1.5261631237313967e-05,
      "loss": 0.0393,
      "step": 1560
    },
    {
      "epoch": 3.1790744466800804,
      "grad_norm": 0.30120051063507675,
      "learning_rate": 1.4682624134547021e-05,
      "loss": 0.0363,
      "step": 1580
    },
    {
      "epoch": 3.219315895372233,
      "grad_norm": 0.2515653178386247,
      "learning_rate": 1.4110212612891887e-05,
      "loss": 0.0383,
      "step": 1600
    },
    {
      "epoch": 3.2595573440643864,
      "grad_norm": 0.27249388315493484,
      "learning_rate": 1.3544762597431607e-05,
      "loss": 0.0396,
      "step": 1620
    },
    {
      "epoch": 3.299798792756539,
      "grad_norm": 0.3824387788042599,
      "learning_rate": 1.2986635562972413e-05,
      "loss": 0.0406,
      "step": 1640
    },
    {
      "epoch": 3.3400402414486923,
      "grad_norm": 0.4190218976563753,
      "learning_rate": 1.2436188302963944e-05,
      "loss": 0.0439,
      "step": 1660
    },
    {
      "epoch": 3.380281690140845,
      "grad_norm": 0.2317410734176642,
      "learning_rate": 1.1893772701412233e-05,
      "loss": 0.0423,
      "step": 1680
    },
    {
      "epoch": 3.4205231388329977,
      "grad_norm": 0.278591876157434,
      "learning_rate": 1.1359735507931035e-05,
      "loss": 0.0407,
      "step": 1700
    },
    {
      "epoch": 3.460764587525151,
      "grad_norm": 0.24119224796247,
      "learning_rate": 1.0834418116075484e-05,
      "loss": 0.0417,
      "step": 1720
    },
    {
      "epoch": 3.5010060362173037,
      "grad_norm": 0.2958581734865895,
      "learning_rate": 1.0318156345099692e-05,
      "loss": 0.0371,
      "step": 1740
    },
    {
      "epoch": 3.541247484909457,
      "grad_norm": 0.27477669064501764,
      "learning_rate": 9.811280225277786e-06,
      "loss": 0.0403,
      "step": 1760
    },
    {
      "epoch": 3.5814889336016096,
      "grad_norm": 0.32430825636968,
      "learning_rate": 9.314113786925777e-06,
      "loss": 0.0396,
      "step": 1780
    },
    {
      "epoch": 3.6217303822937623,
      "grad_norm": 0.3312827951095654,
      "learning_rate": 8.826974853258884e-06,
      "loss": 0.0407,
      "step": 1800
    },
    {
      "epoch": 3.6619718309859155,
      "grad_norm": 0.24013332678773616,
      "learning_rate": 8.35017483721696e-06,
      "loss": 0.0414,
      "step": 1820
    },
    {
      "epoch": 3.7022132796780687,
      "grad_norm": 0.28553574970459594,
      "learning_rate": 7.884018542387731e-06,
      "loss": 0.0416,
      "step": 1840
    },
    {
      "epoch": 3.7424547283702214,
      "grad_norm": 0.2631019454087187,
      "learning_rate": 7.428803968155307e-06,
      "loss": 0.0401,
      "step": 1860
    },
    {
      "epoch": 3.782696177062374,
      "grad_norm": 0.26131465743997634,
      "learning_rate": 6.984822119198253e-06,
      "loss": 0.0409,
      "step": 1880
    },
    {
      "epoch": 3.822937625754527,
      "grad_norm": 0.23335316567031417,
      "learning_rate": 6.552356819459354e-06,
      "loss": 0.0437,
      "step": 1900
    },
    {
      "epoch": 3.86317907444668,
      "grad_norm": 0.2745249201872649,
      "learning_rate": 6.131684530705572e-06,
      "loss": 0.0397,
      "step": 1920
    },
    {
      "epoch": 3.9034205231388333,
      "grad_norm": 0.2608587874278287,
      "learning_rate": 5.7230741757946485e-06,
      "loss": 0.0409,
      "step": 1940
    },
    {
      "epoch": 3.943661971830986,
      "grad_norm": 0.3249419421073309,
      "learning_rate": 5.326786966760922e-06,
      "loss": 0.0408,
      "step": 1960
    },
    {
      "epoch": 3.9839034205231387,
      "grad_norm": 0.29014887197719835,
      "learning_rate": 4.943076237830541e-06,
      "loss": 0.042,
      "step": 1980
    },
    {
      "epoch": 4.0241448692152915,
      "grad_norm": 0.13140332328630258,
      "learning_rate": 4.5721872834726755e-06,
      "loss": 0.0285,
      "step": 2000
    },
    {
      "epoch": 4.064386317907445,
      "grad_norm": 0.1599909046333746,
      "learning_rate": 4.214357201590316e-06,
      "loss": 0.0163,
      "step": 2020
    },
    {
      "epoch": 4.104627766599598,
      "grad_norm": 0.22268037817504605,
      "learning_rate": 3.869814741950833e-06,
      "loss": 0.0169,
      "step": 2040
    },
    {
      "epoch": 4.144869215291751,
      "grad_norm": 0.19008998119875475,
      "learning_rate": 3.5387801599533475e-06,
      "loss": 0.0178,
      "step": 2060
    },
    {
      "epoch": 4.185110663983903,
      "grad_norm": 0.28236212559906976,
      "learning_rate": 3.2214650758261854e-06,
      "loss": 0.0168,
      "step": 2080
    },
    {
      "epoch": 4.225352112676056,
      "grad_norm": 0.16411536065892163,
      "learning_rate": 2.918072339344585e-06,
      "loss": 0.0147,
      "step": 2100
    },
    {
      "epoch": 4.26559356136821,
      "grad_norm": 0.22329493065983927,
      "learning_rate": 2.6287959001550787e-06,
      "loss": 0.0172,
      "step": 2120
    },
    {
      "epoch": 4.305835010060362,
      "grad_norm": 0.23074348370306091,
      "learning_rate": 2.3538206837894262e-06,
      "loss": 0.0152,
      "step": 2140
    },
    {
      "epoch": 4.346076458752515,
      "grad_norm": 0.16346465568469334,
      "learning_rate": 2.093322473447448e-06,
      "loss": 0.0162,
      "step": 2160
    },
    {
      "epoch": 4.386317907444668,
      "grad_norm": 0.20894982405636947,
      "learning_rate": 1.8474677976241973e-06,
      "loss": 0.017,
      "step": 2180
    },
    {
      "epoch": 4.426559356136821,
      "grad_norm": 0.2114272503288231,
      "learning_rate": 1.6164138236534287e-06,
      "loss": 0.0153,
      "step": 2200
    },
    {
      "epoch": 4.466800804828974,
      "grad_norm": 0.2549097134831871,
      "learning_rate": 1.400308257235347e-06,
      "loss": 0.0165,
      "step": 2220
    },
    {
      "epoch": 4.507042253521127,
      "grad_norm": 0.2629485973872836,
      "learning_rate": 1.199289248012911e-06,
      "loss": 0.0167,
      "step": 2240
    },
    {
      "epoch": 4.54728370221328,
      "grad_norm": 0.15948181770721337,
      "learning_rate": 1.0134853012569918e-06,
      "loss": 0.0147,
      "step": 2260
    },
    {
      "epoch": 4.5875251509054324,
      "grad_norm": 0.2377257196332741,
      "learning_rate": 8.430151957169341e-07,
      "loss": 0.0172,
      "step": 2280
    },
    {
      "epoch": 4.627766599597585,
      "grad_norm": 0.12488849403623657,
      "learning_rate": 6.879879076889223e-07,
      "loss": 0.0169,
      "step": 2300
    },
    {
      "epoch": 4.668008048289739,
      "grad_norm": 0.17954160642164035,
      "learning_rate": 5.485025413508122e-07,
      "loss": 0.0173,
      "step": 2320
    },
    {
      "epoch": 4.7082494969818915,
      "grad_norm": 0.22632001709175115,
      "learning_rate": 4.246482654078565e-07,
      "loss": 0.0178,
      "step": 2340
    },
    {
      "epoch": 4.748490945674044,
      "grad_norm": 0.17862035740855048,
      "learning_rate": 3.1650425608991397e-07,
      "loss": 0.0169,
      "step": 2360
    },
    {
      "epoch": 4.788732394366197,
      "grad_norm": 0.22768890327395705,
      "learning_rate": 2.2413964653651142e-07,
      "loss": 0.0182,
      "step": 2380
    },
    {
      "epoch": 4.82897384305835,
      "grad_norm": 0.18617516446042512,
      "learning_rate": 1.476134826021436e-07,
      "loss": 0.0159,
      "step": 2400
    },
    {
      "epoch": 4.869215291750503,
      "grad_norm": 0.22313146556884694,
      "learning_rate": 8.697468511008888e-08,
      "loss": 0.0174,
      "step": 2420
    },
    {
      "epoch": 4.909456740442656,
      "grad_norm": 0.17950462343068288,
      "learning_rate": 4.226201857882584e-08,
      "loss": 0.017,
      "step": 2440
    },
    {
      "epoch": 4.949698189134809,
      "grad_norm": 0.20930529588954816,
      "learning_rate": 1.3504066441069608e-08,
      "loss": 0.0153,
      "step": 2460
    },
    {
      "epoch": 4.989939637826962,
      "grad_norm": 0.18167014719982993,
      "learning_rate": 7.192127712579711e-10,
      "loss": 0.0171,
      "step": 2480
    }
  ],
  "logging_steps": 20,
  "max_steps": 2485,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 173976693145600.0,
  "train_batch_size": 3,
  "trial_name": null,
  "trial_params": null
}