{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.44483985765124556,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.00044483985765124553, "grad_norm": 0.5906099081039429, "learning_rate": 4e-05, "loss": 1.504, "step": 1 },
    { "epoch": 0.0008896797153024911, "grad_norm": 0.8538464903831482, "learning_rate": 8e-05, "loss": 1.4735, "step": 2 },
    { "epoch": 0.0013345195729537367, "grad_norm": 0.46255260705947876, "learning_rate": 0.00012, "loss": 1.3887, "step": 3 },
    { "epoch": 0.0017793594306049821, "grad_norm": 0.40842387080192566, "learning_rate": 0.00016, "loss": 1.2993, "step": 4 },
    { "epoch": 0.002224199288256228, "grad_norm": 0.41691651940345764, "learning_rate": 0.0002, "loss": 1.3105, "step": 5 },
    { "epoch": 0.0026690391459074734, "grad_norm": 0.40152719616889954, "learning_rate": 0.00019979899497487438, "loss": 1.1942, "step": 6 },
    { "epoch": 0.003113879003558719, "grad_norm": 0.39211127161979675, "learning_rate": 0.00019959798994974876, "loss": 1.1688, "step": 7 },
    { "epoch": 0.0035587188612099642, "grad_norm": 0.43954452872276306, "learning_rate": 0.00019939698492462313, "loss": 1.1211, "step": 8 },
    { "epoch": 0.00400355871886121, "grad_norm": 0.38755497336387634, "learning_rate": 0.0001991959798994975, "loss": 1.1039, "step": 9 },
    { "epoch": 0.004448398576512456, "grad_norm": 0.3547092378139496, "learning_rate": 0.00019899497487437187, "loss": 1.126, "step": 10 },
    { "epoch": 0.004893238434163701, "grad_norm": 0.37269020080566406, "learning_rate": 0.00019879396984924622, "loss": 1.1599, "step": 11 },
    { "epoch": 0.005338078291814947, "grad_norm": 0.3519953191280365, "learning_rate": 0.00019859296482412062, "loss": 1.2022, "step": 12 },
    { "epoch": 0.005782918149466192, "grad_norm": 0.3725995421409607, "learning_rate": 0.000198391959798995, "loss": 1.2543, "step": 13 },
    { "epoch": 0.006227758007117438, "grad_norm": 0.32365697622299194, "learning_rate": 0.00019819095477386937, "loss": 1.145, "step": 14 },
    { "epoch": 0.0066725978647686835, "grad_norm": 0.44411811232566833, "learning_rate": 0.0001979899497487437, "loss": 1.2582, "step": 15 },
    { "epoch": 0.0071174377224199285, "grad_norm": 0.4044501781463623, "learning_rate": 0.0001977889447236181, "loss": 1.1424, "step": 16 },
    { "epoch": 0.007562277580071174, "grad_norm": 0.3195618689060211, "learning_rate": 0.00019758793969849249, "loss": 1.1522, "step": 17 },
    { "epoch": 0.00800711743772242, "grad_norm": 0.35177746415138245, "learning_rate": 0.00019738693467336683, "loss": 1.0172, "step": 18 },
    { "epoch": 0.008451957295373666, "grad_norm": 0.37688982486724854, "learning_rate": 0.0001971859296482412, "loss": 1.1343, "step": 19 },
    { "epoch": 0.008896797153024912, "grad_norm": 0.3498818278312683, "learning_rate": 0.0001969849246231156, "loss": 1.1138, "step": 20 },
    { "epoch": 0.009341637010676156, "grad_norm": 0.4116188585758209, "learning_rate": 0.00019678391959798995, "loss": 1.0602, "step": 21 },
    { "epoch": 0.009786476868327402, "grad_norm": 0.5222630500793457, "learning_rate": 0.00019658291457286432, "loss": 1.2328, "step": 22 },
    { "epoch": 0.010231316725978648, "grad_norm": 0.5468437075614929, "learning_rate": 0.0001963819095477387, "loss": 1.1929, "step": 23 },
    { "epoch": 0.010676156583629894, "grad_norm": 0.39658740162849426, "learning_rate": 0.0001961809045226131, "loss": 0.9775, "step": 24 },
    { "epoch": 0.01112099644128114, "grad_norm": 0.3198809027671814, "learning_rate": 0.00019597989949748744, "loss": 1.042, "step": 25 },
    { "epoch": 0.011565836298932384, "grad_norm": 0.49676308035850525, "learning_rate": 0.00019577889447236181, "loss": 1.0378, "step": 26 },
    { "epoch": 0.01201067615658363, "grad_norm": 0.3704735338687897, "learning_rate": 0.0001955778894472362, "loss": 1.0303, "step": 27 },
    { "epoch": 0.012455516014234875, "grad_norm": 0.33084699511528015, "learning_rate": 0.00019537688442211056, "loss": 1.0252, "step": 28 },
    { "epoch": 0.012900355871886121, "grad_norm": 0.35819771885871887, "learning_rate": 0.00019517587939698493, "loss": 1.0752, "step": 29 },
    { "epoch": 0.013345195729537367, "grad_norm": 0.36553651094436646, "learning_rate": 0.0001949748743718593, "loss": 1.0637, "step": 30 },
    { "epoch": 0.013790035587188613, "grad_norm": 0.41030144691467285, "learning_rate": 0.00019477386934673368, "loss": 1.1858, "step": 31 },
    { "epoch": 0.014234875444839857, "grad_norm": 0.3703097105026245, "learning_rate": 0.00019457286432160805, "loss": 1.0193, "step": 32 },
    { "epoch": 0.014679715302491103, "grad_norm": 0.37004703283309937, "learning_rate": 0.00019437185929648243, "loss": 1.0405, "step": 33 },
    { "epoch": 0.015124555160142349, "grad_norm": 0.33804813027381897, "learning_rate": 0.0001941708542713568, "loss": 0.9303, "step": 34 },
    { "epoch": 0.015569395017793594, "grad_norm": 0.5308049917221069, "learning_rate": 0.00019396984924623117, "loss": 0.961, "step": 35 },
    { "epoch": 0.01601423487544484, "grad_norm": 0.4099597632884979, "learning_rate": 0.00019376884422110552, "loss": 1.1084, "step": 36 },
    { "epoch": 0.016459074733096084, "grad_norm": 0.4100741446018219, "learning_rate": 0.00019356783919597992, "loss": 1.063, "step": 37 },
    { "epoch": 0.016903914590747332, "grad_norm": 0.3497965335845947, "learning_rate": 0.0001933668341708543, "loss": 1.0864, "step": 38 },
    { "epoch": 0.017348754448398576, "grad_norm": 0.3670850694179535, "learning_rate": 0.00019316582914572864, "loss": 1.071, "step": 39 },
    { "epoch": 0.017793594306049824, "grad_norm": 0.38884881138801575, "learning_rate": 0.000192964824120603, "loss": 1.0091, "step": 40 },
    { "epoch": 0.018238434163701068, "grad_norm": 0.3449464738368988, "learning_rate": 0.0001927638190954774, "loss": 1.0753, "step": 41 },
    { "epoch": 0.018683274021352312, "grad_norm": 0.35679587721824646, "learning_rate": 0.00019256281407035178, "loss": 1.0992, "step": 42 },
    { "epoch": 0.01912811387900356, "grad_norm": 0.3696941137313843, "learning_rate": 0.00019236180904522613, "loss": 1.0413, "step": 43 },
    { "epoch": 0.019572953736654804, "grad_norm": 0.37430745363235474, "learning_rate": 0.0001921608040201005, "loss": 1.0424, "step": 44 },
    { "epoch": 0.02001779359430605, "grad_norm": 0.33420661091804504, "learning_rate": 0.0001919597989949749, "loss": 1.0339, "step": 45 },
    { "epoch": 0.020462633451957295, "grad_norm": 0.35058847069740295, "learning_rate": 0.00019175879396984925, "loss": 1.0291, "step": 46 },
    { "epoch": 0.02090747330960854, "grad_norm": 0.3396761119365692, "learning_rate": 0.00019155778894472362, "loss": 1.1198, "step": 47 },
    { "epoch": 0.021352313167259787, "grad_norm": 0.3136732280254364, "learning_rate": 0.000191356783919598, "loss": 1.0288, "step": 48 },
    { "epoch": 0.02179715302491103, "grad_norm": 0.5155500173568726, "learning_rate": 0.0001911557788944724, "loss": 1.0353, "step": 49 },
    { "epoch": 0.02224199288256228, "grad_norm": 0.39861586689949036, "learning_rate": 0.00019095477386934674, "loss": 0.9865, "step": 50 },
    { "epoch": 0.022686832740213523, "grad_norm": 0.36214709281921387, "learning_rate": 0.0001907537688442211, "loss": 1.0886, "step": 51 },
    { "epoch": 0.023131672597864767, "grad_norm": 0.35173431038856506, "learning_rate": 0.00019055276381909548, "loss": 1.1213, "step": 52 },
    { "epoch": 0.023576512455516015, "grad_norm": 0.34853485226631165, "learning_rate": 0.00019035175879396986, "loss": 1.0123, "step": 53 },
    { "epoch": 0.02402135231316726, "grad_norm": 0.3735390305519104, "learning_rate": 0.00019015075376884423, "loss": 1.1078, "step": 54 },
    { "epoch": 0.024466192170818506, "grad_norm": 0.3629351854324341, "learning_rate": 0.0001899497487437186, "loss": 1.0864, "step": 55 },
    { "epoch": 0.02491103202846975, "grad_norm": 0.2966906428337097, "learning_rate": 0.00018974874371859298, "loss": 1.057, "step": 56 },
    { "epoch": 0.025355871886120998, "grad_norm": 0.34373167157173157, "learning_rate": 0.00018954773869346732, "loss": 0.9951, "step": 57 },
    { "epoch": 0.025800711743772242, "grad_norm": 0.454569548368454, "learning_rate": 0.00018934673366834172, "loss": 1.0618, "step": 58 },
    { "epoch": 0.026245551601423486, "grad_norm": 0.358252614736557, "learning_rate": 0.0001891457286432161, "loss": 1.0242, "step": 59 },
    { "epoch": 0.026690391459074734, "grad_norm": 0.33208489418029785, "learning_rate": 0.00018894472361809047, "loss": 1.0812, "step": 60 },
    { "epoch": 0.027135231316725978, "grad_norm": 0.3399137556552887, "learning_rate": 0.00018874371859296481, "loss": 0.8889, "step": 61 },
    { "epoch": 0.027580071174377226, "grad_norm": 0.41684690117836, "learning_rate": 0.00018854271356783921, "loss": 0.8872, "step": 62 },
    { "epoch": 0.02802491103202847, "grad_norm": 0.5529223680496216, "learning_rate": 0.0001883417085427136, "loss": 0.95, "step": 63 },
    { "epoch": 0.028469750889679714, "grad_norm": 0.35778260231018066, "learning_rate": 0.00018814070351758793, "loss": 1.0439, "step": 64 },
    { "epoch": 0.02891459074733096, "grad_norm": 0.387169748544693, "learning_rate": 0.0001879396984924623, "loss": 1.1486, "step": 65 },
    { "epoch": 0.029359430604982206, "grad_norm": 0.32700011134147644, "learning_rate": 0.0001877386934673367, "loss": 1.1394, "step": 66 },
    { "epoch": 0.029804270462633453, "grad_norm": 0.45792707800865173, "learning_rate": 0.00018753768844221108, "loss": 1.1815, "step": 67 },
    { "epoch": 0.030249110320284697, "grad_norm": 0.4012312591075897, "learning_rate": 0.00018733668341708543, "loss": 1.1024, "step": 68 },
    { "epoch": 0.03069395017793594, "grad_norm": 0.3783579468727112, "learning_rate": 0.0001871356783919598, "loss": 0.9901, "step": 69 },
    { "epoch": 0.03113879003558719, "grad_norm": 0.33383217453956604, "learning_rate": 0.0001869346733668342, "loss": 1.0832, "step": 70 },
    { "epoch": 0.03158362989323844, "grad_norm": 0.41429656744003296, "learning_rate": 0.00018673366834170854, "loss": 1.0593, "step": 71 },
    { "epoch": 0.03202846975088968, "grad_norm": 0.5010104179382324, "learning_rate": 0.00018653266331658292, "loss": 1.0602, "step": 72 },
    { "epoch": 0.032473309608540925, "grad_norm": 0.33280321955680847, "learning_rate": 0.0001863316582914573, "loss": 1.0004, "step": 73 },
    { "epoch": 0.03291814946619217, "grad_norm": 0.39433741569519043, "learning_rate": 0.0001861306532663317, "loss": 1.0071, "step": 74 },
    { "epoch": 0.03336298932384341, "grad_norm": 0.3676820397377014, "learning_rate": 0.00018592964824120604, "loss": 0.8539, "step": 75 },
    { "epoch": 0.033807829181494664, "grad_norm": 0.3701139986515045, "learning_rate": 0.0001857286432160804, "loss": 1.1197, "step": 76 },
    { "epoch": 0.03425266903914591, "grad_norm": 0.3730039596557617, "learning_rate": 0.00018552763819095478, "loss": 1.0727, "step": 77 },
    { "epoch": 0.03469750889679715, "grad_norm": 0.3400503098964691, "learning_rate": 0.00018532663316582915, "loss": 1.0385, "step": 78 },
    { "epoch": 0.035142348754448396, "grad_norm": 0.36764055490493774, "learning_rate": 0.00018512562814070353, "loss": 1.0818, "step": 79 },
    { "epoch": 0.03558718861209965, "grad_norm": 0.37534597516059875, "learning_rate": 0.0001849246231155779, "loss": 0.9744, "step": 80 },
    { "epoch": 0.03603202846975089, "grad_norm": 0.33084404468536377, "learning_rate": 0.00018472361809045227, "loss": 0.9885, "step": 81 },
    { "epoch": 0.036476868327402136, "grad_norm": 0.36626842617988586, "learning_rate": 0.00018452261306532662, "loss": 1.0442, "step": 82 },
    { "epoch": 0.03692170818505338, "grad_norm": 0.5151258111000061, "learning_rate": 0.00018432160804020102, "loss": 1.0693, "step": 83 },
    { "epoch": 0.037366548042704624, "grad_norm": 0.36287805438041687, "learning_rate": 0.0001841206030150754, "loss": 1.0257, "step": 84 },
    { "epoch": 0.037811387900355875, "grad_norm": 0.35327619314193726, "learning_rate": 0.00018391959798994977, "loss": 0.9902, "step": 85 },
    { "epoch": 0.03825622775800712, "grad_norm": 0.32582369446754456, "learning_rate": 0.0001837185929648241, "loss": 1.0425, "step": 86 },
    { "epoch": 0.03870106761565836, "grad_norm": 0.38856419920921326, "learning_rate": 0.0001835175879396985, "loss": 0.9939, "step": 87 },
    { "epoch": 0.03914590747330961, "grad_norm": 0.3371952772140503, "learning_rate": 0.00018331658291457288, "loss": 1.0968, "step": 88 },
    { "epoch": 0.03959074733096085, "grad_norm": 0.36074796319007874, "learning_rate": 0.00018311557788944723, "loss": 1.0878, "step": 89 },
    { "epoch": 0.0400355871886121, "grad_norm": 0.39816179871559143, "learning_rate": 0.0001829145728643216, "loss": 1.0504, "step": 90 },
    { "epoch": 0.04048042704626335, "grad_norm": 0.4431018829345703, "learning_rate": 0.000182713567839196, "loss": 0.8954, "step": 91 },
    { "epoch": 0.04092526690391459, "grad_norm": 0.3593955338001251, "learning_rate": 0.00018251256281407038, "loss": 1.0031, "step": 92 },
    { "epoch": 0.041370106761565835, "grad_norm": 0.2924874722957611, "learning_rate": 0.00018231155778894472, "loss": 0.9301, "step": 93 },
    { "epoch": 0.04181494661921708, "grad_norm": 0.29153579473495483, "learning_rate": 0.0001821105527638191, "loss": 0.9428, "step": 94 },
    { "epoch": 0.04225978647686833, "grad_norm": 0.3542582392692566, "learning_rate": 0.0001819095477386935, "loss": 0.9941, "step": 95 },
    { "epoch": 0.042704626334519574, "grad_norm": 0.3997493088245392, "learning_rate": 0.00018170854271356784, "loss": 1.0187, "step": 96 },
    { "epoch": 0.04314946619217082, "grad_norm": 0.3728165626525879, "learning_rate": 0.00018150753768844221, "loss": 1.1312, "step": 97 },
    { "epoch": 0.04359430604982206, "grad_norm": 0.35325804352760315, "learning_rate": 0.0001813065326633166, "loss": 0.9955, "step": 98 },
    { "epoch": 0.04403914590747331, "grad_norm": 0.39074355363845825, "learning_rate": 0.00018110552763819096, "loss": 1.0561, "step": 99 },
    { "epoch": 0.04448398576512456, "grad_norm": 0.346187949180603, "learning_rate": 0.00018090452261306533, "loss": 0.9568, "step": 100 },
    { "epoch": 0.0449288256227758, "grad_norm": 0.314586877822876, "learning_rate": 0.0001807035175879397, "loss": 1.1153, "step": 101 },
    { "epoch": 0.045373665480427046, "grad_norm": 0.3357396423816681, "learning_rate": 0.00018050251256281408, "loss": 1.0259, "step": 102 },
    { "epoch": 0.04581850533807829, "grad_norm": 0.3750225007534027, "learning_rate": 0.00018030150753768845, "loss": 1.096, "step": 103 },
    { "epoch": 0.046263345195729534, "grad_norm": 0.3401976227760315, "learning_rate": 0.00018010050251256282, "loss": 0.9911, "step": 104 },
    { "epoch": 0.046708185053380785, "grad_norm": 0.3866574764251709, "learning_rate": 0.0001798994974874372, "loss": 0.9558, "step": 105 },
    { "epoch": 0.04715302491103203, "grad_norm": 0.3394455313682556, "learning_rate": 0.00017969849246231157, "loss": 1.0463, "step": 106 },
    { "epoch": 0.04759786476868327, "grad_norm": 0.36298561096191406, "learning_rate": 0.00017949748743718592, "loss": 1.0807, "step": 107 },
    { "epoch": 0.04804270462633452, "grad_norm": 0.41382816433906555, "learning_rate": 0.00017929648241206032, "loss": 0.9859, "step": 108 },
    { "epoch": 0.04848754448398576, "grad_norm": 0.2769756019115448, "learning_rate": 0.0001790954773869347, "loss": 1.0251, "step": 109 },
    { "epoch": 0.04893238434163701, "grad_norm": 0.31425032019615173, "learning_rate": 0.00017889447236180906, "loss": 0.9642, "step": 110 },
    { "epoch": 0.04937722419928826, "grad_norm": 0.3441152274608612, "learning_rate": 0.0001786934673366834, "loss": 1.0211, "step": 111 },
    { "epoch": 0.0498220640569395, "grad_norm": 0.40452349185943604, "learning_rate": 0.0001784924623115578, "loss": 1.109, "step": 112 },
    { "epoch": 0.050266903914590745, "grad_norm": 0.32000288367271423, "learning_rate": 0.00017829145728643218, "loss": 1.1237, "step": 113 },
    { "epoch": 0.050711743772241996, "grad_norm": 0.31428951025009155, "learning_rate": 0.00017809045226130653, "loss": 1.0443, "step": 114 },
    { "epoch": 0.05115658362989324, "grad_norm": 0.37118658423423767, "learning_rate": 0.0001778894472361809, "loss": 0.9842, "step": 115 },
    { "epoch": 0.051601423487544484, "grad_norm": 0.3640018403530121, "learning_rate": 0.0001776884422110553, "loss": 0.942, "step": 116 },
    { "epoch": 0.05204626334519573, "grad_norm": 0.3735273778438568, "learning_rate": 0.00017748743718592967, "loss": 1.0128, "step": 117 },
    { "epoch": 0.05249110320284697, "grad_norm": 0.37252792716026306, "learning_rate": 0.00017728643216080402, "loss": 1.0308, "step": 118 },
    { "epoch": 0.052935943060498224, "grad_norm": 0.3111330568790436, "learning_rate": 0.0001770854271356784, "loss": 1.0082, "step": 119 },
    { "epoch": 0.05338078291814947, "grad_norm": 0.40468236804008484, "learning_rate": 0.0001768844221105528, "loss": 1.0273, "step": 120 },
    { "epoch": 0.05382562277580071, "grad_norm": 0.3018098473548889, "learning_rate": 0.00017668341708542714, "loss": 0.9599, "step": 121 },
    { "epoch": 0.054270462633451956, "grad_norm": 0.3560699224472046, "learning_rate": 0.0001764824120603015, "loss": 1.0508, "step": 122 },
    { "epoch": 0.0547153024911032, "grad_norm": 0.3692304193973541, "learning_rate": 0.00017628140703517588, "loss": 0.9454, "step": 123 },
    { "epoch": 0.05516014234875445, "grad_norm": 0.38090547919273376, "learning_rate": 0.00017608040201005026, "loss": 1.0469, "step": 124 },
    { "epoch": 0.055604982206405695, "grad_norm": 0.37586429715156555, "learning_rate": 0.00017587939698492463, "loss": 1.0205, "step": 125 },
    { "epoch": 0.05604982206405694, "grad_norm": 0.4673503041267395, "learning_rate": 0.000175678391959799, "loss": 1.1555, "step": 126 },
    { "epoch": 0.056494661921708184, "grad_norm": 0.42561087012290955, "learning_rate": 0.00017547738693467338, "loss": 1.1458, "step": 127 },
    { "epoch": 0.05693950177935943, "grad_norm": 0.3206027150154114, "learning_rate": 0.00017527638190954775, "loss": 1.0165, "step": 128 },
    { "epoch": 0.05738434163701068, "grad_norm": 0.3541994094848633, "learning_rate": 0.00017507537688442212, "loss": 1.0377, "step": 129 },
    { "epoch": 0.05782918149466192, "grad_norm": 0.4175245761871338, "learning_rate": 0.0001748743718592965, "loss": 1.0371, "step": 130 },
    { "epoch": 0.05827402135231317, "grad_norm": 0.2889978587627411, "learning_rate": 0.00017467336683417087, "loss": 1.0413, "step": 131 },
    { "epoch": 0.05871886120996441, "grad_norm": 0.36901023983955383, "learning_rate": 0.00017447236180904521, "loss": 1.042, "step": 132 },
    { "epoch": 0.059163701067615655, "grad_norm": 0.3048287332057953, "learning_rate": 0.00017427135678391961, "loss": 1.0587, "step": 133 },
    { "epoch": 0.059608540925266906, "grad_norm": 0.40319743752479553, "learning_rate": 0.000174070351758794, "loss": 1.0727, "step": 134 },
    { "epoch": 0.06005338078291815, "grad_norm": 0.30953213572502136, "learning_rate": 0.00017386934673366836, "loss": 0.8989, "step": 135 },
    { "epoch": 0.060498220640569395, "grad_norm": 0.3466584384441376, "learning_rate": 0.0001736683417085427, "loss": 1.0438, "step": 136 },
    { "epoch": 0.06094306049822064, "grad_norm": 0.4506623148918152, "learning_rate": 0.0001734673366834171, "loss": 1.0601, "step": 137 },
    { "epoch": 0.06138790035587188, "grad_norm": 0.4022829830646515, "learning_rate": 0.00017326633165829148, "loss": 1.0149, "step": 138 },
    { "epoch": 0.061832740213523134, "grad_norm": 0.32555732131004333, "learning_rate": 0.00017306532663316582, "loss": 1.0512, "step": 139 },
    { "epoch": 0.06227758007117438, "grad_norm": 0.33276137709617615, "learning_rate": 0.0001728643216080402, "loss": 0.9819, "step": 140 },
    { "epoch": 0.06272241992882563, "grad_norm": 0.32384124398231506, "learning_rate": 0.0001726633165829146, "loss": 0.9455, "step": 141 },
    { "epoch": 0.06316725978647687, "grad_norm": 0.3403018116950989, "learning_rate": 0.00017246231155778897, "loss": 0.9247, "step": 142 },
    { "epoch": 0.06361209964412812, "grad_norm": 0.3674178421497345, "learning_rate": 0.00017226130653266332, "loss": 1.074, "step": 143 },
    { "epoch": 0.06405693950177936, "grad_norm": 0.39759331941604614, "learning_rate": 0.0001720603015075377, "loss": 1.0983, "step": 144 },
    { "epoch": 0.0645017793594306, "grad_norm": 0.3763502240180969, "learning_rate": 0.00017185929648241206, "loss": 0.9859, "step": 145 },
    { "epoch": 0.06494661921708185, "grad_norm": 0.44169896841049194, "learning_rate": 0.00017165829145728644, "loss": 1.0924, "step": 146 },
    { "epoch": 0.0653914590747331, "grad_norm": 0.3722588121891022, "learning_rate": 0.0001714572864321608, "loss": 0.9457, "step": 147 },
    { "epoch": 0.06583629893238434, "grad_norm": 0.3731984496116638, "learning_rate": 0.00017125628140703518, "loss": 1.1355, "step": 148 },
    { "epoch": 0.06628113879003558, "grad_norm": 0.29889795184135437, "learning_rate": 0.00017105527638190955, "loss": 0.9392, "step": 149 },
    { "epoch": 0.06672597864768683, "grad_norm": 0.36229395866394043, "learning_rate": 0.00017085427135678393, "loss": 0.8904, "step": 150 },
    { "epoch": 0.06717081850533808, "grad_norm": 0.454025536775589, "learning_rate": 0.0001706532663316583, "loss": 0.857, "step": 151 },
    { "epoch": 0.06761565836298933, "grad_norm": 0.34767961502075195, "learning_rate": 0.00017045226130653267, "loss": 1.0121, "step": 152 },
    { "epoch": 0.06806049822064057, "grad_norm": 0.3490912616252899, "learning_rate": 0.00017025125628140705, "loss": 0.8651, "step": 153 },
    { "epoch": 0.06850533807829182, "grad_norm": 0.41010549664497375, "learning_rate": 0.00017005025125628142, "loss": 0.9816, "step": 154 },
    { "epoch": 0.06895017793594306, "grad_norm": 0.348345011472702, "learning_rate": 0.0001698492462311558, "loss": 0.9971, "step": 155 },
    { "epoch": 0.0693950177935943, "grad_norm": 0.3389308452606201, "learning_rate": 0.00016964824120603016, "loss": 0.9833, "step": 156 },
    { "epoch": 0.06983985765124555, "grad_norm": 0.3532242178916931, "learning_rate": 0.0001694472361809045, "loss": 1.1019, "step": 157 },
    { "epoch": 0.07028469750889679, "grad_norm": 0.3977639675140381, "learning_rate": 0.0001692462311557789, "loss": 1.1519, "step": 158 },
    { "epoch": 0.07072953736654804, "grad_norm": 0.3684947192668915, "learning_rate": 0.00016904522613065328, "loss": 1.0164, "step": 159 },
    { "epoch": 0.0711743772241993, "grad_norm": 0.3370361328125, "learning_rate": 0.00016884422110552766, "loss": 0.9961, "step": 160 },
    { "epoch": 0.07161921708185054, "grad_norm": 0.3965550661087036, "learning_rate": 0.000168643216080402, "loss": 1.0542, "step": 161 },
    { "epoch": 0.07206405693950178, "grad_norm": 0.39642298221588135, "learning_rate": 0.0001684422110552764, "loss": 1.0452, "step": 162 },
    { "epoch": 0.07250889679715303, "grad_norm": 0.3771675229072571, "learning_rate": 0.00016824120603015078, "loss": 1.0284, "step": 163 },
    { "epoch": 0.07295373665480427, "grad_norm": 0.3752223253250122, "learning_rate": 0.00016804020100502512, "loss": 0.985, "step": 164 },
    { "epoch": 0.07339857651245552, "grad_norm": 0.3507731258869171, "learning_rate": 0.0001678391959798995, "loss": 0.9716, "step": 165 },
    { "epoch": 0.07384341637010676, "grad_norm": 0.3909834027290344, "learning_rate": 0.0001676381909547739, "loss": 0.8909, "step": 166 },
    { "epoch": 0.074288256227758, "grad_norm": 0.5423752665519714, "learning_rate": 0.00016743718592964827, "loss": 0.9922, "step": 167 },
    { "epoch": 0.07473309608540925, "grad_norm": 0.39162302017211914, "learning_rate": 0.0001672361809045226, "loss": 1.0075, "step": 168 },
    { "epoch": 0.07517793594306049, "grad_norm": 0.3779022693634033, "learning_rate": 0.00016703517587939699, "loss": 0.9451, "step": 169 },
    { "epoch": 0.07562277580071175, "grad_norm": 0.3254469633102417, "learning_rate": 0.00016683417085427136, "loss": 0.8934, "step": 170 },
    { "epoch": 0.076067615658363, "grad_norm": 0.3539400100708008, "learning_rate": 0.00016663316582914573, "loss": 0.9944, "step": 171 },
    { "epoch": 0.07651245551601424, "grad_norm": 0.41099923849105835, "learning_rate": 0.0001664321608040201, "loss": 1.018, "step": 172 },
    { "epoch": 0.07695729537366548, "grad_norm": 0.40103086829185486, "learning_rate": 0.00016623115577889448, "loss": 0.9354, "step": 173 },
    { "epoch": 0.07740213523131673, "grad_norm": 0.3429498076438904, "learning_rate": 0.00016603015075376885, "loss": 0.9328, "step": 174 },
    { "epoch": 0.07784697508896797, "grad_norm": 0.2685195505619049, "learning_rate": 0.00016582914572864322, "loss": 0.6091, "step": 175 },
    { "epoch": 0.07829181494661921, "grad_norm": 0.2993873655796051, "learning_rate": 0.0001656281407035176, "loss": 1.0054, "step": 176 },
    { "epoch": 0.07873665480427046, "grad_norm": 0.29764705896377563, "learning_rate": 0.00016542713567839197, "loss": 0.9484, "step": 177 },
    { "epoch": 0.0791814946619217, "grad_norm": 0.33408114314079285, "learning_rate": 0.00016522613065326634, "loss": 1.0898, "step": 178 },
    { "epoch": 0.07962633451957295, "grad_norm": 0.3865182399749756, "learning_rate": 0.00016502512562814072, "loss": 1.0371, "step": 179 },
    { "epoch": 0.0800711743772242, "grad_norm": 0.34564244747161865, "learning_rate": 0.0001648241206030151, "loss": 0.9825, "step": 180 },
    { "epoch": 0.08051601423487545, "grad_norm": 0.29182514548301697, "learning_rate": 0.00016462311557788946, "loss": 0.8901, "step": 181 },
    { "epoch": 0.0809608540925267, "grad_norm": 0.35713285207748413, "learning_rate": 0.0001644221105527638, "loss": 1.0267, "step": 182 },
    { "epoch": 0.08140569395017794, "grad_norm": 0.38320329785346985, "learning_rate": 0.0001642211055276382, "loss": 0.9875, "step": 183 },
    { "epoch": 0.08185053380782918, "grad_norm": 0.4025184214115143, "learning_rate": 0.00016402010050251258, "loss": 1.1871, "step": 184 },
    { "epoch": 0.08229537366548043, "grad_norm": 0.34486982226371765, "learning_rate": 0.00016381909547738695, "loss": 0.982, "step": 185 },
    { "epoch": 0.08274021352313167, "grad_norm": 0.45098572969436646, "learning_rate": 0.0001636180904522613, "loss": 0.9052, "step": 186 },
    { "epoch": 0.08318505338078291, "grad_norm": 0.4179227352142334, "learning_rate": 0.0001634170854271357, "loss": 0.9985, "step": 187 },
    { "epoch": 0.08362989323843416, "grad_norm": 0.36076629161834717, "learning_rate": 0.00016321608040201007, "loss": 0.9035, "step": 188 },
    { "epoch": 0.08407473309608542, "grad_norm": 0.4315630793571472, "learning_rate": 0.00016301507537688442, "loss": 0.9815, "step": 189 },
    { "epoch": 0.08451957295373666, "grad_norm": 0.31583619117736816, "learning_rate": 0.0001628140703517588, "loss": 1.1461, "step": 190 },
    { "epoch": 0.0849644128113879, "grad_norm": 0.3578287661075592, "learning_rate": 0.00016261306532663316, "loss": 0.9802, "step": 191 },
    { "epoch": 0.08540925266903915, "grad_norm": 0.35003262758255005, "learning_rate": 0.00016241206030150756, "loss": 0.9406, "step": 192 },
    { "epoch": 0.08585409252669039, "grad_norm": 0.33165210485458374, "learning_rate": 0.0001622110552763819, "loss": 1.0484, "step": 193 },
    { "epoch": 0.08629893238434164, "grad_norm": 0.33711302280426025, "learning_rate": 0.00016201005025125628, "loss": 1.0255, "step": 194 },
    { "epoch": 0.08674377224199288, "grad_norm": 0.3443413972854614, "learning_rate": 0.00016180904522613066, "loss": 0.9512, "step": 195 },
    { "epoch": 0.08718861209964412, "grad_norm": 0.5403580069541931, "learning_rate": 0.00016160804020100503, "loss": 1.0158, "step": 196 },
    { "epoch": 0.08763345195729537, "grad_norm": 0.40549829602241516, "learning_rate": 0.0001614070351758794, "loss": 1.0102, "step": 197 },
    { "epoch": 0.08807829181494661, "grad_norm": 0.43879571557044983, "learning_rate": 0.00016120603015075378, "loss": 1.0015, "step": 198 },
    { "epoch": 0.08852313167259787, "grad_norm": 0.3620396852493286, "learning_rate": 0.00016100502512562815, "loss": 1.0248, "step": 199 },
    { "epoch": 0.08896797153024912, "grad_norm": 0.339257150888443, "learning_rate": 0.00016080402010050252, "loss": 0.9548, "step": 200 },
    { "epoch": 0.08941281138790036, "grad_norm": 0.3820551037788391, "learning_rate": 0.0001606030150753769, "loss": 1.0026, "step": 201 },
    { "epoch": 0.0898576512455516, "grad_norm": 0.4135708808898926, "learning_rate": 0.00016040201005025127, "loss": 1.0476, "step": 202 },
    { "epoch": 0.09030249110320285, "grad_norm": 0.3772624731063843, "learning_rate": 0.00016020100502512564, "loss": 0.8981, "step": 203 },
    { "epoch": 0.09074733096085409, "grad_norm": 0.42693498730659485, "learning_rate": 0.00016, "loss": 1.3064, "step": 204 },
    { "epoch": 0.09119217081850534, "grad_norm": 0.2962593138217926, "learning_rate": 0.00015979899497487439, "loss": 1.0059, "step": 205 },
    { "epoch": 0.09163701067615658, "grad_norm": 0.3942723274230957, "learning_rate": 0.00015959798994974876, "loss": 0.9041, "step": 206 },
    { "epoch": 0.09208185053380782, "grad_norm": 0.34500858187675476, "learning_rate": 0.0001593969849246231, "loss": 0.9624, "step": 207 },
    { "epoch": 0.09252669039145907, "grad_norm": 0.41078099608421326, "learning_rate": 0.0001591959798994975, "loss": 1.0105, "step": 208 },
    { "epoch": 0.09297153024911033, "grad_norm": 0.3484095335006714, "learning_rate": 0.00015899497487437188, "loss": 1.0591, "step": 209 },
    { "epoch": 0.09341637010676157, "grad_norm": 0.3550896644592285, "learning_rate": 0.00015879396984924625, "loss": 1.0196, "step": 210 },
    { "epoch": 0.09386120996441281, "grad_norm": 0.3832697570323944, "learning_rate": 0.0001585929648241206, "loss": 1.048, "step": 211 },
    { "epoch": 0.09430604982206406, "grad_norm": 0.34110692143440247, "learning_rate": 0.000158391959798995, "loss": 1.0706, "step": 212 },
    { "epoch": 0.0947508896797153, "grad_norm": 0.3769712746143341, "learning_rate": 0.00015819095477386937, "loss": 0.9514, "step": 213 },
    { "epoch": 0.09519572953736655, "grad_norm": 0.3767399489879608, "learning_rate": 0.00015798994974874372, "loss": 0.9611, "step": 214 },
    { "epoch": 0.09564056939501779, "grad_norm": 0.3955567479133606, "learning_rate": 0.0001577889447236181, "loss": 0.9683, "step": 215 },
    { "epoch": 0.09608540925266904, "grad_norm": 0.4317300021648407, "learning_rate": 0.00015758793969849246, "loss": 0.957, "step": 216 },
    { "epoch": 0.09653024911032028, "grad_norm": 0.4412591755390167, "learning_rate": 0.00015738693467336686, "loss": 1.0473, "step": 217 },
    { "epoch": 0.09697508896797152, "grad_norm": 0.37174472212791443, "learning_rate": 0.0001571859296482412, "loss": 0.9258, "step": 218 },
    { "epoch": 0.09741992882562278, "grad_norm": 0.4425841271877289, "learning_rate": 0.00015698492462311558, "loss": 1.0147, "step": 219 },
    { "epoch": 0.09786476868327403, "grad_norm": 0.40715131163597107, "learning_rate": 0.00015678391959798995, "loss": 1.0028, "step": 220 },
    { "epoch": 0.09830960854092527, "grad_norm": 0.3638611435890198, "learning_rate": 0.00015658291457286433, "loss": 1.1178, "step": 221 },
    { "epoch": 0.09875444839857651, "grad_norm": 0.3498910963535309, "learning_rate": 0.0001563819095477387, "loss": 1.0467, "step": 222 },
    { "epoch": 0.09919928825622776, "grad_norm": 0.36899253726005554, "learning_rate": 0.00015618090452261307, "loss": 1.0702, "step": 223 },
    { "epoch": 0.099644128113879, "grad_norm": 0.398636132478714, "learning_rate": 0.00015597989949748745, "loss": 0.9641, "step": 224 },
    { "epoch": 0.10008896797153025, "grad_norm": 0.3399724066257477, "learning_rate": 0.00015577889447236182, "loss": 0.9786, "step": 225 },
    { "epoch": 0.10053380782918149, "grad_norm": 0.3582400679588318, "learning_rate": 0.0001555778894472362, "loss": 0.9441, "step": 226 },
    { "epoch": 0.10097864768683273, "grad_norm": 0.365993469953537, "learning_rate": 0.00015537688442211056, "loss": 0.9669, "step": 227 },
    { "epoch": 0.10142348754448399, "grad_norm": 0.46609994769096375, "learning_rate": 0.00015517587939698494, "loss": 1.0387, "step": 228 },
    { "epoch": 0.10186832740213524, "grad_norm": 0.4048779010772705, "learning_rate": 0.0001549748743718593, "loss": 0.9808, "step": 229 },
    { "epoch": 0.10231316725978648, "grad_norm": 0.3965264856815338, "learning_rate": 0.00015477386934673368, "loss": 1.0191, "step": 230 },
    { "epoch": 0.10275800711743772, "grad_norm": 0.379210501909256, "learning_rate": 0.00015457286432160806, "loss": 0.9608, "step": 231 },
    { "epoch": 0.10320284697508897, "grad_norm": 0.4589402675628662, "learning_rate": 0.0001543718592964824, "loss": 0.9267, "step": 232 },
    { "epoch": 0.10364768683274021, "grad_norm": 0.3889247179031372, "learning_rate": 0.0001541708542713568, "loss": 1.0115, "step": 233 },
    { "epoch": 0.10409252669039146, "grad_norm": 0.3973037004470825, "learning_rate": 0.00015396984924623117, "loss": 0.9967, "step": 234 },
    { "epoch": 0.1045373665480427, "grad_norm": 0.35045430064201355, "learning_rate": 0.00015376884422110555, "loss": 0.9899, "step": 235 },
    { "epoch": 0.10498220640569395, "grad_norm": 0.36741137504577637, "learning_rate": 0.0001535678391959799, "loss": 1.0016, "step": 236 },
    { "epoch": 0.10542704626334519, "grad_norm": 0.4001176953315735, "learning_rate": 0.00015336683417085427, "loss": 1.0941, "step": 237 },
    { "epoch": 0.10587188612099645, "grad_norm": 0.3526718020439148, "learning_rate": 0.00015316582914572867, "loss": 0.9905, "step": 238 },
    { "epoch": 0.10631672597864769, "grad_norm": 0.39122429490089417, "learning_rate": 0.000152964824120603, "loss": 1.0116, "step": 239 },
    { "epoch": 0.10676156583629894, "grad_norm": 0.36022770404815674, "learning_rate": 0.00015276381909547739, "loss": 0.9501, "step": 240 },
    { "epoch": 0.10720640569395018, "grad_norm": 0.3478608727455139, "learning_rate": 0.00015256281407035176, "loss": 1.0017, "step": 241 },
    { "epoch": 0.10765124555160142, "grad_norm": 0.36006009578704834, "learning_rate": 0.00015236180904522613, "loss": 0.935, "step": 242 },
    { "epoch": 0.10809608540925267, "grad_norm": 0.3560234606266022, "learning_rate": 0.0001521608040201005, "loss": 0.9552, "step": 243 },
    { "epoch": 0.10854092526690391, "grad_norm": 0.6785101890563965, "learning_rate": 0.00015195979899497488, "loss": 0.9972, "step": 244 },
    { "epoch": 0.10898576512455516, "grad_norm": 0.32202988862991333, "learning_rate": 0.00015175879396984925, "loss": 0.9763, "step": 245 },
    { "epoch": 0.1094306049822064, "grad_norm": 0.33148205280303955, "learning_rate": 0.00015155778894472362, "loss": 1.0615, "step": 246 },
    { "epoch": 0.10987544483985764, "grad_norm": 0.35082313418388367, "learning_rate": 0.000151356783919598, "loss": 1.0332, "step": 247 },
    { "epoch": 0.1103202846975089, "grad_norm": 0.34773561358451843, "learning_rate": 0.00015115577889447237, "loss": 0.9172, "step": 248 },
    { "epoch": 0.11076512455516015, "grad_norm": 0.35180824995040894, "learning_rate": 0.00015095477386934674, "loss": 1.077, "step": 249 },
    { "epoch": 0.11120996441281139, "grad_norm": 0.33543917536735535, "learning_rate": 0.00015075376884422112, "loss": 0.9345, "step": 250 },
    { "epoch": 0.11165480427046263, "grad_norm": 0.3516584038734436, "learning_rate": 0.0001505527638190955, "loss": 0.9481, "step": 251 },
    { "epoch": 0.11209964412811388, "grad_norm": 0.3360784947872162, "learning_rate": 0.00015035175879396986, "loss": 0.9285, "step": 252 },
    { "epoch": 0.11254448398576512, "grad_norm": 0.3512001633644104, "learning_rate": 0.00015015075376884423, "loss": 0.9745, "step": 253 },
    { "epoch": 0.11298932384341637, "grad_norm": 0.34287190437316895, "learning_rate": 0.0001499497487437186, "loss": 1.0596, "step": 254 },
    { "epoch": 0.11343416370106761, "grad_norm": 0.3830087184906006, "learning_rate": 0.00014974874371859298, "loss": 0.9534, "step": 255 },
    { "epoch": 0.11387900355871886, "grad_norm": 0.3950470983982086, "learning_rate": 0.00014954773869346735, "loss": 1.045, "step": 256 },
    { "epoch": 0.11432384341637011, "grad_norm": 0.5723717212677002, "learning_rate": 0.0001493467336683417, "loss": 0.8671, "step": 257 },
    { "epoch": 0.11476868327402136, "grad_norm": 0.4654160737991333, "learning_rate": 0.0001491457286432161, "loss": 0.936, "step": 258 },
    { "epoch": 0.1152135231316726, "grad_norm": 0.4141252338886261, "learning_rate": 0.00014894472361809047, "loss": 1.0275, "step": 259 },
    { "epoch": 0.11565836298932385, "grad_norm": 0.3363032042980194, "learning_rate": 0.00014874371859296482, "loss": 0.899, "step": 260 },
    { "epoch": 0.11610320284697509, "grad_norm": 0.35972562432289124, "learning_rate": 0.0001485427135678392, "loss": 0.8928, "step": 261 },
    { "epoch": 0.11654804270462633, "grad_norm": 0.35416486859321594, "learning_rate": 0.00014834170854271356, "loss": 0.9557, "step": 262 },
    { "epoch": 0.11699288256227758, "grad_norm": 0.3734114170074463, "learning_rate": 0.00014814070351758796, "loss": 0.956, "step": 263 },
    { "epoch": 0.11743772241992882, "grad_norm": 0.3472467362880707, "learning_rate": 0.0001479396984924623, "loss": 0.9911, "step": 264 },
    { "epoch": 0.11788256227758007, "grad_norm": 0.433698832988739, "learning_rate": 0.00014773869346733668, "loss": 0.9547, "step": 265 },
    { "epoch": 0.11832740213523131, "grad_norm": 0.3658886253833771, "learning_rate": 0.00014753768844221106, "loss": 0.9044, "step": 266 },
    { "epoch": 0.11877224199288257, "grad_norm": 0.3511579632759094, "learning_rate": 0.00014733668341708543, "loss": 0.9766, "step": 267 },
    { "epoch": 0.11921708185053381, "grad_norm": 0.3257918357849121, "learning_rate": 0.0001471356783919598, "loss": 0.9641, "step": 268 },
    { "epoch": 0.11966192170818506, "grad_norm": 0.3833535611629486, "learning_rate": 0.00014693467336683417, "loss": 1.0307, "step": 269 },
    { "epoch": 0.1201067615658363, "grad_norm": 0.3672519326210022, "learning_rate": 0.00014673366834170855, "loss": 0.941, "step": 270 },
    { "epoch": 0.12055160142348754, "grad_norm": 0.3263191282749176, "learning_rate": 0.00014653266331658292, "loss": 1.0327, "step": 271 },
    { "epoch": 0.12099644128113879, "grad_norm": 0.3983098864555359, "learning_rate": 0.0001463316582914573, "loss": 1.011, "step": 272 },
    { "epoch": 0.12144128113879003, "grad_norm": 0.37602588534355164, "learning_rate": 0.00014613065326633167, "loss": 1.0328, "step": 273 },
    { "epoch": 0.12188612099644128, "grad_norm": 0.354548841714859, "learning_rate": 0.00014592964824120604, "loss": 1.0472, "step": 274 },
    { "epoch": 0.12233096085409252, "grad_norm": 0.343363493680954, "learning_rate": 0.0001457286432160804, "loss": 1.0053, "step": 275 },
    { "epoch": 0.12277580071174377, "grad_norm": 0.3958703577518463, "learning_rate": 0.00014552763819095479, "loss": 0.917, "step": 276 },
    { "epoch": 0.12322064056939502, "grad_norm": 0.4580914378166199, "learning_rate": 0.00014532663316582916, "loss": 0.961, "step": 277 },
    { "epoch": 0.12366548042704627, "grad_norm": 0.39822980761528015, "learning_rate": 0.00014512562814070353, "loss": 0.9276, "step": 278 },
    { "epoch": 0.12411032028469751, "grad_norm": 0.3082531988620758, "learning_rate": 0.0001449246231155779, "loss": 0.9496, "step": 279 },
    { "epoch": 0.12455516014234876, "grad_norm": 0.34612587094306946, "learning_rate": 0.00014472361809045228, "loss": 0.9371, "step": 280 },
    { "epoch": 0.125, "grad_norm": 0.3305722177028656, "learning_rate": 0.00014452261306532665, "loss": 0.9265, "step": 281 },
    { "epoch": 0.12544483985765126, "grad_norm": 0.35687321424484253, "learning_rate": 0.000144321608040201, "loss": 0.9935, "step": 282 },
    { "epoch": 0.1258896797153025, "grad_norm": 0.3545040488243103, "learning_rate": 0.00014412060301507537, "loss": 0.973, "step": 283 },
    { "epoch": 0.12633451957295375, "grad_norm": 0.3379552960395813, "learning_rate": 0.00014391959798994977, "loss": 1.0022, "step": 284 },
    { "epoch": 0.12677935943060498, "grad_norm": 0.32599905133247375, "learning_rate": 0.00014371859296482411, "loss": 1.0497, "step": 285 },
    { "epoch": 0.12722419928825623, "grad_norm": 0.38973209261894226, "learning_rate": 0.0001435175879396985, "loss": 0.9373, "step": 286 },
    { "epoch": 0.12766903914590746, "grad_norm": 0.37509605288505554, "learning_rate": 0.00014331658291457286, "loss": 0.8695, "step": 287 },
    { "epoch": 0.12811387900355872, "grad_norm": 0.33525556325912476, "learning_rate": 0.00014311557788944726, "loss": 0.9768, "step": 288 },
    { "epoch": 0.12855871886120995, "grad_norm": 0.3496398329734802, "learning_rate": 0.0001429145728643216, "loss": 1.0044, "step": 289 },
    { "epoch": 0.1290035587188612, "grad_norm": 0.39922475814819336, "learning_rate": 0.00014271356783919598, "loss": 0.9998, "step": 290 },
    { "epoch": 0.12944839857651247, "grad_norm": 0.33336395025253296, "learning_rate": 0.00014251256281407035, "loss": 0.9871, "step": 291 },
    { "epoch": 0.1298932384341637, "grad_norm": 0.30318179726600647, "learning_rate": 0.00014231155778894473, "loss": 0.9652, "step": 292 },
    { "epoch": 0.13033807829181496, "grad_norm": 0.33315086364746094, "learning_rate": 0.0001421105527638191, "loss": 0.9793, "step": 293 },
    { "epoch": 0.1307829181494662, "grad_norm": 0.33998075127601624, "learning_rate": 0.00014190954773869347, "loss": 0.945, "step": 294 },
    { "epoch": 0.13122775800711745, "grad_norm": 0.3731788396835327, "learning_rate": 0.00014170854271356784, "loss": 0.9881, "step": 295 },
    { "epoch": 0.13167259786476868, "grad_norm": 0.34216803312301636, "learning_rate": 0.00014150753768844222, "loss": 0.9576, "step": 296 },
    { "epoch": 0.13211743772241993, "grad_norm": 0.4074036180973053, "learning_rate": 0.0001413065326633166, "loss": 1.0654, "step": 297 },
    { "epoch": 0.13256227758007116, "grad_norm": 0.401727557182312, "learning_rate": 0.00014110552763819096, "loss": 0.9813, "step": 298 },
    { "epoch": 0.13300711743772242, "grad_norm": 0.3025040626525879, "learning_rate": 0.00014090452261306534, "loss": 0.9491, "step": 299 },
    { "epoch": 0.13345195729537365, "grad_norm": 0.3456086814403534, "learning_rate": 0.0001407035175879397, "loss": 1.102, "step": 300 },
    { "epoch": 0.1338967971530249, "grad_norm": 0.36754533648490906, "learning_rate": 0.00014050251256281408, "loss": 0.9794, "step": 301 },
    { "epoch": 0.13434163701067617, "grad_norm": 0.2782064378261566, "learning_rate": 0.00014030150753768846, "loss": 0.8153, "step": 302 },
    { "epoch": 0.1347864768683274, "grad_norm": 0.35259565711021423, "learning_rate": 0.0001401005025125628, "loss": 1.032, "step": 303 },
    { "epoch": 0.13523131672597866, "grad_norm": 0.3422435224056244, "learning_rate": 0.0001398994974874372, "loss": 0.9601, "step": 304 },
    { "epoch": 0.1356761565836299, "grad_norm": 0.4378672242164612, "learning_rate": 0.00013969849246231157, "loss": 0.9241, "step": 305 },
    { "epoch": 0.13612099644128114, "grad_norm": 0.33044546842575073, "learning_rate": 0.00013949748743718595, "loss": 0.9607, "step": 306 },
    { "epoch": 0.13656583629893237, "grad_norm": 0.36180225014686584, "learning_rate": 0.0001392964824120603, "loss": 1.0878, "step": 307 },
    { "epoch": 0.13701067615658363, "grad_norm": 0.4124760627746582, "learning_rate": 0.00013909547738693467, "loss": 0.9698, "step": 308 },
    { "epoch": 0.13745551601423486, "grad_norm": 0.33320683240890503, "learning_rate": 0.00013889447236180907, "loss": 0.869, "step": 309 },
    { "epoch": 0.13790035587188612, "grad_norm": 0.3595818877220154, "learning_rate": 0.0001386934673366834, "loss": 1.0227, "step": 310 },
    { "epoch": 0.13834519572953738, "grad_norm": 0.35221633315086365, "learning_rate": 0.00013849246231155778, "loss": 0.9576, "step": 311 },
    { "epoch": 0.1387900355871886, "grad_norm": 0.3357265889644623, "learning_rate": 0.00013829145728643216, "loss": 0.9877, "step": 312 },
    { "epoch": 0.13923487544483987, "grad_norm": 0.3630138039588928, "learning_rate": 0.00013809045226130656, "loss": 0.9918, "step": 313 },
    { "epoch": 0.1396797153024911, "grad_norm": 0.3503361642360687, "learning_rate": 0.0001378894472361809, "loss": 1.0585, "step": 314 },
    { "epoch": 0.14012455516014236, "grad_norm": 0.33437854051589966, "learning_rate": 0.00013768844221105528, "loss": 1.1292, "step": 315 },
    { "epoch": 0.14056939501779359, "grad_norm": 0.3571662902832031, "learning_rate": 0.00013748743718592965, "loss": 1.0084, "step": 316 },
    { "epoch": 0.14101423487544484, "grad_norm": 0.3780195415019989, "learning_rate": 0.00013728643216080402, "loss": 0.98, "step": 317 },
    { "epoch": 0.14145907473309607, "grad_norm": 0.3566683232784271, "learning_rate": 0.0001370854271356784, "loss": 0.9694, "step": 318 },
    { "epoch": 0.14190391459074733, "grad_norm": 0.36311331391334534, "learning_rate": 0.00013688442211055277, "loss": 0.8768, "step": 319 },
    { "epoch": 0.1423487544483986, "grad_norm": 0.3234863877296448, "learning_rate": 0.00013668341708542714, "loss": 0.9384, "step": 320 },
    { "epoch": 0.14279359430604982, "grad_norm": 0.3360319137573242, "learning_rate": 0.00013648241206030151, "loss": 0.9831, "step": 321 },
    { "epoch": 0.14323843416370108, "grad_norm": 0.3335811197757721, "learning_rate": 0.0001362814070351759, "loss": 0.9541, "step": 322 },
    { "epoch": 0.1436832740213523, "grad_norm": 0.40138107538223267, "learning_rate": 0.00013608040201005026, "loss": 0.9647, "step": 323 },
    { "epoch": 0.14412811387900357, "grad_norm": 0.36284399032592773, "learning_rate": 0.00013587939698492463, "loss": 0.9763, "step": 324 },
    { "epoch": 0.1445729537366548, "grad_norm": 0.29980310797691345, "learning_rate": 0.000135678391959799, "loss": 0.9773, "step": 325 },
    { "epoch": 0.14501779359430605, "grad_norm": 0.37920883297920227, "learning_rate": 0.00013547738693467338, "loss": 1.0024, "step": 326 },
    { "epoch": 0.14546263345195729, "grad_norm": 0.32456889748573303, "learning_rate": 0.00013527638190954775, "loss": 0.995, "step": 327 },
    { "epoch": 0.14590747330960854, "grad_norm": 0.368257999420166, "learning_rate": 0.0001350753768844221, "loss": 1.0205, "step": 328 },
    { "epoch": 0.14635231316725977, "grad_norm": 0.37282678484916687, "learning_rate": 0.00013487437185929647, "loss": 0.8557, "step": 329 },
    { "epoch": 0.14679715302491103, "grad_norm": 0.34274178743362427, "learning_rate": 0.00013467336683417087, "loss": 0.8833, "step": 330 },
    { "epoch": 0.1472419928825623, "grad_norm": 0.34231486916542053, "learning_rate": 0.00013447236180904524, "loss": 0.9529, "step": 331 },
    { "epoch": 0.14768683274021352, "grad_norm": 0.38436999917030334, "learning_rate": 0.0001342713567839196, "loss": 1.029, "step": 332 },
    { "epoch": 0.14813167259786478, "grad_norm": 0.400931179523468, "learning_rate": 0.00013407035175879396, "loss": 0.9913, "step": 333 },
    { "epoch": 0.148576512455516, "grad_norm": 0.32551515102386475, "learning_rate": 0.00013386934673366836, "loss": 0.9879, "step": 334 },
    { "epoch": 0.14902135231316727, "grad_norm": 0.35669583082199097, "learning_rate": 0.0001336683417085427, "loss": 1.0058, "step": 335 },
    { "epoch": 0.1494661921708185, "grad_norm": 0.36875060200691223, "learning_rate": 0.00013346733668341708, "loss": 0.9858, "step": 336 },
    { "epoch": 0.14991103202846975, "grad_norm": 0.36278796195983887, "learning_rate": 0.00013326633165829146, "loss": 1.097, "step": 337 },
    { "epoch": 0.15035587188612098, "grad_norm": 0.31908801198005676,
| "learning_rate": 0.00013306532663316586, | |
| "loss": 0.955, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.15080071174377224, | |
| "grad_norm": 0.38954317569732666, | |
| "learning_rate": 0.0001328643216080402, | |
| "loss": 0.9402, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.1512455516014235, | |
| "grad_norm": 0.3285794258117676, | |
| "learning_rate": 0.00013266331658291457, | |
| "loss": 0.9327, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.15169039145907473, | |
| "grad_norm": 0.4542711675167084, | |
| "learning_rate": 0.00013246231155778895, | |
| "loss": 0.7939, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.152135231316726, | |
| "grad_norm": 0.3730446696281433, | |
| "learning_rate": 0.00013226130653266332, | |
| "loss": 1.0014, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.15258007117437722, | |
| "grad_norm": 0.3543410301208496, | |
| "learning_rate": 0.0001320603015075377, | |
| "loss": 0.9617, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.15302491103202848, | |
| "grad_norm": 0.32625502347946167, | |
| "learning_rate": 0.00013185929648241207, | |
| "loss": 0.9975, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.1534697508896797, | |
| "grad_norm": 0.39967095851898193, | |
| "learning_rate": 0.00013165829145728644, | |
| "loss": 0.8421, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.15391459074733096, | |
| "grad_norm": 0.36405855417251587, | |
| "learning_rate": 0.0001314572864321608, | |
| "loss": 1.0338, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.1543594306049822, | |
| "grad_norm": 0.32427796721458435, | |
| "learning_rate": 0.00013125628140703518, | |
| "loss": 0.971, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.15480427046263345, | |
| "grad_norm": 0.3405332565307617, | |
| "learning_rate": 0.00013105527638190956, | |
| "loss": 0.9974, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.1552491103202847, | |
| "grad_norm": 0.3611313998699188, | |
| "learning_rate": 0.00013085427135678393, | |
| "loss": 1.0026, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.15569395017793594, | |
| "grad_norm": 0.3901672661304474, | |
| "learning_rate": 0.0001306532663316583, | |
| "loss": 0.924, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1561387900355872, | |
| "grad_norm": 0.38940900564193726, | |
| "learning_rate": 0.00013045226130653268, | |
| "loss": 1.0827, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.15658362989323843, | |
| "grad_norm": 0.3226456046104431, | |
| "learning_rate": 0.00013025125628140705, | |
| "loss": 0.9063, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.1570284697508897, | |
| "grad_norm": 0.2980092763900757, | |
| "learning_rate": 0.0001300502512562814, | |
| "loss": 0.9688, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.15747330960854092, | |
| "grad_norm": 0.3783577084541321, | |
| "learning_rate": 0.00012984924623115577, | |
| "loss": 1.0825, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.15791814946619218, | |
| "grad_norm": 0.3832230269908905, | |
| "learning_rate": 0.00012964824120603017, | |
| "loss": 1.0627, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.1583629893238434, | |
| "grad_norm": 0.34628695249557495, | |
| "learning_rate": 0.00012944723618090454, | |
| "loss": 0.8811, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.15880782918149466, | |
| "grad_norm": 0.43190130591392517, | |
| "learning_rate": 0.0001292462311557789, | |
| "loss": 1.0152, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.1592526690391459, | |
| "grad_norm": 0.36563345789909363, | |
| "learning_rate": 0.00012904522613065326, | |
| "loss": 0.9848, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.15969750889679715, | |
| "grad_norm": 0.2992965281009674, | |
| "learning_rate": 0.00012884422110552766, | |
| "loss": 0.9507, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.1601423487544484, | |
| "grad_norm": 0.3296932876110077, | |
| "learning_rate": 0.000128643216080402, | |
| "loss": 1.1, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.16058718861209964, | |
| "grad_norm": 0.41810277104377747, | |
| "learning_rate": 0.00012844221105527638, | |
| "loss": 0.8272, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.1610320284697509, | |
| "grad_norm": 0.41459906101226807, | |
| "learning_rate": 0.00012824120603015075, | |
| "loss": 1.0549, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.16147686832740213, | |
| "grad_norm": 0.35844656825065613, | |
| "learning_rate": 0.00012804020100502515, | |
| "loss": 1.0996, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.1619217081850534, | |
| "grad_norm": 0.31563690304756165, | |
| "learning_rate": 0.0001278391959798995, | |
| "loss": 0.8873, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.16236654804270462, | |
| "grad_norm": 0.3521721065044403, | |
| "learning_rate": 0.00012763819095477387, | |
| "loss": 0.8421, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.16281138790035588, | |
| "grad_norm": 0.3149266839027405, | |
| "learning_rate": 0.00012743718592964824, | |
| "loss": 0.9538, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.1632562277580071, | |
| "grad_norm": 0.36552533507347107, | |
| "learning_rate": 0.00012723618090452262, | |
| "loss": 0.9734, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.16370106761565836, | |
| "grad_norm": 0.33934077620506287, | |
| "learning_rate": 0.000127035175879397, | |
| "loss": 1.0199, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.16414590747330962, | |
| "grad_norm": 0.4329892098903656, | |
| "learning_rate": 0.00012683417085427136, | |
| "loss": 0.872, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.16459074733096085, | |
| "grad_norm": 0.4003223776817322, | |
| "learning_rate": 0.00012663316582914574, | |
| "loss": 1.0864, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1650355871886121, | |
| "grad_norm": 0.37618136405944824, | |
| "learning_rate": 0.0001264321608040201, | |
| "loss": 0.9797, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.16548042704626334, | |
| "grad_norm": 0.30325135588645935, | |
| "learning_rate": 0.00012623115577889448, | |
| "loss": 0.8896, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.1659252669039146, | |
| "grad_norm": 0.42738157510757446, | |
| "learning_rate": 0.00012603015075376885, | |
| "loss": 1.0881, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.16637010676156583, | |
| "grad_norm": 0.38146787881851196, | |
| "learning_rate": 0.00012582914572864323, | |
| "loss": 0.9281, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.16681494661921709, | |
| "grad_norm": 0.40179941058158875, | |
| "learning_rate": 0.0001256281407035176, | |
| "loss": 0.9364, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.16725978647686832, | |
| "grad_norm": 0.33039966225624084, | |
| "learning_rate": 0.00012542713567839197, | |
| "loss": 0.9216, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.16770462633451957, | |
| "grad_norm": 0.3515557050704956, | |
| "learning_rate": 0.00012522613065326635, | |
| "loss": 1.0472, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.16814946619217083, | |
| "grad_norm": 0.3095014691352844, | |
| "learning_rate": 0.0001250251256281407, | |
| "loss": 0.9153, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.16859430604982206, | |
| "grad_norm": 0.42122480273246765, | |
| "learning_rate": 0.00012482412060301507, | |
| "loss": 1.0208, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.16903914590747332, | |
| "grad_norm": 0.3838384747505188, | |
| "learning_rate": 0.00012462311557788947, | |
| "loss": 0.9936, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.16948398576512455, | |
| "grad_norm": 0.38645121455192566, | |
| "learning_rate": 0.00012442211055276384, | |
| "loss": 0.983, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.1699288256227758, | |
| "grad_norm": 0.34789687395095825, | |
| "learning_rate": 0.00012422110552763818, | |
| "loss": 0.9341, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.17037366548042704, | |
| "grad_norm": 0.3516675531864166, | |
| "learning_rate": 0.00012402010050251256, | |
| "loss": 1.052, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.1708185053380783, | |
| "grad_norm": 0.36350032687187195, | |
| "learning_rate": 0.00012381909547738696, | |
| "loss": 1.047, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.17126334519572953, | |
| "grad_norm": 0.3919387757778168, | |
| "learning_rate": 0.0001236180904522613, | |
| "loss": 0.9899, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.17170818505338079, | |
| "grad_norm": 0.3369230628013611, | |
| "learning_rate": 0.00012341708542713568, | |
| "loss": 0.981, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.17215302491103202, | |
| "grad_norm": 0.34190815687179565, | |
| "learning_rate": 0.00012321608040201005, | |
| "loss": 0.9727, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.17259786476868327, | |
| "grad_norm": 0.3579177260398865, | |
| "learning_rate": 0.00012301507537688445, | |
| "loss": 1.0071, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.17304270462633453, | |
| "grad_norm": 0.307522714138031, | |
| "learning_rate": 0.0001228140703517588, | |
| "loss": 1.0614, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.17348754448398576, | |
| "grad_norm": 0.3733164966106415, | |
| "learning_rate": 0.00012261306532663317, | |
| "loss": 1.1578, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.17393238434163702, | |
| "grad_norm": 0.3878190517425537, | |
| "learning_rate": 0.00012241206030150754, | |
| "loss": 0.9454, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.17437722419928825, | |
| "grad_norm": 0.37619033455848694, | |
| "learning_rate": 0.00012221105527638191, | |
| "loss": 0.9608, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.1748220640569395, | |
| "grad_norm": 0.3204808533191681, | |
| "learning_rate": 0.00012201005025125629, | |
| "loss": 1.0344, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.17526690391459074, | |
| "grad_norm": 0.3382740616798401, | |
| "learning_rate": 0.00012180904522613066, | |
| "loss": 0.9624, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.175711743772242, | |
| "grad_norm": 0.3102628290653229, | |
| "learning_rate": 0.00012160804020100502, | |
| "loss": 0.8531, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.17615658362989323, | |
| "grad_norm": 0.32901301980018616, | |
| "learning_rate": 0.00012140703517587942, | |
| "loss": 0.9773, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.17660142348754448, | |
| "grad_norm": 0.3148498833179474, | |
| "learning_rate": 0.00012120603015075378, | |
| "loss": 0.9834, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.17704626334519574, | |
| "grad_norm": 0.34393250942230225, | |
| "learning_rate": 0.00012100502512562815, | |
| "loss": 0.8226, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.17749110320284697, | |
| "grad_norm": 0.415526807308197, | |
| "learning_rate": 0.00012080402010050251, | |
| "loss": 0.9692, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.17793594306049823, | |
| "grad_norm": 0.34324660897254944, | |
| "learning_rate": 0.00012060301507537688, | |
| "loss": 1.0135, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.17838078291814946, | |
| "grad_norm": 0.3012760579586029, | |
| "learning_rate": 0.00012040201005025127, | |
| "loss": 0.9889, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.17882562277580072, | |
| "grad_norm": 0.3476797342300415, | |
| "learning_rate": 0.00012020100502512563, | |
| "loss": 0.9328, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.17927046263345195, | |
| "grad_norm": 0.36807379126548767, | |
| "learning_rate": 0.00012, | |
| "loss": 1.0193, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.1797153024911032, | |
| "grad_norm": 0.3321215808391571, | |
| "learning_rate": 0.00011979899497487436, | |
| "loss": 0.9197, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.18016014234875444, | |
| "grad_norm": 0.48590320348739624, | |
| "learning_rate": 0.00011959798994974876, | |
| "loss": 0.8509, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.1806049822064057, | |
| "grad_norm": 0.3456606864929199, | |
| "learning_rate": 0.00011939698492462312, | |
| "loss": 1.033, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.18104982206405695, | |
| "grad_norm": 0.3836311101913452, | |
| "learning_rate": 0.0001191959798994975, | |
| "loss": 0.9396, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.18149466192170818, | |
| "grad_norm": 0.3805595338344574, | |
| "learning_rate": 0.00011899497487437185, | |
| "loss": 0.9135, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.18193950177935944, | |
| "grad_norm": 0.34188127517700195, | |
| "learning_rate": 0.00011879396984924624, | |
| "loss": 0.9551, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.18238434163701067, | |
| "grad_norm": 0.40114229917526245, | |
| "learning_rate": 0.00011859296482412061, | |
| "loss": 0.93, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.18282918149466193, | |
| "grad_norm": 0.3679947555065155, | |
| "learning_rate": 0.00011839195979899497, | |
| "loss": 1.0823, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.18327402135231316, | |
| "grad_norm": 0.4190071225166321, | |
| "learning_rate": 0.00011819095477386935, | |
| "loss": 0.8648, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.18371886120996442, | |
| "grad_norm": 0.39405253529548645, | |
| "learning_rate": 0.00011798994974874373, | |
| "loss": 1.0682, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.18416370106761565, | |
| "grad_norm": 0.33679550886154175, | |
| "learning_rate": 0.0001177889447236181, | |
| "loss": 0.9674, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.1846085409252669, | |
| "grad_norm": 0.3880975842475891, | |
| "learning_rate": 0.00011758793969849247, | |
| "loss": 0.9367, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.18505338078291814, | |
| "grad_norm": 0.4066067337989807, | |
| "learning_rate": 0.00011738693467336684, | |
| "loss": 0.9712, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.1854982206405694, | |
| "grad_norm": 0.48199501633644104, | |
| "learning_rate": 0.00011718592964824122, | |
| "loss": 0.9133, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.18594306049822065, | |
| "grad_norm": 0.34463343024253845, | |
| "learning_rate": 0.00011698492462311558, | |
| "loss": 0.9384, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.18638790035587188, | |
| "grad_norm": 0.4196937680244446, | |
| "learning_rate": 0.00011678391959798996, | |
| "loss": 0.9803, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.18683274021352314, | |
| "grad_norm": 0.3185271620750427, | |
| "learning_rate": 0.00011658291457286432, | |
| "loss": 0.9267, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.18727758007117437, | |
| "grad_norm": 0.36499395966529846, | |
| "learning_rate": 0.00011638190954773872, | |
| "loss": 0.9721, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.18772241992882563, | |
| "grad_norm": 0.3580315411090851, | |
| "learning_rate": 0.00011618090452261308, | |
| "loss": 0.8834, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.18816725978647686, | |
| "grad_norm": 0.38039615750312805, | |
| "learning_rate": 0.00011597989949748745, | |
| "loss": 0.9638, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.18861209964412812, | |
| "grad_norm": 0.38077226281166077, | |
| "learning_rate": 0.00011577889447236181, | |
| "loss": 0.9077, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.18905693950177935, | |
| "grad_norm": 0.33230000734329224, | |
| "learning_rate": 0.00011557788944723618, | |
| "loss": 1.0115, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.1895017793594306, | |
| "grad_norm": 0.41263043880462646, | |
| "learning_rate": 0.00011537688442211057, | |
| "loss": 0.9162, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.18994661921708186, | |
| "grad_norm": 0.32407188415527344, | |
| "learning_rate": 0.00011517587939698493, | |
| "loss": 0.9618, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.1903914590747331, | |
| "grad_norm": 0.4661211669445038, | |
| "learning_rate": 0.0001149748743718593, | |
| "loss": 0.931, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.19083629893238435, | |
| "grad_norm": 0.37407800555229187, | |
| "learning_rate": 0.00011477386934673366, | |
| "loss": 1.0046, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.19128113879003558, | |
| "grad_norm": 0.3580109477043152, | |
| "learning_rate": 0.00011457286432160806, | |
| "loss": 1.0114, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.19172597864768684, | |
| "grad_norm": 0.42048606276512146, | |
| "learning_rate": 0.00011437185929648242, | |
| "loss": 0.9468, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.19217081850533807, | |
| "grad_norm": 0.38913312554359436, | |
| "learning_rate": 0.00011417085427135679, | |
| "loss": 1.1037, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.19261565836298933, | |
| "grad_norm": 0.35675281286239624, | |
| "learning_rate": 0.00011396984924623115, | |
| "loss": 0.9292, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.19306049822064056, | |
| "grad_norm": 0.3579562306404114, | |
| "learning_rate": 0.00011376884422110554, | |
| "loss": 0.9413, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.19350533807829182, | |
| "grad_norm": 0.3751574456691742, | |
| "learning_rate": 0.00011356783919597991, | |
| "loss": 1.0018, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.19395017793594305, | |
| "grad_norm": 0.3086400032043457, | |
| "learning_rate": 0.00011336683417085427, | |
| "loss": 0.9355, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.1943950177935943, | |
| "grad_norm": 0.41924887895584106, | |
| "learning_rate": 0.00011316582914572864, | |
| "loss": 0.942, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.19483985765124556, | |
| "grad_norm": 0.3349624276161194, | |
| "learning_rate": 0.00011296482412060303, | |
| "loss": 1.0232, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.1952846975088968, | |
| "grad_norm": 0.4605553448200226, | |
| "learning_rate": 0.0001127638190954774, | |
| "loss": 1.0958, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.19572953736654805, | |
| "grad_norm": 0.3877868950366974, | |
| "learning_rate": 0.00011256281407035176, | |
| "loss": 0.9227, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.19617437722419928, | |
| "grad_norm": 0.3474135100841522, | |
| "learning_rate": 0.00011236180904522614, | |
| "loss": 0.8446, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.19661921708185054, | |
| "grad_norm": 0.3451891541481018, | |
| "learning_rate": 0.00011216080402010052, | |
| "loss": 0.9641, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.19706405693950177, | |
| "grad_norm": 0.3565372824668884, | |
| "learning_rate": 0.00011195979899497488, | |
| "loss": 0.9807, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.19750889679715303, | |
| "grad_norm": 0.3283828794956207, | |
| "learning_rate": 0.00011175879396984925, | |
| "loss": 0.9949, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.19795373665480426, | |
| "grad_norm": 0.3562460243701935, | |
| "learning_rate": 0.00011155778894472361, | |
| "loss": 0.9807, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.19839857651245552, | |
| "grad_norm": 0.33841797709465027, | |
| "learning_rate": 0.00011135678391959799, | |
| "loss": 1.0213, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.19884341637010677, | |
| "grad_norm": 0.3265354633331299, | |
| "learning_rate": 0.00011115577889447237, | |
| "loss": 0.9138, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.199288256227758, | |
| "grad_norm": 0.35441333055496216, | |
| "learning_rate": 0.00011095477386934675, | |
| "loss": 0.8982, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.19973309608540926, | |
| "grad_norm": 0.3564106822013855, | |
| "learning_rate": 0.0001107537688442211, | |
| "loss": 1.0298, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.2001779359430605, | |
| "grad_norm": 0.510447084903717, | |
| "learning_rate": 0.00011055276381909548, | |
| "loss": 0.818, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.20062277580071175, | |
| "grad_norm": 0.3568657338619232, | |
| "learning_rate": 0.00011035175879396986, | |
| "loss": 0.9187, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.20106761565836298, | |
| "grad_norm": 0.40208712220191956, | |
| "learning_rate": 0.00011015075376884422, | |
| "loss": 1.0041, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.20151245551601424, | |
| "grad_norm": 0.36585667729377747, | |
| "learning_rate": 0.0001099497487437186, | |
| "loss": 0.9413, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.20195729537366547, | |
| "grad_norm": 0.36290839314460754, | |
| "learning_rate": 0.00010974874371859296, | |
| "loss": 1.0084, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.20240213523131673, | |
| "grad_norm": 0.34766581654548645, | |
| "learning_rate": 0.00010954773869346736, | |
| "loss": 1.0761, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.20284697508896798, | |
| "grad_norm": 0.3324412703514099, | |
| "learning_rate": 0.00010934673366834172, | |
| "loss": 0.9078, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.20329181494661921, | |
| "grad_norm": 0.35148242115974426, | |
| "learning_rate": 0.00010914572864321609, | |
| "loss": 0.9148, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.20373665480427047, | |
| "grad_norm": 0.36268171668052673, | |
| "learning_rate": 0.00010894472361809045, | |
| "loss": 0.9915, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.2041814946619217, | |
| "grad_norm": 0.3343249261379242, | |
| "learning_rate": 0.00010874371859296483, | |
| "loss": 0.9596, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.20462633451957296, | |
| "grad_norm": 0.3835364878177643, | |
| "learning_rate": 0.00010854271356783921, | |
| "loss": 0.9718, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.2050711743772242, | |
| "grad_norm": 0.3455188274383545, | |
| "learning_rate": 0.00010834170854271357, | |
| "loss": 0.9918, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.20551601423487545, | |
| "grad_norm": 0.3390166759490967, | |
| "learning_rate": 0.00010814070351758794, | |
| "loss": 0.9718, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.20596085409252668, | |
| "grad_norm": 0.40748950839042664, | |
| "learning_rate": 0.00010793969849246233, | |
| "loss": 0.9766, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.20640569395017794, | |
| "grad_norm": 0.41326215863227844, | |
| "learning_rate": 0.0001077386934673367, | |
| "loss": 0.9639, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.20685053380782917, | |
| "grad_norm": 0.36672449111938477, | |
| "learning_rate": 0.00010753768844221106, | |
| "loss": 1.0027, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.20729537366548043, | |
| "grad_norm": 0.3668520152568817, | |
| "learning_rate": 0.00010733668341708543, | |
| "loss": 0.9147, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.20774021352313168, | |
| "grad_norm": 0.40608394145965576, | |
| "learning_rate": 0.00010713567839195982, | |
| "loss": 0.973, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.20818505338078291, | |
| "grad_norm": 0.3467264175415039, | |
| "learning_rate": 0.00010693467336683418, | |
| "loss": 1.0182, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.20862989323843417, | |
| "grad_norm": 0.3724631071090698, | |
| "learning_rate": 0.00010673366834170855, | |
| "loss": 0.9269, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.2090747330960854, | |
| "grad_norm": 0.4459652006626129, | |
| "learning_rate": 0.00010653266331658291, | |
| "loss": 1.115, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.20951957295373666, | |
| "grad_norm": 0.3838179111480713, | |
| "learning_rate": 0.00010633165829145728, | |
| "loss": 0.9367, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.2099644128113879, | |
| "grad_norm": 0.3366687595844269, | |
| "learning_rate": 0.00010613065326633167, | |
| "loss": 1.0043, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.21040925266903915, | |
| "grad_norm": 0.39886242151260376, | |
| "learning_rate": 0.00010592964824120604, | |
| "loss": 0.9596, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.21085409252669038, | |
| "grad_norm": 0.36448466777801514, | |
| "learning_rate": 0.0001057286432160804, | |
| "loss": 0.874, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.21129893238434164, | |
| "grad_norm": 0.36318713426589966, | |
| "learning_rate": 0.00010552763819095478, | |
| "loss": 0.8955, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.2117437722419929, | |
| "grad_norm": 0.41962435841560364, | |
| "learning_rate": 0.00010532663316582916, | |
| "loss": 1.0139, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.21218861209964412, | |
| "grad_norm": 0.34656140208244324, | |
| "learning_rate": 0.00010512562814070352, | |
| "loss": 0.9768, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.21263345195729538, | |
| "grad_norm": 0.36636775732040405, | |
| "learning_rate": 0.0001049246231155779, | |
| "loss": 0.9826, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.2130782918149466, | |
| "grad_norm": 0.3224871754646301, | |
| "learning_rate": 0.00010472361809045225, | |
| "loss": 0.9229, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.21352313167259787, | |
| "grad_norm": 0.33149847388267517, | |
| "learning_rate": 0.00010452261306532664, | |
| "loss": 0.9374, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2139679715302491, | |
| "grad_norm": 0.356975793838501, | |
| "learning_rate": 0.00010432160804020101, | |
| "loss": 0.8682, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.21441281138790036, | |
| "grad_norm": 0.3753889799118042, | |
| "learning_rate": 0.00010412060301507539, | |
| "loss": 0.961, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.2148576512455516, | |
| "grad_norm": 0.3479616641998291, | |
| "learning_rate": 0.00010391959798994975, | |
| "loss": 0.9423, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.21530249110320285, | |
| "grad_norm": 0.4025513529777527, | |
| "learning_rate": 0.00010371859296482413, | |
| "loss": 0.9917, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.2157473309608541, | |
| "grad_norm": 0.3388558328151703, | |
| "learning_rate": 0.0001035175879396985, | |
| "loss": 0.994, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.21619217081850534, | |
| "grad_norm": 0.3347098231315613, | |
| "learning_rate": 0.00010331658291457286, | |
| "loss": 0.9796, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.2166370106761566, | |
| "grad_norm": 0.29469332098960876, | |
| "learning_rate": 0.00010311557788944724, | |
| "loss": 0.9296, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.21708185053380782, | |
| "grad_norm": 0.4186575412750244, | |
| "learning_rate": 0.00010291457286432162, | |
| "loss": 1.0021, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.21752669039145908, | |
| "grad_norm": 0.3087356388568878, | |
| "learning_rate": 0.00010271356783919598, | |
| "loss": 0.968, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.2179715302491103, | |
| "grad_norm": 0.3883945643901825, | |
| "learning_rate": 0.00010251256281407036, | |
| "loss": 1.1866, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.21841637010676157, | |
| "grad_norm": 0.31850650906562805, | |
| "learning_rate": 0.00010231155778894473, | |
| "loss": 0.9544, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.2188612099644128, | |
| "grad_norm": 0.40497350692749023, | |
| "learning_rate": 0.00010211055276381909, | |
| "loss": 1.0134, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.21930604982206406, | |
| "grad_norm": 0.31457439064979553, | |
| "learning_rate": 0.00010190954773869348, | |
| "loss": 0.8261, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.2197508896797153, | |
| "grad_norm": 0.398622065782547, | |
| "learning_rate": 0.00010170854271356785, | |
| "loss": 1.0384, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.22019572953736655, | |
| "grad_norm": 0.30925434827804565, | |
| "learning_rate": 0.00010150753768844221, | |
| "loss": 0.9075, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.2206405693950178, | |
| "grad_norm": 0.3536154329776764, | |
| "learning_rate": 0.00010130653266331658, | |
| "loss": 0.9856, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.22108540925266904, | |
| "grad_norm": 0.3990980386734009, | |
| "learning_rate": 0.00010110552763819097, | |
| "loss": 0.9822, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.2215302491103203, | |
| "grad_norm": 0.3845369517803192, | |
| "learning_rate": 0.00010090452261306533, | |
| "loss": 0.9191, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.22197508896797152, | |
| "grad_norm": 0.45938462018966675, | |
| "learning_rate": 0.0001007035175879397, | |
| "loss": 0.994, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.22241992882562278, | |
| "grad_norm": 0.32502633333206177, | |
| "learning_rate": 0.00010050251256281407, | |
| "loss": 0.9397, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.222864768683274, | |
| "grad_norm": 0.3375188410282135, | |
| "learning_rate": 0.00010030150753768846, | |
| "loss": 0.909, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.22330960854092527, | |
| "grad_norm": 0.31290072202682495, | |
| "learning_rate": 0.00010010050251256282, | |
| "loss": 0.9114, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.2237544483985765, | |
| "grad_norm": 0.37251392006874084, | |
| "learning_rate": 9.989949748743719e-05, | |
| "loss": 0.9812, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.22419928825622776, | |
| "grad_norm": 0.33114826679229736, | |
| "learning_rate": 9.969849246231156e-05, | |
| "loss": 0.893, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.22464412811387902, | |
| "grad_norm": 0.3244706392288208, | |
| "learning_rate": 9.949748743718594e-05, | |
| "loss": 0.9659, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.22508896797153025, | |
| "grad_norm": 0.3634791374206543, | |
| "learning_rate": 9.929648241206031e-05, | |
| "loss": 1.0315, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.2255338078291815, | |
| "grad_norm": 0.38301393389701843, | |
| "learning_rate": 9.909547738693468e-05, | |
| "loss": 0.9492, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.22597864768683273, | |
| "grad_norm": 0.3449389338493347, | |
| "learning_rate": 9.889447236180906e-05, | |
| "loss": 0.9327, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.226423487544484, | |
| "grad_norm": 0.3777461051940918, | |
| "learning_rate": 9.869346733668342e-05, | |
| "loss": 0.9938, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.22686832740213522, | |
| "grad_norm": 0.3581281006336212, | |
| "learning_rate": 9.84924623115578e-05, | |
| "loss": 1.0184, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.22731316725978648, | |
| "grad_norm": 0.3965352475643158, | |
| "learning_rate": 9.829145728643216e-05, | |
| "loss": 0.9131, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.2277580071174377, | |
| "grad_norm": 0.33017244935035706, | |
| "learning_rate": 9.809045226130655e-05, | |
| "loss": 0.9547, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.22820284697508897, | |
| "grad_norm": 0.35746607184410095, | |
| "learning_rate": 9.788944723618091e-05, | |
| "loss": 0.9917, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.22864768683274023, | |
| "grad_norm": 0.36392533779144287, | |
| "learning_rate": 9.768844221105528e-05, | |
| "loss": 1.0056, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.22909252669039146, | |
| "grad_norm": 0.3272344768047333, | |
| "learning_rate": 9.748743718592965e-05, | |
| "loss": 0.9895, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.22953736654804271, | |
| "grad_norm": 0.4714422821998596, | |
| "learning_rate": 9.728643216080403e-05, | |
| "loss": 1.0156, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.22998220640569395, | |
| "grad_norm": 0.3458651006221771, | |
| "learning_rate": 9.70854271356784e-05, | |
| "loss": 0.8773, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.2304270462633452, | |
| "grad_norm": 0.32996249198913574, | |
| "learning_rate": 9.688442211055276e-05, | |
| "loss": 0.9028, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.23087188612099643, | |
| "grad_norm": 0.3559573292732239, | |
| "learning_rate": 9.668341708542715e-05, | |
| "loss": 0.8993, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.2313167259786477, | |
| "grad_norm": 0.36793214082717896, | |
| "learning_rate": 9.64824120603015e-05, | |
| "loss": 0.8616, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.23176156583629892, | |
| "grad_norm": 0.3426240086555481, | |
| "learning_rate": 9.628140703517589e-05, | |
| "loss": 0.9483, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.23220640569395018, | |
| "grad_norm": 0.4089488685131073, | |
| "learning_rate": 9.608040201005025e-05, | |
| "loss": 0.9446, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.2326512455516014, | |
| "grad_norm": 0.40365123748779297, | |
| "learning_rate": 9.587939698492462e-05, | |
| "loss": 1.0485, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.23309608540925267, | |
| "grad_norm": 0.3387534022331238, | |
| "learning_rate": 9.5678391959799e-05, | |
| "loss": 0.8881, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.23354092526690393, | |
| "grad_norm": 0.3568766117095947, | |
| "learning_rate": 9.547738693467337e-05, | |
| "loss": 0.9514, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.23398576512455516, | |
| "grad_norm": 0.42054617404937744, | |
| "learning_rate": 9.527638190954774e-05, | |
| "loss": 0.9481, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.23443060498220641, | |
| "grad_norm": 0.441377192735672, | |
| "learning_rate": 9.507537688442212e-05, | |
| "loss": 0.927, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.23487544483985764, | |
| "grad_norm": 0.3475041389465332, | |
| "learning_rate": 9.487437185929649e-05, | |
| "loss": 0.9735, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.2353202846975089, | |
| "grad_norm": 0.4227800667285919, | |
| "learning_rate": 9.467336683417086e-05, | |
| "loss": 1.0058, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.23576512455516013, | |
| "grad_norm": 0.3622898459434509, | |
| "learning_rate": 9.447236180904523e-05, | |
| "loss": 0.8462, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.2362099644128114, | |
| "grad_norm": 0.4561034142971039, | |
| "learning_rate": 9.427135678391961e-05, | |
| "loss": 0.9615, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.23665480427046262, | |
| "grad_norm": 0.35412004590034485, | |
| "learning_rate": 9.407035175879397e-05, | |
| "loss": 1.0062, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.23709964412811388, | |
| "grad_norm": 0.4144454598426819, | |
| "learning_rate": 9.386934673366835e-05, | |
| "loss": 0.8798, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.23754448398576514, | |
| "grad_norm": 0.40296846628189087, | |
| "learning_rate": 9.366834170854271e-05, | |
| "loss": 1.0744, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.23798932384341637, | |
| "grad_norm": 0.35528820753097534, | |
| "learning_rate": 9.34673366834171e-05, | |
| "loss": 1.0083, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.23843416370106763, | |
| "grad_norm": 0.37445664405822754, | |
| "learning_rate": 9.326633165829146e-05, | |
| "loss": 0.9631, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.23887900355871886, | |
| "grad_norm": 0.360051691532135, | |
| "learning_rate": 9.306532663316585e-05, | |
| "loss": 0.936, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.2393238434163701, | |
| "grad_norm": 0.440403550863266, | |
| "learning_rate": 9.28643216080402e-05, | |
| "loss": 0.8711, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.23976868327402134, | |
| "grad_norm": 0.2862887680530548, | |
| "learning_rate": 9.266331658291458e-05, | |
| "loss": 0.9069, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.2402135231316726, | |
| "grad_norm": 0.3707970976829529, | |
| "learning_rate": 9.246231155778895e-05, | |
| "loss": 1.0669, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.24065836298932383, | |
| "grad_norm": 0.3454754948616028, | |
| "learning_rate": 9.226130653266331e-05, | |
| "loss": 1.0074, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.2411032028469751, | |
| "grad_norm": 0.3313436806201935, | |
| "learning_rate": 9.20603015075377e-05, | |
| "loss": 0.9425, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.24154804270462635, | |
| "grad_norm": 0.36515554785728455, | |
| "learning_rate": 9.185929648241206e-05, | |
| "loss": 0.9946, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.24199288256227758, | |
| "grad_norm": 0.3730347454547882, | |
| "learning_rate": 9.165829145728644e-05, | |
| "loss": 0.9838, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.24243772241992884, | |
| "grad_norm": 0.39922618865966797, | |
| "learning_rate": 9.14572864321608e-05, | |
| "loss": 0.815, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.24288256227758007, | |
| "grad_norm": 0.31671297550201416, | |
| "learning_rate": 9.125628140703519e-05, | |
| "loss": 1.0014, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.24332740213523132, | |
| "grad_norm": 0.39499327540397644, | |
| "learning_rate": 9.105527638190955e-05, | |
| "loss": 0.9104, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.24377224199288255, | |
| "grad_norm": 0.3678281009197235, | |
| "learning_rate": 9.085427135678392e-05, | |
| "loss": 0.927, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.2442170818505338, | |
| "grad_norm": 0.372120201587677, | |
| "learning_rate": 9.06532663316583e-05, | |
| "loss": 0.8212, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.24466192170818504, | |
| "grad_norm": 0.3431313931941986, | |
| "learning_rate": 9.045226130653267e-05, | |
| "loss": 0.8883, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.2451067615658363, | |
| "grad_norm": 0.391932874917984, | |
| "learning_rate": 9.025125628140704e-05, | |
| "loss": 0.9425, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.24555160142348753, | |
| "grad_norm": 0.4534127116203308, | |
| "learning_rate": 9.005025125628141e-05, | |
| "loss": 1.2452, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.2459964412811388, | |
| "grad_norm": 0.37136268615722656, | |
| "learning_rate": 8.984924623115579e-05, | |
| "loss": 1.0424, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.24644128113879005, | |
| "grad_norm": 0.41206422448158264, | |
| "learning_rate": 8.964824120603016e-05, | |
| "loss": 1.1418, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.24688612099644128, | |
| "grad_norm": 0.440790057182312, | |
| "learning_rate": 8.944723618090453e-05, | |
| "loss": 0.8978, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.24733096085409254, | |
| "grad_norm": 0.32154756784439087, | |
| "learning_rate": 8.92462311557789e-05, | |
| "loss": 0.974, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.24777580071174377, | |
| "grad_norm": 0.3203801214694977, | |
| "learning_rate": 8.904522613065326e-05, | |
| "loss": 0.9684, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.24822064056939502, | |
| "grad_norm": 0.3494798243045807, | |
| "learning_rate": 8.884422110552765e-05, | |
| "loss": 0.9127, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.24866548042704625, | |
| "grad_norm": 0.3334081470966339, | |
| "learning_rate": 8.864321608040201e-05, | |
| "loss": 0.9685, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.2491103202846975, | |
| "grad_norm": 0.3990677297115326, | |
| "learning_rate": 8.84422110552764e-05, | |
| "loss": 0.9923, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.24955516014234874, | |
| "grad_norm": 0.39742422103881836, | |
| "learning_rate": 8.824120603015076e-05, | |
| "loss": 0.9786, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.396680623292923, | |
| "learning_rate": 8.804020100502513e-05, | |
| "loss": 0.9636, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.25044483985765126, | |
| "grad_norm": 0.32014915347099304, | |
| "learning_rate": 8.78391959798995e-05, | |
| "loss": 0.9392, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.2508896797153025, | |
| "grad_norm": 0.31222036480903625, | |
| "learning_rate": 8.763819095477387e-05, | |
| "loss": 0.8806, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.2513345195729537, | |
| "grad_norm": 0.3277677595615387, | |
| "learning_rate": 8.743718592964825e-05, | |
| "loss": 0.8699, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.251779359430605, | |
| "grad_norm": 0.42860186100006104, | |
| "learning_rate": 8.723618090452261e-05, | |
| "loss": 0.9997, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.25222419928825623, | |
| "grad_norm": 0.35114365816116333, | |
| "learning_rate": 8.7035175879397e-05, | |
| "loss": 0.947, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.2526690391459075, | |
| "grad_norm": 0.3189033269882202, | |
| "learning_rate": 8.683417085427135e-05, | |
| "loss": 0.904, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.2531138790035587, | |
| "grad_norm": 0.37736451625823975, | |
| "learning_rate": 8.663316582914574e-05, | |
| "loss": 0.9518, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.25355871886120995, | |
| "grad_norm": 0.357546329498291, | |
| "learning_rate": 8.64321608040201e-05, | |
| "loss": 0.9499, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.2540035587188612, | |
| "grad_norm": 0.37832340598106384, | |
| "learning_rate": 8.623115577889449e-05, | |
| "loss": 0.895, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.25444839857651247, | |
| "grad_norm": 0.3017910122871399, | |
| "learning_rate": 8.603015075376884e-05, | |
| "loss": 1.041, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.2548932384341637, | |
| "grad_norm": 0.3350992798805237, | |
| "learning_rate": 8.582914572864322e-05, | |
| "loss": 0.9303, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.25533807829181493, | |
| "grad_norm": 0.3973081707954407, | |
| "learning_rate": 8.562814070351759e-05, | |
| "loss": 0.9663, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.2557829181494662, | |
| "grad_norm": 0.3381018340587616, | |
| "learning_rate": 8.542713567839196e-05, | |
| "loss": 0.9427, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.25622775800711745, | |
| "grad_norm": 0.3191261887550354, | |
| "learning_rate": 8.522613065326634e-05, | |
| "loss": 0.8298, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.2566725978647687, | |
| "grad_norm": 0.4260123372077942, | |
| "learning_rate": 8.502512562814071e-05, | |
| "loss": 1.0098, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.2571174377224199, | |
| "grad_norm": 0.34333735704421997, | |
| "learning_rate": 8.482412060301508e-05, | |
| "loss": 1.0, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.25756227758007116, | |
| "grad_norm": 0.4197953939437866, | |
| "learning_rate": 8.462311557788946e-05, | |
| "loss": 1.0629, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.2580071174377224, | |
| "grad_norm": 0.37437987327575684, | |
| "learning_rate": 8.442211055276383e-05, | |
| "loss": 0.9298, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.2584519572953737, | |
| "grad_norm": 0.40584203600883484, | |
| "learning_rate": 8.42211055276382e-05, | |
| "loss": 0.9493, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.25889679715302494, | |
| "grad_norm": 0.2971116900444031, | |
| "learning_rate": 8.402010050251256e-05, | |
| "loss": 0.9228, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.25934163701067614, | |
| "grad_norm": 0.33302900195121765, | |
| "learning_rate": 8.381909547738695e-05, | |
| "loss": 0.9277, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.2597864768683274, | |
| "grad_norm": 0.4172103703022003, | |
| "learning_rate": 8.36180904522613e-05, | |
| "loss": 0.9604, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.26023131672597866, | |
| "grad_norm": 0.36361220479011536, | |
| "learning_rate": 8.341708542713568e-05, | |
| "loss": 0.927, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.2606761565836299, | |
| "grad_norm": 0.365182489156723, | |
| "learning_rate": 8.321608040201005e-05, | |
| "loss": 0.9246, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.2611209964412811, | |
| "grad_norm": 0.32461798191070557, | |
| "learning_rate": 8.301507537688443e-05, | |
| "loss": 0.9727, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.2615658362989324, | |
| "grad_norm": 0.3159072995185852, | |
| "learning_rate": 8.28140703517588e-05, | |
| "loss": 0.9286, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.26201067615658363, | |
| "grad_norm": 0.32971808314323425, | |
| "learning_rate": 8.261306532663317e-05, | |
| "loss": 0.9639, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.2624555160142349, | |
| "grad_norm": 0.5026357173919678, | |
| "learning_rate": 8.241206030150754e-05, | |
| "loss": 0.9706, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2629003558718861, | |
| "grad_norm": 0.37912416458129883, | |
| "learning_rate": 8.22110552763819e-05, | |
| "loss": 0.8684, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.26334519572953735, | |
| "grad_norm": 0.3844146430492401, | |
| "learning_rate": 8.201005025125629e-05, | |
| "loss": 0.9659, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.2637900355871886, | |
| "grad_norm": 0.3652094006538391, | |
| "learning_rate": 8.180904522613065e-05, | |
| "loss": 0.8978, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.26423487544483987, | |
| "grad_norm": 0.3851691484451294, | |
| "learning_rate": 8.160804020100504e-05, | |
| "loss": 1.0336, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.2646797153024911, | |
| "grad_norm": 0.3992913067340851, | |
| "learning_rate": 8.14070351758794e-05, | |
| "loss": 0.93, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.26512455516014233, | |
| "grad_norm": 0.39918407797813416, | |
| "learning_rate": 8.120603015075378e-05, | |
| "loss": 1.0553, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.2655693950177936, | |
| "grad_norm": 0.369907021522522, | |
| "learning_rate": 8.100502512562814e-05, | |
| "loss": 1.0672, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.26601423487544484, | |
| "grad_norm": 0.3794615566730499, | |
| "learning_rate": 8.080402010050251e-05, | |
| "loss": 0.9986, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.2664590747330961, | |
| "grad_norm": 0.4018343389034271, | |
| "learning_rate": 8.060301507537689e-05, | |
| "loss": 0.9832, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.2669039145907473, | |
| "grad_norm": 0.3550480902194977, | |
| "learning_rate": 8.040201005025126e-05, | |
| "loss": 0.8965, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.26734875444839856, | |
| "grad_norm": 0.3323322832584381, | |
| "learning_rate": 8.020100502512563e-05, | |
| "loss": 0.9779, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.2677935943060498, | |
| "grad_norm": 0.3290577828884125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0571, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.2682384341637011, | |
| "grad_norm": 0.36351272463798523, | |
| "learning_rate": 7.979899497487438e-05, | |
| "loss": 0.9393, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.26868327402135234, | |
| "grad_norm": 0.3494581878185272, | |
| "learning_rate": 7.959798994974875e-05, | |
| "loss": 0.9157, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.26912811387900354, | |
| "grad_norm": 0.36106282472610474, | |
| "learning_rate": 7.939698492462313e-05, | |
| "loss": 0.9772, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.2695729537366548, | |
| "grad_norm": 0.35360780358314514, | |
| "learning_rate": 7.91959798994975e-05, | |
| "loss": 1.0008, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.27001779359430605, | |
| "grad_norm": 0.3501545786857605, | |
| "learning_rate": 7.899497487437186e-05, | |
| "loss": 0.9061, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.2704626334519573, | |
| "grad_norm": 0.37978798151016235, | |
| "learning_rate": 7.879396984924623e-05, | |
| "loss": 1.0096, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.2709074733096085, | |
| "grad_norm": 0.3628638684749603, | |
| "learning_rate": 7.85929648241206e-05, | |
| "loss": 0.9912, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.2713523131672598, | |
| "grad_norm": 0.3882206976413727, | |
| "learning_rate": 7.839195979899498e-05, | |
| "loss": 0.8698, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.27179715302491103, | |
| "grad_norm": 0.44139206409454346, | |
| "learning_rate": 7.819095477386935e-05, | |
| "loss": 0.9444, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.2722419928825623, | |
| "grad_norm": 0.2937663793563843, | |
| "learning_rate": 7.798994974874372e-05, | |
| "loss": 0.8367, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.27268683274021355, | |
| "grad_norm": 0.34553638100624084, | |
| "learning_rate": 7.77889447236181e-05, | |
| "loss": 1.0092, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.27313167259786475, | |
| "grad_norm": 0.4855239689350128, | |
| "learning_rate": 7.758793969849247e-05, | |
| "loss": 0.8105, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.273576512455516, | |
| "grad_norm": 0.359298974275589, | |
| "learning_rate": 7.738693467336684e-05, | |
| "loss": 0.9225, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.27402135231316727, | |
| "grad_norm": 0.3697519600391388, | |
| "learning_rate": 7.71859296482412e-05, | |
| "loss": 1.0232, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.2744661921708185, | |
| "grad_norm": 0.3558047115802765, | |
| "learning_rate": 7.698492462311559e-05, | |
| "loss": 1.0508, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.2749110320284697, | |
| "grad_norm": 0.375429630279541, | |
| "learning_rate": 7.678391959798995e-05, | |
| "loss": 0.9915, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.275355871886121, | |
| "grad_norm": 0.32131966948509216, | |
| "learning_rate": 7.658291457286433e-05, | |
| "loss": 0.9213, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.27580071174377224, | |
| "grad_norm": 0.4405272305011749, | |
| "learning_rate": 7.638190954773869e-05, | |
| "loss": 0.9044, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.2762455516014235, | |
| "grad_norm": 0.38022780418395996, | |
| "learning_rate": 7.618090452261307e-05, | |
| "loss": 0.9114, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.27669039145907476, | |
| "grad_norm": 0.375396728515625, | |
| "learning_rate": 7.597989949748744e-05, | |
| "loss": 0.9496, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.27713523131672596, | |
| "grad_norm": 0.38898783922195435, | |
| "learning_rate": 7.577889447236181e-05, | |
| "loss": 0.9488, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.2775800711743772, | |
| "grad_norm": 0.32414594292640686, | |
| "learning_rate": 7.557788944723618e-05, | |
| "loss": 0.9054, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.2780249110320285, | |
| "grad_norm": 0.34590399265289307, | |
| "learning_rate": 7.537688442211056e-05, | |
| "loss": 0.9491, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.27846975088967973, | |
| "grad_norm": 0.4231082499027252, | |
| "learning_rate": 7.517587939698493e-05, | |
| "loss": 1.0757, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.27891459074733094, | |
| "grad_norm": 0.3876168131828308, | |
| "learning_rate": 7.49748743718593e-05, | |
| "loss": 0.9705, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.2793594306049822, | |
| "grad_norm": 0.3473493158817291, | |
| "learning_rate": 7.477386934673368e-05, | |
| "loss": 0.9262, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.27980427046263345, | |
| "grad_norm": 0.34793248772621155, | |
| "learning_rate": 7.457286432160805e-05, | |
| "loss": 0.9728, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.2802491103202847, | |
| "grad_norm": 0.3288039267063141, | |
| "learning_rate": 7.437185929648241e-05, | |
| "loss": 0.9609, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.28069395017793597, | |
| "grad_norm": 0.37984830141067505, | |
| "learning_rate": 7.417085427135678e-05, | |
| "loss": 1.0235, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.28113879003558717, | |
| "grad_norm": 0.382462739944458, | |
| "learning_rate": 7.396984924623115e-05, | |
| "loss": 0.9898, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.28158362989323843, | |
| "grad_norm": 0.406568318605423, | |
| "learning_rate": 7.376884422110553e-05, | |
| "loss": 0.9442, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.2820284697508897, | |
| "grad_norm": 0.33671334385871887, | |
| "learning_rate": 7.35678391959799e-05, | |
| "loss": 0.9771, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.28247330960854095, | |
| "grad_norm": 0.34497055411338806, | |
| "learning_rate": 7.336683417085427e-05, | |
| "loss": 0.9452, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.28291814946619215, | |
| "grad_norm": 0.37973660230636597, | |
| "learning_rate": 7.316582914572865e-05, | |
| "loss": 1.0431, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.2833629893238434, | |
| "grad_norm": 0.391178160905838, | |
| "learning_rate": 7.296482412060302e-05, | |
| "loss": 0.9732, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.28380782918149466, | |
| "grad_norm": 0.41452303528785706, | |
| "learning_rate": 7.276381909547739e-05, | |
| "loss": 0.986, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.2842526690391459, | |
| "grad_norm": 0.388171911239624, | |
| "learning_rate": 7.256281407035177e-05, | |
| "loss": 0.9368, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.2846975088967972, | |
| "grad_norm": 0.40021994709968567, | |
| "learning_rate": 7.236180904522614e-05, | |
| "loss": 0.9213, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.2851423487544484, | |
| "grad_norm": 0.380087286233902, | |
| "learning_rate": 7.21608040201005e-05, | |
| "loss": 0.9452, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.28558718861209964, | |
| "grad_norm": 0.3855552673339844, | |
| "learning_rate": 7.195979899497488e-05, | |
| "loss": 0.8841, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.2860320284697509, | |
| "grad_norm": 0.4310576319694519, | |
| "learning_rate": 7.175879396984924e-05, | |
| "loss": 0.9666, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.28647686832740216, | |
| "grad_norm": 0.3338180482387543, | |
| "learning_rate": 7.155778894472363e-05, | |
| "loss": 0.9127, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.28692170818505336, | |
| "grad_norm": 0.373388409614563, | |
| "learning_rate": 7.135678391959799e-05, | |
| "loss": 0.9409, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.2873665480427046, | |
| "grad_norm": 0.37845322489738464, | |
| "learning_rate": 7.115577889447236e-05, | |
| "loss": 0.9885, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.2878113879003559, | |
| "grad_norm": 0.39277783036231995, | |
| "learning_rate": 7.095477386934674e-05, | |
| "loss": 0.9819, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.28825622775800713, | |
| "grad_norm": 0.3995840549468994, | |
| "learning_rate": 7.075376884422111e-05, | |
| "loss": 1.018, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.28870106761565834, | |
| "grad_norm": 0.48846814036369324, | |
| "learning_rate": 7.055276381909548e-05, | |
| "loss": 1.0557, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.2891459074733096, | |
| "grad_norm": 0.44867080450057983, | |
| "learning_rate": 7.035175879396985e-05, | |
| "loss": 0.8785, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.28959074733096085, | |
| "grad_norm": 0.3561117649078369, | |
| "learning_rate": 7.015075376884423e-05, | |
| "loss": 0.8914, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.2900355871886121, | |
| "grad_norm": 0.4034915268421173, | |
| "learning_rate": 6.99497487437186e-05, | |
| "loss": 0.9038, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.29048042704626337, | |
| "grad_norm": 0.33660322427749634, | |
| "learning_rate": 6.974874371859297e-05, | |
| "loss": 0.9757, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.29092526690391457, | |
| "grad_norm": 0.3358772099018097, | |
| "learning_rate": 6.954773869346733e-05, | |
| "loss": 0.9003, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.29137010676156583, | |
| "grad_norm": 0.40661585330963135, | |
| "learning_rate": 6.93467336683417e-05, | |
| "loss": 0.8901, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.2918149466192171, | |
| "grad_norm": 0.3901599943637848, | |
| "learning_rate": 6.914572864321608e-05, | |
| "loss": 0.8622, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.29225978647686834, | |
| "grad_norm": 0.4064997732639313, | |
| "learning_rate": 6.894472361809045e-05, | |
| "loss": 0.8799, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.29270462633451955, | |
| "grad_norm": 0.3740297555923462, | |
| "learning_rate": 6.874371859296482e-05, | |
| "loss": 0.9026, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.2931494661921708, | |
| "grad_norm": 0.3562834858894348, | |
| "learning_rate": 6.85427135678392e-05, | |
| "loss": 0.949, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.29359430604982206, | |
| "grad_norm": 0.41133421659469604, | |
| "learning_rate": 6.834170854271357e-05, | |
| "loss": 1.0558, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2940391459074733, | |
| "grad_norm": 0.4356454014778137, | |
| "learning_rate": 6.814070351758794e-05, | |
| "loss": 0.9691, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.2944839857651246, | |
| "grad_norm": 0.3359556496143341, | |
| "learning_rate": 6.793969849246232e-05, | |
| "loss": 0.9266, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.2949288256227758, | |
| "grad_norm": 0.34760648012161255, | |
| "learning_rate": 6.773869346733669e-05, | |
| "loss": 0.8974, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.29537366548042704, | |
| "grad_norm": 0.42510053515434265, | |
| "learning_rate": 6.753768844221105e-05, | |
| "loss": 0.9537, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.2958185053380783, | |
| "grad_norm": 0.35934481024742126, | |
| "learning_rate": 6.733668341708544e-05, | |
| "loss": 0.9455, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.29626334519572955, | |
| "grad_norm": 0.3559943735599518, | |
| "learning_rate": 6.71356783919598e-05, | |
| "loss": 0.9363, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.29670818505338076, | |
| "grad_norm": 0.36470353603363037, | |
| "learning_rate": 6.693467336683418e-05, | |
| "loss": 0.978, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.297153024911032, | |
| "grad_norm": 0.4014419913291931, | |
| "learning_rate": 6.673366834170854e-05, | |
| "loss": 1.0503, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.2975978647686833, | |
| "grad_norm": 0.4242320656776428, | |
| "learning_rate": 6.653266331658293e-05, | |
| "loss": 0.8848, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.29804270462633453, | |
| "grad_norm": 0.3722448945045471, | |
| "learning_rate": 6.633165829145729e-05, | |
| "loss": 0.9487, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2984875444839858, | |
| "grad_norm": 0.33282628655433655, | |
| "learning_rate": 6.613065326633166e-05, | |
| "loss": 0.9483, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.298932384341637, | |
| "grad_norm": 0.45830950140953064, | |
| "learning_rate": 6.592964824120603e-05, | |
| "loss": 1.0413, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.29937722419928825, | |
| "grad_norm": 0.32155314087867737, | |
| "learning_rate": 6.57286432160804e-05, | |
| "loss": 0.9299, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.2998220640569395, | |
| "grad_norm": 0.319416344165802, | |
| "learning_rate": 6.552763819095478e-05, | |
| "loss": 0.9, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.30026690391459077, | |
| "grad_norm": 0.4341578781604767, | |
| "learning_rate": 6.532663316582915e-05, | |
| "loss": 0.9455, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.30071174377224197, | |
| "grad_norm": 0.4117160737514496, | |
| "learning_rate": 6.512562814070352e-05, | |
| "loss": 0.9682, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.3011565836298932, | |
| "grad_norm": 0.42781034111976624, | |
| "learning_rate": 6.492462311557788e-05, | |
| "loss": 1.0128, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.3016014234875445, | |
| "grad_norm": 0.3355860710144043, | |
| "learning_rate": 6.472361809045227e-05, | |
| "loss": 0.911, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.30204626334519574, | |
| "grad_norm": 0.3754599690437317, | |
| "learning_rate": 6.452261306532663e-05, | |
| "loss": 1.0985, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.302491103202847, | |
| "grad_norm": 0.3943292200565338, | |
| "learning_rate": 6.4321608040201e-05, | |
| "loss": 0.9051, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.3029359430604982, | |
| "grad_norm": 0.3109300434589386, | |
| "learning_rate": 6.412060301507538e-05, | |
| "loss": 0.8964, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.30338078291814946, | |
| "grad_norm": 0.32145023345947266, | |
| "learning_rate": 6.391959798994975e-05, | |
| "loss": 0.9225, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.3038256227758007, | |
| "grad_norm": 0.37335747480392456, | |
| "learning_rate": 6.371859296482412e-05, | |
| "loss": 0.9586, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.304270462633452, | |
| "grad_norm": 0.3109886646270752, | |
| "learning_rate": 6.35175879396985e-05, | |
| "loss": 0.9308, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.3047153024911032, | |
| "grad_norm": 0.3259667456150055, | |
| "learning_rate": 6.331658291457287e-05, | |
| "loss": 0.9322, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.30516014234875444, | |
| "grad_norm": 0.35218545794487, | |
| "learning_rate": 6.311557788944724e-05, | |
| "loss": 0.9853, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.3056049822064057, | |
| "grad_norm": 0.3094191551208496, | |
| "learning_rate": 6.291457286432161e-05, | |
| "loss": 0.8857, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.30604982206405695, | |
| "grad_norm": 0.3218885064125061, | |
| "learning_rate": 6.271356783919599e-05, | |
| "loss": 0.9363, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.3064946619217082, | |
| "grad_norm": 0.4279980957508087, | |
| "learning_rate": 6.251256281407035e-05, | |
| "loss": 0.9375, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.3069395017793594, | |
| "grad_norm": 0.3290557861328125, | |
| "learning_rate": 6.231155778894473e-05, | |
| "loss": 0.8822, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.30738434163701067, | |
| "grad_norm": 0.3323034942150116, | |
| "learning_rate": 6.211055276381909e-05, | |
| "loss": 1.016, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.30782918149466193, | |
| "grad_norm": 0.31911730766296387, | |
| "learning_rate": 6.190954773869348e-05, | |
| "loss": 0.8814, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.3082740213523132, | |
| "grad_norm": 0.3330342471599579, | |
| "learning_rate": 6.170854271356784e-05, | |
| "loss": 0.9745, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.3087188612099644, | |
| "grad_norm": 0.42443540692329407, | |
| "learning_rate": 6.150753768844222e-05, | |
| "loss": 0.8555, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.30916370106761565, | |
| "grad_norm": 0.3406936526298523, | |
| "learning_rate": 6.130653266331658e-05, | |
| "loss": 0.9958, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.3096085409252669, | |
| "grad_norm": 0.4078681766986847, | |
| "learning_rate": 6.110552763819096e-05, | |
| "loss": 0.8692, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.31005338078291816, | |
| "grad_norm": 0.4206138849258423, | |
| "learning_rate": 6.090452261306533e-05, | |
| "loss": 0.9255, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.3104982206405694, | |
| "grad_norm": 0.37396302819252014, | |
| "learning_rate": 6.070351758793971e-05, | |
| "loss": 0.9942, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.3109430604982206, | |
| "grad_norm": 0.3433186411857605, | |
| "learning_rate": 6.0502512562814076e-05, | |
| "loss": 0.8593, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.3113879003558719, | |
| "grad_norm": 0.37683218717575073, | |
| "learning_rate": 6.030150753768844e-05, | |
| "loss": 0.8921, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.31183274021352314, | |
| "grad_norm": 0.3779331147670746, | |
| "learning_rate": 6.0100502512562815e-05, | |
| "loss": 0.9528, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.3122775800711744, | |
| "grad_norm": 0.296854168176651, | |
| "learning_rate": 5.989949748743718e-05, | |
| "loss": 0.8811, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.3127224199288256, | |
| "grad_norm": 0.4115276336669922, | |
| "learning_rate": 5.969849246231156e-05, | |
| "loss": 0.9749, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.31316725978647686, | |
| "grad_norm": 0.36785462498664856, | |
| "learning_rate": 5.949748743718593e-05, | |
| "loss": 0.9813, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.3136120996441281, | |
| "grad_norm": 0.4077514410018921, | |
| "learning_rate": 5.929648241206031e-05, | |
| "loss": 0.9611, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.3140569395017794, | |
| "grad_norm": 0.34254419803619385, | |
| "learning_rate": 5.909547738693467e-05, | |
| "loss": 0.9411, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.3145017793594306, | |
| "grad_norm": 0.3231055736541748, | |
| "learning_rate": 5.889447236180905e-05, | |
| "loss": 0.8937, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.31494661921708184, | |
| "grad_norm": 0.46129921078681946, | |
| "learning_rate": 5.869346733668342e-05, | |
| "loss": 1.0143, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.3153914590747331, | |
| "grad_norm": 0.31858929991722107, | |
| "learning_rate": 5.849246231155779e-05, | |
| "loss": 0.8875, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.31583629893238435, | |
| "grad_norm": 0.34170815348625183, | |
| "learning_rate": 5.829145728643216e-05, | |
| "loss": 0.9015, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.3162811387900356, | |
| "grad_norm": 0.3193584084510803, | |
| "learning_rate": 5.809045226130654e-05, | |
| "loss": 0.9041, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.3167259786476868, | |
| "grad_norm": 0.3801109790802002, | |
| "learning_rate": 5.7889447236180904e-05, | |
| "loss": 0.8286, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.31717081850533807, | |
| "grad_norm": 0.3580700159072876, | |
| "learning_rate": 5.7688442211055284e-05, | |
| "loss": 0.8942, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.31761565836298933, | |
| "grad_norm": 0.4236672818660736, | |
| "learning_rate": 5.748743718592965e-05, | |
| "loss": 0.9645, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.3180604982206406, | |
| "grad_norm": 0.43990832567214966, | |
| "learning_rate": 5.728643216080403e-05, | |
| "loss": 1.0032, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.3185053380782918, | |
| "grad_norm": 0.34689345955848694, | |
| "learning_rate": 5.7085427135678396e-05, | |
| "loss": 0.8941, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.31895017793594305, | |
| "grad_norm": 0.31815850734710693, | |
| "learning_rate": 5.688442211055277e-05, | |
| "loss": 0.8067, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.3193950177935943, | |
| "grad_norm": 0.36961331963539124, | |
| "learning_rate": 5.6683417085427135e-05, | |
| "loss": 0.9644, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.31983985765124556, | |
| "grad_norm": 0.35958558320999146, | |
| "learning_rate": 5.6482412060301515e-05, | |
| "loss": 0.89, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.3202846975088968, | |
| "grad_norm": 0.3142717480659485, | |
| "learning_rate": 5.628140703517588e-05, | |
| "loss": 0.9572, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.320729537366548, | |
| "grad_norm": 0.3404165506362915, | |
| "learning_rate": 5.608040201005026e-05, | |
| "loss": 0.9028, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.3211743772241993, | |
| "grad_norm": 0.33231121301651, | |
| "learning_rate": 5.587939698492463e-05, | |
| "loss": 0.8523, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.32161921708185054, | |
| "grad_norm": 0.3641085624694824, | |
| "learning_rate": 5.567839195979899e-05, | |
| "loss": 0.9232, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.3220640569395018, | |
| "grad_norm": 0.4227713644504547, | |
| "learning_rate": 5.547738693467337e-05, | |
| "loss": 0.872, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.322508896797153, | |
| "grad_norm": 0.3884482979774475, | |
| "learning_rate": 5.527638190954774e-05, | |
| "loss": 0.9237, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.32295373665480426, | |
| "grad_norm": 0.4094702899456024, | |
| "learning_rate": 5.507537688442211e-05, | |
| "loss": 0.9966, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.3233985765124555, | |
| "grad_norm": 0.39778733253479004, | |
| "learning_rate": 5.487437185929648e-05, | |
| "loss": 0.8983, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.3238434163701068, | |
| "grad_norm": 0.3704332709312439, | |
| "learning_rate": 5.467336683417086e-05, | |
| "loss": 0.9645, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.32428825622775803, | |
| "grad_norm": 0.39981192350387573, | |
| "learning_rate": 5.4472361809045224e-05, | |
| "loss": 0.9289, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.32473309608540923, | |
| "grad_norm": 0.44905000925064087, | |
| "learning_rate": 5.4271356783919604e-05, | |
| "loss": 1.0632, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.3251779359430605, | |
| "grad_norm": 0.4241604208946228, | |
| "learning_rate": 5.407035175879397e-05, | |
| "loss": 0.9888, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.32562277580071175, | |
| "grad_norm": 0.30892229080200195, | |
| "learning_rate": 5.386934673366835e-05, | |
| "loss": 0.8545, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.326067615658363, | |
| "grad_norm": 0.36549416184425354, | |
| "learning_rate": 5.3668341708542716e-05, | |
| "loss": 0.9532, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.3265124555160142, | |
| "grad_norm": 0.36795175075531006, | |
| "learning_rate": 5.346733668341709e-05, | |
| "loss": 0.9992, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.32695729537366547, | |
| "grad_norm": 0.3399661183357239, | |
| "learning_rate": 5.3266331658291455e-05, | |
| "loss": 0.9349, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.3274021352313167, | |
| "grad_norm": 0.5193089842796326, | |
| "learning_rate": 5.3065326633165835e-05, | |
| "loss": 0.9922, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.327846975088968, | |
| "grad_norm": 0.3065936863422394, | |
| "learning_rate": 5.28643216080402e-05, | |
| "loss": 0.9197, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.32829181494661924, | |
| "grad_norm": 0.37638553977012634, | |
| "learning_rate": 5.266331658291458e-05, | |
| "loss": 0.8854, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.32873665480427045, | |
| "grad_norm": 0.36696720123291016, | |
| "learning_rate": 5.246231155778895e-05, | |
| "loss": 0.8642, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.3291814946619217, | |
| "grad_norm": 0.33234134316444397, | |
| "learning_rate": 5.226130653266332e-05, | |
| "loss": 0.9507, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.32962633451957296, | |
| "grad_norm": 0.3777725398540497, | |
| "learning_rate": 5.206030150753769e-05, | |
| "loss": 0.9751, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.3300711743772242, | |
| "grad_norm": 0.3410782516002655, | |
| "learning_rate": 5.1859296482412066e-05, | |
| "loss": 0.8185, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.3305160142348754, | |
| "grad_norm": 0.5856947302818298, | |
| "learning_rate": 5.165829145728643e-05, | |
| "loss": 1.0592, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.3309608540925267, | |
| "grad_norm": 0.35289299488067627, | |
| "learning_rate": 5.145728643216081e-05, | |
| "loss": 1.1304, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.33140569395017794, | |
| "grad_norm": 0.4018556475639343, | |
| "learning_rate": 5.125628140703518e-05, | |
| "loss": 1.0516, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.3318505338078292, | |
| "grad_norm": 0.402352899312973, | |
| "learning_rate": 5.1055276381909544e-05, | |
| "loss": 1.0456, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.33229537366548045, | |
| "grad_norm": 0.36698055267333984, | |
| "learning_rate": 5.0854271356783924e-05, | |
| "loss": 1.0091, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.33274021352313166, | |
| "grad_norm": 0.34098100662231445, | |
| "learning_rate": 5.065326633165829e-05, | |
| "loss": 0.8725, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.3331850533807829, | |
| "grad_norm": 0.3870490789413452, | |
| "learning_rate": 5.045226130653266e-05, | |
| "loss": 0.9775, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.33362989323843417, | |
| "grad_norm": 0.3566199839115143, | |
| "learning_rate": 5.0251256281407036e-05, | |
| "loss": 0.842, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.33407473309608543, | |
| "grad_norm": 0.4538438618183136, | |
| "learning_rate": 5.005025125628141e-05, | |
| "loss": 0.9158, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.33451957295373663, | |
| "grad_norm": 0.3462452292442322, | |
| "learning_rate": 4.984924623115578e-05, | |
| "loss": 0.8715, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.3349644128113879, | |
| "grad_norm": 0.40105798840522766, | |
| "learning_rate": 4.9648241206030155e-05, | |
| "loss": 1.0943, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.33540925266903915, | |
| "grad_norm": 0.38309648633003235, | |
| "learning_rate": 4.944723618090453e-05, | |
| "loss": 0.8961, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.3358540925266904, | |
| "grad_norm": 0.31808745861053467, | |
| "learning_rate": 4.92462311557789e-05, | |
| "loss": 0.8785, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.33629893238434166, | |
| "grad_norm": 0.3787521421909332, | |
| "learning_rate": 4.9045226130653274e-05, | |
| "loss": 0.8932, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.33674377224199287, | |
| "grad_norm": 0.36229708790779114, | |
| "learning_rate": 4.884422110552764e-05, | |
| "loss": 1.01, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.3371886120996441, | |
| "grad_norm": 0.3797127604484558, | |
| "learning_rate": 4.864321608040201e-05, | |
| "loss": 0.8492, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.3376334519572954, | |
| "grad_norm": 0.3839523196220398, | |
| "learning_rate": 4.844221105527638e-05, | |
| "loss": 0.9927, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.33807829181494664, | |
| "grad_norm": 0.3037039637565613, | |
| "learning_rate": 4.824120603015075e-05, | |
| "loss": 0.9326, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.33852313167259784, | |
| "grad_norm": 0.35209789872169495, | |
| "learning_rate": 4.8040201005025125e-05, | |
| "loss": 0.8634, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.3389679715302491, | |
| "grad_norm": 0.338007390499115, | |
| "learning_rate": 4.78391959798995e-05, | |
| "loss": 1.0579, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.33941281138790036, | |
| "grad_norm": 0.43554559350013733, | |
| "learning_rate": 4.763819095477387e-05, | |
| "loss": 1.0572, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.3398576512455516, | |
| "grad_norm": 0.47670868039131165, | |
| "learning_rate": 4.7437185929648244e-05, | |
| "loss": 1.1046, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.3403024911032028, | |
| "grad_norm": 0.3101906180381775, | |
| "learning_rate": 4.723618090452262e-05, | |
| "loss": 0.9641, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.3407473309608541, | |
| "grad_norm": 0.3647597134113312, | |
| "learning_rate": 4.703517587939698e-05, | |
| "loss": 0.9678, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.34119217081850534, | |
| "grad_norm": 0.40179431438446045, | |
| "learning_rate": 4.6834170854271356e-05, | |
| "loss": 0.9187, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.3416370106761566, | |
| "grad_norm": 0.41022738814353943, | |
| "learning_rate": 4.663316582914573e-05, | |
| "loss": 0.9218, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.34208185053380785, | |
| "grad_norm": 0.3737107515335083, | |
| "learning_rate": 4.64321608040201e-05, | |
| "loss": 0.9396, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.34252669039145905, | |
| "grad_norm": 0.34264299273490906, | |
| "learning_rate": 4.6231155778894475e-05, | |
| "loss": 0.8803, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.3429715302491103, | |
| "grad_norm": 0.363738089799881, | |
| "learning_rate": 4.603015075376885e-05, | |
| "loss": 0.9905, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.34341637010676157, | |
| "grad_norm": 0.4605758488178253, | |
| "learning_rate": 4.582914572864322e-05, | |
| "loss": 0.904, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.34386120996441283, | |
| "grad_norm": 0.3742416799068451, | |
| "learning_rate": 4.5628140703517594e-05, | |
| "loss": 0.9772, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.34430604982206403, | |
| "grad_norm": 0.4309648871421814, | |
| "learning_rate": 4.542713567839196e-05, | |
| "loss": 1.0439, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.3447508896797153, | |
| "grad_norm": 0.3657279908657074, | |
| "learning_rate": 4.522613065326633e-05, | |
| "loss": 1.0162, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.34519572953736655, | |
| "grad_norm": 0.4098432958126068, | |
| "learning_rate": 4.5025125628140706e-05, | |
| "loss": 0.9788, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.3456405693950178, | |
| "grad_norm": 0.4177950918674469, | |
| "learning_rate": 4.482412060301508e-05, | |
| "loss": 1.0105, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.34608540925266906, | |
| "grad_norm": 0.35204648971557617, | |
| "learning_rate": 4.462311557788945e-05, | |
| "loss": 0.9713, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.34653024911032027, | |
| "grad_norm": 0.3725673258304596, | |
| "learning_rate": 4.4422110552763825e-05, | |
| "loss": 0.9843, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.3469750889679715, | |
| "grad_norm": 0.38592076301574707, | |
| "learning_rate": 4.42211055276382e-05, | |
| "loss": 0.8528, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.3474199288256228, | |
| "grad_norm": 0.35379818081855774, | |
| "learning_rate": 4.4020100502512564e-05, | |
| "loss": 0.9712, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.34786476868327404, | |
| "grad_norm": 0.3713836371898651, | |
| "learning_rate": 4.381909547738694e-05, | |
| "loss": 0.9531, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.34830960854092524, | |
| "grad_norm": 0.3799002766609192, | |
| "learning_rate": 4.3618090452261303e-05, | |
| "loss": 0.9348, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.3487544483985765, | |
| "grad_norm": 0.34504106640815735, | |
| "learning_rate": 4.3417085427135676e-05, | |
| "loss": 0.8863, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.34919928825622776, | |
| "grad_norm": 0.2971758544445038, | |
| "learning_rate": 4.321608040201005e-05, | |
| "loss": 0.8983, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.349644128113879, | |
| "grad_norm": 0.34722504019737244, | |
| "learning_rate": 4.301507537688442e-05, | |
| "loss": 0.9456, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.3500889679715303, | |
| "grad_norm": 0.3370276987552643, | |
| "learning_rate": 4.2814070351758795e-05, | |
| "loss": 0.9288, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.3505338078291815, | |
| "grad_norm": 0.3471807837486267, | |
| "learning_rate": 4.261306532663317e-05, | |
| "loss": 0.98, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.35097864768683273, | |
| "grad_norm": 0.3932972252368927, | |
| "learning_rate": 4.241206030150754e-05, | |
| "loss": 0.9479, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.351423487544484, | |
| "grad_norm": 0.36573097109794617, | |
| "learning_rate": 4.2211055276381914e-05, | |
| "loss": 0.9339, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.35186832740213525, | |
| "grad_norm": 0.34445399045944214, | |
| "learning_rate": 4.201005025125628e-05, | |
| "loss": 0.807, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.35231316725978645, | |
| "grad_norm": 0.39604124426841736, | |
| "learning_rate": 4.180904522613065e-05, | |
| "loss": 0.9503, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.3527580071174377, | |
| "grad_norm": 0.3157517910003662, | |
| "learning_rate": 4.1608040201005026e-05, | |
| "loss": 0.9381, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.35320284697508897, | |
| "grad_norm": 0.3242207467556, | |
| "learning_rate": 4.14070351758794e-05, | |
| "loss": 0.934, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.3536476868327402, | |
| "grad_norm": 0.3632892966270447, | |
| "learning_rate": 4.120603015075377e-05, | |
| "loss": 0.9653, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.3540925266903915, | |
| "grad_norm": 0.35393667221069336, | |
| "learning_rate": 4.1005025125628145e-05, | |
| "loss": 0.9132, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.3545373665480427, | |
| "grad_norm": 0.36036303639411926, | |
| "learning_rate": 4.080402010050252e-05, | |
| "loss": 1.0329, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.35498220640569395, | |
| "grad_norm": 0.3710475265979767, | |
| "learning_rate": 4.060301507537689e-05, | |
| "loss": 0.8732, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.3554270462633452, | |
| "grad_norm": 0.37702277302742004, | |
| "learning_rate": 4.040201005025126e-05, | |
| "loss": 0.9467, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.35587188612099646, | |
| "grad_norm": 0.3579627275466919, | |
| "learning_rate": 4.020100502512563e-05, | |
| "loss": 0.9603, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.35631672597864766, | |
| "grad_norm": 0.44332355260849, | |
| "learning_rate": 4e-05, | |
| "loss": 0.9611, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.3567615658362989, | |
| "grad_norm": 0.30799567699432373, | |
| "learning_rate": 3.9798994974874376e-05, | |
| "loss": 0.9908, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.3572064056939502, | |
| "grad_norm": 0.38729211688041687, | |
| "learning_rate": 3.959798994974875e-05, | |
| "loss": 0.9629, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.35765124555160144, | |
| "grad_norm": 0.4034986197948456, | |
| "learning_rate": 3.9396984924623115e-05, | |
| "loss": 0.9545, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.3580960854092527, | |
| "grad_norm": 0.35203176736831665, | |
| "learning_rate": 3.919597989949749e-05, | |
| "loss": 0.8863, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.3585409252669039, | |
| "grad_norm": 0.3381657004356384, | |
| "learning_rate": 3.899497487437186e-05, | |
| "loss": 0.9144, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.35898576512455516, | |
| "grad_norm": 0.34250691533088684, | |
| "learning_rate": 3.8793969849246234e-05, | |
| "loss": 0.8687, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.3594306049822064, | |
| "grad_norm": 0.3308602273464203, | |
| "learning_rate": 3.85929648241206e-05, | |
| "loss": 0.9292, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.35987544483985767, | |
| "grad_norm": 0.3888717591762543, | |
| "learning_rate": 3.8391959798994973e-05, | |
| "loss": 0.9774, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.3603202846975089, | |
| "grad_norm": 0.37949973344802856, | |
| "learning_rate": 3.8190954773869346e-05, | |
| "loss": 0.9278, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.36076512455516013, | |
| "grad_norm": 0.3511112332344055, | |
| "learning_rate": 3.798994974874372e-05, | |
| "loss": 0.9714, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.3612099644128114, | |
| "grad_norm": 0.3813224732875824, | |
| "learning_rate": 3.778894472361809e-05, | |
| "loss": 1.0487, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.36165480427046265, | |
| "grad_norm": 0.4005330502986908, | |
| "learning_rate": 3.7587939698492465e-05, | |
| "loss": 0.9673, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.3620996441281139, | |
| "grad_norm": 0.3238542675971985, | |
| "learning_rate": 3.738693467336684e-05, | |
| "loss": 0.9409, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.3625444839857651, | |
| "grad_norm": 0.29547253251075745, | |
| "learning_rate": 3.7185929648241204e-05, | |
| "loss": 0.9494, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.36298932384341637, | |
| "grad_norm": 0.45984339714050293, | |
| "learning_rate": 3.698492462311558e-05, | |
| "loss": 0.8507, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.3634341637010676, | |
| "grad_norm": 0.3637581169605255, | |
| "learning_rate": 3.678391959798995e-05, | |
| "loss": 1.0145, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.3638790035587189, | |
| "grad_norm": 0.3606102764606476, | |
| "learning_rate": 3.658291457286432e-05, | |
| "loss": 0.877, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.3643238434163701, | |
| "grad_norm": 0.33879634737968445, | |
| "learning_rate": 3.6381909547738696e-05, | |
| "loss": 0.9165, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.36476868327402134, | |
| "grad_norm": 0.3897345960140228, | |
| "learning_rate": 3.618090452261307e-05, | |
| "loss": 0.9762, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.3652135231316726, | |
| "grad_norm": 0.39814460277557373, | |
| "learning_rate": 3.597989949748744e-05, | |
| "loss": 0.9111, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.36565836298932386, | |
| "grad_norm": 0.40529152750968933, | |
| "learning_rate": 3.5778894472361815e-05, | |
| "loss": 0.8778, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.36610320284697506, | |
| "grad_norm": 0.34325361251831055, | |
| "learning_rate": 3.557788944723618e-05, | |
| "loss": 0.903, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.3665480427046263, | |
| "grad_norm": 0.36211341619491577, | |
| "learning_rate": 3.5376884422110554e-05, | |
| "loss": 0.963, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.3669928825622776, | |
| "grad_norm": 0.4107413589954376, | |
| "learning_rate": 3.517587939698493e-05, | |
| "loss": 0.9007, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.36743772241992884, | |
| "grad_norm": 0.35432523488998413, | |
| "learning_rate": 3.49748743718593e-05, | |
| "loss": 0.8406, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.3678825622775801, | |
| "grad_norm": 0.4027344286441803, | |
| "learning_rate": 3.4773869346733667e-05, | |
| "loss": 1.0062, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.3683274021352313, | |
| "grad_norm": 0.34778210520744324, | |
| "learning_rate": 3.457286432160804e-05, | |
| "loss": 1.0124, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.36877224199288255, | |
| "grad_norm": 0.3145458996295929, | |
| "learning_rate": 3.437185929648241e-05, | |
| "loss": 0.8455, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.3692170818505338, | |
| "grad_norm": 0.39053332805633545, | |
| "learning_rate": 3.4170854271356785e-05, | |
| "loss": 0.9635, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.36966192170818507, | |
| "grad_norm": 0.3456934988498688, | |
| "learning_rate": 3.396984924623116e-05, | |
| "loss": 0.9399, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.3701067615658363, | |
| "grad_norm": 0.34614813327789307, | |
| "learning_rate": 3.3768844221105525e-05, | |
| "loss": 0.9724, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.37055160142348753, | |
| "grad_norm": 0.45311570167541504, | |
| "learning_rate": 3.35678391959799e-05, | |
| "loss": 1.0435, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.3709964412811388, | |
| "grad_norm": 0.3952670395374298, | |
| "learning_rate": 3.336683417085427e-05, | |
| "loss": 0.9251, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.37144128113879005, | |
| "grad_norm": 0.3246530592441559, | |
| "learning_rate": 3.3165829145728643e-05, | |
| "loss": 0.9194, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.3718861209964413, | |
| "grad_norm": 0.3489208221435547, | |
| "learning_rate": 3.2964824120603016e-05, | |
| "loss": 0.9522, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.3723309608540925, | |
| "grad_norm": 0.3140431046485901, | |
| "learning_rate": 3.276381909547739e-05, | |
| "loss": 0.9511, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.37277580071174377, | |
| "grad_norm": 0.3103282153606415, | |
| "learning_rate": 3.256281407035176e-05, | |
| "loss": 0.8739, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.373220640569395, | |
| "grad_norm": 0.3995440602302551, | |
| "learning_rate": 3.2361809045226135e-05, | |
| "loss": 0.9918, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.3736654804270463, | |
| "grad_norm": 0.39125266671180725, | |
| "learning_rate": 3.21608040201005e-05, | |
| "loss": 0.9751, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3741103202846975, | |
| "grad_norm": 0.3043217658996582, | |
| "learning_rate": 3.1959798994974875e-05, | |
| "loss": 0.8899, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.37455516014234874, | |
| "grad_norm": 0.4185977280139923, | |
| "learning_rate": 3.175879396984925e-05, | |
| "loss": 0.9169, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.375, | |
| "grad_norm": 0.3753814697265625, | |
| "learning_rate": 3.155778894472362e-05, | |
| "loss": 0.951, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.37544483985765126, | |
| "grad_norm": 0.33144545555114746, | |
| "learning_rate": 3.1356783919597993e-05, | |
| "loss": 0.9228, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.3758896797153025, | |
| "grad_norm": 0.3024055063724518, | |
| "learning_rate": 3.1155778894472366e-05, | |
| "loss": 0.9157, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.3763345195729537, | |
| "grad_norm": 0.3643604815006256, | |
| "learning_rate": 3.095477386934674e-05, | |
| "loss": 0.9387, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.376779359430605, | |
| "grad_norm": 0.426024854183197, | |
| "learning_rate": 3.075376884422111e-05, | |
| "loss": 1.0013, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.37722419928825623, | |
| "grad_norm": 0.3174848258495331, | |
| "learning_rate": 3.055276381909548e-05, | |
| "loss": 0.914, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.3776690391459075, | |
| "grad_norm": 0.3318672478199005, | |
| "learning_rate": 3.0351758793969855e-05, | |
| "loss": 0.9502, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.3781138790035587, | |
| "grad_norm": 0.4423961937427521, | |
| "learning_rate": 3.015075376884422e-05, | |
| "loss": 0.9341, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.37855871886120995, | |
| "grad_norm": 0.40427151322364807, | |
| "learning_rate": 2.994974874371859e-05, | |
| "loss": 0.8887, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.3790035587188612, | |
| "grad_norm": 0.3664209246635437, | |
| "learning_rate": 2.9748743718592964e-05, | |
| "loss": 0.8858, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.37944839857651247, | |
| "grad_norm": 0.3712497353553772, | |
| "learning_rate": 2.9547738693467337e-05, | |
| "loss": 0.8004, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.3798932384341637, | |
| "grad_norm": 0.4214048981666565, | |
| "learning_rate": 2.934673366834171e-05, | |
| "loss": 1.0013, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.38033807829181493, | |
| "grad_norm": 0.3496900498867035, | |
| "learning_rate": 2.914572864321608e-05, | |
| "loss": 0.908, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.3807829181494662, | |
| "grad_norm": 0.4434766471385956, | |
| "learning_rate": 2.8944723618090452e-05, | |
| "loss": 0.9394, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.38122775800711745, | |
| "grad_norm": 0.37649184465408325, | |
| "learning_rate": 2.8743718592964825e-05, | |
| "loss": 0.9892, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.3816725978647687, | |
| "grad_norm": 0.35058286786079407, | |
| "learning_rate": 2.8542713567839198e-05, | |
| "loss": 0.9576, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.3821174377224199, | |
| "grad_norm": 0.3940383493900299, | |
| "learning_rate": 2.8341708542713568e-05, | |
| "loss": 0.9078, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.38256227758007116, | |
| "grad_norm": 0.31289801001548767, | |
| "learning_rate": 2.814070351758794e-05, | |
| "loss": 0.8471, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3830071174377224, | |
| "grad_norm": 0.3773019313812256, | |
| "learning_rate": 2.7939698492462314e-05, | |
| "loss": 1.0291, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.3834519572953737, | |
| "grad_norm": 0.38689449429512024, | |
| "learning_rate": 2.7738693467336686e-05, | |
| "loss": 0.965, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.38389679715302494, | |
| "grad_norm": 0.4001306891441345, | |
| "learning_rate": 2.7537688442211056e-05, | |
| "loss": 0.9251, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.38434163701067614, | |
| "grad_norm": 0.2970896363258362, | |
| "learning_rate": 2.733668341708543e-05, | |
| "loss": 0.8795, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.3847864768683274, | |
| "grad_norm": 0.3230406939983368, | |
| "learning_rate": 2.7135678391959802e-05, | |
| "loss": 0.9162, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.38523131672597866, | |
| "grad_norm": 0.38683274388313293, | |
| "learning_rate": 2.6934673366834175e-05, | |
| "loss": 0.921, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.3856761565836299, | |
| "grad_norm": 0.36258265376091003, | |
| "learning_rate": 2.6733668341708545e-05, | |
| "loss": 0.9282, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.3861209964412811, | |
| "grad_norm": 0.30888402462005615, | |
| "learning_rate": 2.6532663316582917e-05, | |
| "loss": 0.9154, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.3865658362989324, | |
| "grad_norm": 0.4258480370044708, | |
| "learning_rate": 2.633165829145729e-05, | |
| "loss": 0.9405, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.38701067615658363, | |
| "grad_norm": 0.33099818229675293, | |
| "learning_rate": 2.613065326633166e-05, | |
| "loss": 0.923, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.3874555160142349, | |
| "grad_norm": 0.3900362253189087, | |
| "learning_rate": 2.5929648241206033e-05, | |
| "loss": 0.8813, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.3879003558718861, | |
| "grad_norm": 0.3388621509075165, | |
| "learning_rate": 2.5728643216080406e-05, | |
| "loss": 0.9765, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.38834519572953735, | |
| "grad_norm": 0.32633256912231445, | |
| "learning_rate": 2.5527638190954772e-05, | |
| "loss": 0.8824, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.3887900355871886, | |
| "grad_norm": 0.34141066670417786, | |
| "learning_rate": 2.5326633165829145e-05, | |
| "loss": 0.862, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.38923487544483987, | |
| "grad_norm": 0.39966803789138794, | |
| "learning_rate": 2.5125628140703518e-05, | |
| "loss": 0.9406, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.3896797153024911, | |
| "grad_norm": 0.34703922271728516, | |
| "learning_rate": 2.492462311557789e-05, | |
| "loss": 0.9392, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.39012455516014233, | |
| "grad_norm": 0.3692745268344879, | |
| "learning_rate": 2.4723618090452264e-05, | |
| "loss": 0.838, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.3905693950177936, | |
| "grad_norm": 0.3813033401966095, | |
| "learning_rate": 2.4522613065326637e-05, | |
| "loss": 0.9557, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.39101423487544484, | |
| "grad_norm": 0.361794650554657, | |
| "learning_rate": 2.4321608040201007e-05, | |
| "loss": 0.7851, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.3914590747330961, | |
| "grad_norm": 0.3740581274032593, | |
| "learning_rate": 2.4120603015075376e-05, | |
| "loss": 1.0353, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3919039145907473, | |
| "grad_norm": 0.3482462465763092, | |
| "learning_rate": 2.391959798994975e-05, | |
| "loss": 0.9339, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.39234875444839856, | |
| "grad_norm": 0.35234349966049194, | |
| "learning_rate": 2.3718592964824122e-05, | |
| "loss": 0.8892, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.3927935943060498, | |
| "grad_norm": 0.349231481552124, | |
| "learning_rate": 2.351758793969849e-05, | |
| "loss": 0.9183, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.3932384341637011, | |
| "grad_norm": 0.3892892599105835, | |
| "learning_rate": 2.3316582914572865e-05, | |
| "loss": 1.0786, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.39368327402135234, | |
| "grad_norm": 0.4677392840385437, | |
| "learning_rate": 2.3115577889447238e-05, | |
| "loss": 0.9138, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.39412811387900354, | |
| "grad_norm": 0.32070422172546387, | |
| "learning_rate": 2.291457286432161e-05, | |
| "loss": 0.9076, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.3945729537366548, | |
| "grad_norm": 0.30801644921302795, | |
| "learning_rate": 2.271356783919598e-05, | |
| "loss": 0.9457, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.39501779359430605, | |
| "grad_norm": 0.4170681834220886, | |
| "learning_rate": 2.2512562814070353e-05, | |
| "loss": 0.9452, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.3954626334519573, | |
| "grad_norm": 0.44136953353881836, | |
| "learning_rate": 2.2311557788944726e-05, | |
| "loss": 1.1165, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.3959074733096085, | |
| "grad_norm": 0.35362014174461365, | |
| "learning_rate": 2.21105527638191e-05, | |
| "loss": 0.9155, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.3963523131672598, | |
| "grad_norm": 0.3813976049423218, | |
| "learning_rate": 2.190954773869347e-05, | |
| "loss": 0.8541, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.39679715302491103, | |
| "grad_norm": 0.41842445731163025, | |
| "learning_rate": 2.1708542713567838e-05, | |
| "loss": 0.9675, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.3972419928825623, | |
| "grad_norm": 0.3513477146625519, | |
| "learning_rate": 2.150753768844221e-05, | |
| "loss": 0.9387, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.39768683274021355, | |
| "grad_norm": 0.3313136398792267, | |
| "learning_rate": 2.1306532663316584e-05, | |
| "loss": 0.9186, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.39813167259786475, | |
| "grad_norm": 0.370280921459198, | |
| "learning_rate": 2.1105527638190957e-05, | |
| "loss": 1.1071, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.398576512455516, | |
| "grad_norm": 0.3955540657043457, | |
| "learning_rate": 2.0904522613065327e-05, | |
| "loss": 0.9314, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.39902135231316727, | |
| "grad_norm": 0.394826203584671, | |
| "learning_rate": 2.07035175879397e-05, | |
| "loss": 0.9315, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.3994661921708185, | |
| "grad_norm": 0.35463854670524597, | |
| "learning_rate": 2.0502512562814073e-05, | |
| "loss": 0.9011, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.3999110320284697, | |
| "grad_norm": 0.3725610673427582, | |
| "learning_rate": 2.0301507537688446e-05, | |
| "loss": 0.9455, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.400355871886121, | |
| "grad_norm": 0.4204149842262268, | |
| "learning_rate": 2.0100502512562815e-05, | |
| "loss": 0.973, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.40080071174377224, | |
| "grad_norm": 0.312836617231369, | |
| "learning_rate": 1.9899497487437188e-05, | |
| "loss": 0.875, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.4012455516014235, | |
| "grad_norm": 0.4284355044364929, | |
| "learning_rate": 1.9698492462311558e-05, | |
| "loss": 1.0345, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.40169039145907476, | |
| "grad_norm": 0.41245731711387634, | |
| "learning_rate": 1.949748743718593e-05, | |
| "loss": 0.884, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.40213523131672596, | |
| "grad_norm": 0.31140172481536865, | |
| "learning_rate": 1.92964824120603e-05, | |
| "loss": 0.8928, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.4025800711743772, | |
| "grad_norm": 0.4398500323295593, | |
| "learning_rate": 1.9095477386934673e-05, | |
| "loss": 0.9624, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.4030249110320285, | |
| "grad_norm": 0.3438228964805603, | |
| "learning_rate": 1.8894472361809046e-05, | |
| "loss": 0.8849, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.40346975088967973, | |
| "grad_norm": 0.3663855195045471, | |
| "learning_rate": 1.869346733668342e-05, | |
| "loss": 0.8945, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.40391459074733094, | |
| "grad_norm": 0.3942212164402008, | |
| "learning_rate": 1.849246231155779e-05, | |
| "loss": 0.8601, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.4043594306049822, | |
| "grad_norm": 0.3836296498775482, | |
| "learning_rate": 1.829145728643216e-05, | |
| "loss": 1.0224, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.40480427046263345, | |
| "grad_norm": 0.41282856464385986, | |
| "learning_rate": 1.8090452261306535e-05, | |
| "loss": 0.9005, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.4052491103202847, | |
| "grad_norm": 0.3127693235874176, | |
| "learning_rate": 1.7889447236180908e-05, | |
| "loss": 0.8051, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.40569395017793597, | |
| "grad_norm": 0.2867036759853363, | |
| "learning_rate": 1.7688442211055277e-05, | |
| "loss": 1.0131, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.40613879003558717, | |
| "grad_norm": 0.28900963068008423, | |
| "learning_rate": 1.748743718592965e-05, | |
| "loss": 0.8713, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.40658362989323843, | |
| "grad_norm": 0.39211320877075195, | |
| "learning_rate": 1.728643216080402e-05, | |
| "loss": 0.9688, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.4070284697508897, | |
| "grad_norm": 0.39720427989959717, | |
| "learning_rate": 1.7085427135678393e-05, | |
| "loss": 0.9303, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.40747330960854095, | |
| "grad_norm": 0.3245285749435425, | |
| "learning_rate": 1.6884422110552762e-05, | |
| "loss": 0.8772, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.40791814946619215, | |
| "grad_norm": 0.3102715015411377, | |
| "learning_rate": 1.6683417085427135e-05, | |
| "loss": 0.8974, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.4083629893238434, | |
| "grad_norm": 0.34904980659484863, | |
| "learning_rate": 1.6482412060301508e-05, | |
| "loss": 0.9458, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.40880782918149466, | |
| "grad_norm": 0.3596024215221405, | |
| "learning_rate": 1.628140703517588e-05, | |
| "loss": 0.9627, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.4092526690391459, | |
| "grad_norm": 0.37325575947761536, | |
| "learning_rate": 1.608040201005025e-05, | |
| "loss": 0.7928, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.4096975088967972, | |
| "grad_norm": 0.3483446538448334, | |
| "learning_rate": 1.5879396984924624e-05, | |
| "loss": 0.9163, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.4101423487544484, | |
| "grad_norm": 0.3493581712245941, | |
| "learning_rate": 1.5678391959798997e-05, | |
| "loss": 0.9661, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.41058718861209964, | |
| "grad_norm": 0.3486252725124359, | |
| "learning_rate": 1.547738693467337e-05, | |
| "loss": 0.9236, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.4110320284697509, | |
| "grad_norm": 0.40376630425453186, | |
| "learning_rate": 1.527638190954774e-05, | |
| "loss": 0.9523, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.41147686832740216, | |
| "grad_norm": 0.39947953820228577, | |
| "learning_rate": 1.507537688442211e-05, | |
| "loss": 0.9814, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.41192170818505336, | |
| "grad_norm": 0.3233617842197418, | |
| "learning_rate": 1.4874371859296482e-05, | |
| "loss": 0.9911, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.4123665480427046, | |
| "grad_norm": 0.3426443040370941, | |
| "learning_rate": 1.4673366834170855e-05, | |
| "loss": 1.0236, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.4128113879003559, | |
| "grad_norm": 0.3716176450252533, | |
| "learning_rate": 1.4472361809045226e-05, | |
| "loss": 0.9969, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.41325622775800713, | |
| "grad_norm": 0.37307268381118774, | |
| "learning_rate": 1.4271356783919599e-05, | |
| "loss": 0.9022, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.41370106761565834, | |
| "grad_norm": 0.31498992443084717, | |
| "learning_rate": 1.407035175879397e-05, | |
| "loss": 0.9017, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.4141459074733096, | |
| "grad_norm": 0.3574257493019104, | |
| "learning_rate": 1.3869346733668343e-05, | |
| "loss": 1.0195, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.41459074733096085, | |
| "grad_norm": 0.3232157230377197, | |
| "learning_rate": 1.3668341708542715e-05, | |
| "loss": 0.8746, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.4150355871886121, | |
| "grad_norm": 0.3905941843986511, | |
| "learning_rate": 1.3467336683417087e-05, | |
| "loss": 0.8746, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.41548042704626337, | |
| "grad_norm": 0.3263537883758545, | |
| "learning_rate": 1.3266331658291459e-05, | |
| "loss": 0.8922, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.41592526690391457, | |
| "grad_norm": 0.3259488642215729, | |
| "learning_rate": 1.306532663316583e-05, | |
| "loss": 0.9866, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.41637010676156583, | |
| "grad_norm": 0.4093850255012512, | |
| "learning_rate": 1.2864321608040203e-05, | |
| "loss": 0.8843, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.4168149466192171, | |
| "grad_norm": 0.3108372390270233, | |
| "learning_rate": 1.2663316582914573e-05, | |
| "loss": 0.9248, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.41725978647686834, | |
| "grad_norm": 0.3368922770023346, | |
| "learning_rate": 1.2462311557788946e-05, | |
| "loss": 0.9424, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.41770462633451955, | |
| "grad_norm": 0.35581493377685547, | |
| "learning_rate": 1.2261306532663318e-05, | |
| "loss": 0.9238, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.4181494661921708, | |
| "grad_norm": 0.392605185508728, | |
| "learning_rate": 1.2060301507537688e-05, | |
| "loss": 0.9718, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.41859430604982206, | |
| "grad_norm": 0.33466875553131104, | |
| "learning_rate": 1.1859296482412061e-05, | |
| "loss": 0.8881, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.4190391459074733, | |
| "grad_norm": 0.4258723258972168, | |
| "learning_rate": 1.1658291457286432e-05, | |
| "loss": 0.9908, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.4194839857651246, | |
| "grad_norm": 0.3950963318347931, | |
| "learning_rate": 1.1457286432160805e-05, | |
| "loss": 0.8359, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.4199288256227758, | |
| "grad_norm": 0.35699567198753357, | |
| "learning_rate": 1.1256281407035177e-05, | |
| "loss": 0.9785, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.42037366548042704, | |
| "grad_norm": 0.3818075954914093, | |
| "learning_rate": 1.105527638190955e-05, | |
| "loss": 0.9927, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.4208185053380783, | |
| "grad_norm": 0.3606509566307068, | |
| "learning_rate": 1.0854271356783919e-05, | |
| "loss": 0.8477, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.42126334519572955, | |
| "grad_norm": 0.3209396004676819, | |
| "learning_rate": 1.0653266331658292e-05, | |
| "loss": 0.9992, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.42170818505338076, | |
| "grad_norm": 0.35111239552497864, | |
| "learning_rate": 1.0452261306532663e-05, | |
| "loss": 0.8933, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.422153024911032, | |
| "grad_norm": 0.3492172658443451, | |
| "learning_rate": 1.0251256281407036e-05, | |
| "loss": 0.9133, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.4225978647686833, | |
| "grad_norm": 0.34597399830818176, | |
| "learning_rate": 1.0050251256281408e-05, | |
| "loss": 0.8814, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.42304270462633453, | |
| "grad_norm": 0.35809046030044556, | |
| "learning_rate": 9.849246231155779e-06, | |
| "loss": 0.9919, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.4234875444839858, | |
| "grad_norm": 0.3955031931400299, | |
| "learning_rate": 9.64824120603015e-06, | |
| "loss": 0.8611, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.423932384341637, | |
| "grad_norm": 0.35886260867118835, | |
| "learning_rate": 9.447236180904523e-06, | |
| "loss": 0.9025, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.42437722419928825, | |
| "grad_norm": 0.340167760848999, | |
| "learning_rate": 9.246231155778894e-06, | |
| "loss": 0.9633, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.4248220640569395, | |
| "grad_norm": 0.3560841977596283, | |
| "learning_rate": 9.045226130653267e-06, | |
| "loss": 0.9444, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.42526690391459077, | |
| "grad_norm": 0.39469799399375916, | |
| "learning_rate": 8.844221105527639e-06, | |
| "loss": 0.9697, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.42571174377224197, | |
| "grad_norm": 0.35771381855010986, | |
| "learning_rate": 8.64321608040201e-06, | |
| "loss": 0.944, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.4261565836298932, | |
| "grad_norm": 0.3121163845062256, | |
| "learning_rate": 8.442211055276381e-06, | |
| "loss": 0.8582, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.4266014234875445, | |
| "grad_norm": 0.39604029059410095, | |
| "learning_rate": 8.241206030150754e-06, | |
| "loss": 0.9885, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.42704626334519574, | |
| "grad_norm": 0.35038042068481445, | |
| "learning_rate": 8.040201005025125e-06, | |
| "loss": 0.8873, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.427491103202847, | |
| "grad_norm": 0.39221829175949097, | |
| "learning_rate": 7.839195979899498e-06, | |
| "loss": 0.9252, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.4279359430604982, | |
| "grad_norm": 0.39741194248199463, | |
| "learning_rate": 7.63819095477387e-06, | |
| "loss": 0.9643, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.42838078291814946, | |
| "grad_norm": 0.511210024356842, | |
| "learning_rate": 7.437185929648241e-06, | |
| "loss": 0.9651, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.4288256227758007, | |
| "grad_norm": 0.4568634331226349, | |
| "learning_rate": 7.236180904522613e-06, | |
| "loss": 1.0238, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.429270462633452, | |
| "grad_norm": 0.36889776587486267, | |
| "learning_rate": 7.035175879396985e-06, | |
| "loss": 0.964, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.4297153024911032, | |
| "grad_norm": 0.34706658124923706, | |
| "learning_rate": 6.834170854271357e-06, | |
| "loss": 0.9929, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.43016014234875444, | |
| "grad_norm": 0.33485299348831177, | |
| "learning_rate": 6.633165829145729e-06, | |
| "loss": 0.9141, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.4306049822064057, | |
| "grad_norm": 0.5187619924545288, | |
| "learning_rate": 6.4321608040201015e-06, | |
| "loss": 1.084, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.43104982206405695, | |
| "grad_norm": 0.41246598958969116, | |
| "learning_rate": 6.231155778894473e-06, | |
| "loss": 0.9763, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.4314946619217082, | |
| "grad_norm": 0.35957226157188416, | |
| "learning_rate": 6.030150753768844e-06, | |
| "loss": 0.9251, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.4319395017793594, | |
| "grad_norm": 0.3485277593135834, | |
| "learning_rate": 5.829145728643216e-06, | |
| "loss": 0.832, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.43238434163701067, | |
| "grad_norm": 0.2965943217277527, | |
| "learning_rate": 5.628140703517588e-06, | |
| "loss": 0.9496, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.43282918149466193, | |
| "grad_norm": 0.3268524706363678, | |
| "learning_rate": 5.4271356783919595e-06, | |
| "loss": 0.9876, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.4332740213523132, | |
| "grad_norm": 0.33093732595443726, | |
| "learning_rate": 5.226130653266332e-06, | |
| "loss": 0.9083, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.4337188612099644, | |
| "grad_norm": 0.3334566652774811, | |
| "learning_rate": 5.025125628140704e-06, | |
| "loss": 0.8941, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.43416370106761565, | |
| "grad_norm": 0.327716201543808, | |
| "learning_rate": 4.824120603015075e-06, | |
| "loss": 0.9783, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.4346085409252669, | |
| "grad_norm": 0.4024101793766022, | |
| "learning_rate": 4.623115577889447e-06, | |
| "loss": 0.9505, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.43505338078291816, | |
| "grad_norm": 0.3500884175300598, | |
| "learning_rate": 4.422110552763819e-06, | |
| "loss": 0.9397, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.4354982206405694, | |
| "grad_norm": 0.3603179156780243, | |
| "learning_rate": 4.2211055276381906e-06, | |
| "loss": 0.9335, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.4359430604982206, | |
| "grad_norm": 0.3110332190990448, | |
| "learning_rate": 4.020100502512563e-06, | |
| "loss": 0.9589, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.4363879003558719, | |
| "grad_norm": 0.3665446937084198, | |
| "learning_rate": 3.819095477386935e-06, | |
| "loss": 0.9139, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.43683274021352314, | |
| "grad_norm": 0.30451545119285583, | |
| "learning_rate": 3.6180904522613065e-06, | |
| "loss": 0.9229, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.4372775800711744, | |
| "grad_norm": 0.359427809715271, | |
| "learning_rate": 3.4170854271356786e-06, | |
| "loss": 0.9409, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.4377224199288256, | |
| "grad_norm": 0.29950541257858276, | |
| "learning_rate": 3.2160804020100507e-06, | |
| "loss": 0.8438, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.43816725978647686, | |
| "grad_norm": 0.44397425651550293, | |
| "learning_rate": 3.015075376884422e-06, | |
| "loss": 0.9639, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.4386120996441281, | |
| "grad_norm": 0.3420177400112152, | |
| "learning_rate": 2.814070351758794e-06, | |
| "loss": 0.8911, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.4390569395017794, | |
| "grad_norm": 0.3904622793197632, | |
| "learning_rate": 2.613065326633166e-06, | |
| "loss": 0.9407, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.4395017793594306, | |
| "grad_norm": 0.2967797517776489, | |
| "learning_rate": 2.4120603015075375e-06, | |
| "loss": 0.9157, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.43994661921708184, | |
| "grad_norm": 0.3801586925983429, | |
| "learning_rate": 2.2110552763819096e-06, | |
| "loss": 0.8314, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.4403914590747331, | |
| "grad_norm": 0.3838663101196289, | |
| "learning_rate": 2.0100502512562813e-06, | |
| "loss": 0.8797, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.44083629893238435, | |
| "grad_norm": 0.3395996689796448, | |
| "learning_rate": 1.8090452261306533e-06, | |
| "loss": 0.8795, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.4412811387900356, | |
| "grad_norm": 0.38413113355636597, | |
| "learning_rate": 1.6080402010050254e-06, | |
| "loss": 0.9914, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.4417259786476868, | |
| "grad_norm": 0.32779523730278015, | |
| "learning_rate": 1.407035175879397e-06, | |
| "loss": 0.8799, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.44217081850533807, | |
| "grad_norm": 0.4115554094314575, | |
| "learning_rate": 1.2060301507537688e-06, | |
| "loss": 0.8444, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.44261565836298933, | |
| "grad_norm": 0.3810219168663025, | |
| "learning_rate": 1.0050251256281407e-06, | |
| "loss": 1.0453, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.4430604982206406, | |
| "grad_norm": 0.40686285495758057, | |
| "learning_rate": 8.040201005025127e-07, | |
| "loss": 0.8967, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.4435053380782918, | |
| "grad_norm": 0.3694639801979065, | |
| "learning_rate": 6.030150753768844e-07, | |
| "loss": 0.8554, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.44395017793594305, | |
| "grad_norm": 0.32546064257621765, | |
| "learning_rate": 4.0201005025125634e-07, | |
| "loss": 0.8795, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.4443950177935943, | |
| "grad_norm": 0.3117218613624573, | |
| "learning_rate": 2.0100502512562817e-07, | |
| "loss": 0.9495, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.44483985765124556, | |
| "grad_norm": 0.36267247796058655, | |
| "learning_rate": 0.0, | |
| "loss": 0.9526, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.7939011197427712e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |