diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,13962 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 6325, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.003952569169960474, + "grad_norm": 5.423260652678299, + "learning_rate": 2.527646129541864e-07, + "loss": 1.05, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5580334663391113, + "step": 5, + "valid_targets_mean": 14058.8, + "valid_targets_min": 3349 + }, + { + "epoch": 0.007905138339920948, + "grad_norm": 5.606140808889678, + "learning_rate": 5.687203791469194e-07, + "loss": 1.0377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5576962828636169, + "step": 10, + "valid_targets_mean": 14435.0, + "valid_targets_min": 1153 + }, + { + "epoch": 0.011857707509881422, + "grad_norm": 5.308182741429135, + "learning_rate": 8.846761453396525e-07, + "loss": 1.0299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5200608968734741, + "step": 15, + "valid_targets_mean": 14954.7, + "valid_targets_min": 1797 + }, + { + "epoch": 0.015810276679841896, + "grad_norm": 4.958717211851316, + "learning_rate": 1.2006319115323856e-06, + "loss": 1.0176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.517076849937439, + "step": 20, + "valid_targets_mean": 13647.1, + "valid_targets_min": 1534 + }, + { + "epoch": 0.019762845849802372, + "grad_norm": 4.980578003090514, + "learning_rate": 1.5165876777251187e-06, + "loss": 1.0013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4874853491783142, + "step": 25, + "valid_targets_mean": 13359.6, + "valid_targets_min": 944 + }, + { + "epoch": 0.023715415019762844, + "grad_norm": 4.24606492903291, + "learning_rate": 1.8325434439178516e-06, + "loss": 1.0099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47628873586654663, + "step": 30, + "valid_targets_mean": 13654.9, + "valid_targets_min": 939 + }, + { + "epoch": 0.02766798418972332, + "grad_norm": 3.885111081230548, + "learning_rate": 2.148499210110585e-06, + "loss": 0.9813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4994699954986572, + "step": 35, + "valid_targets_mean": 14160.8, + "valid_targets_min": 3644 + }, + { + "epoch": 0.03162055335968379, + "grad_norm": 3.0823610912160087, + "learning_rate": 2.4644549763033174e-06, + "loss": 0.9529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47820037603378296, + "step": 40, + "valid_targets_mean": 14916.2, + "valid_targets_min": 3957 + }, + { + "epoch": 0.03557312252964427, + "grad_norm": 2.7271556684690315, + "learning_rate": 2.7804107424960508e-06, + "loss": 0.9389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.444784939289093, + "step": 45, + "valid_targets_mean": 13790.8, + "valid_targets_min": 1266 + }, + { + "epoch": 0.039525691699604744, + "grad_norm": 2.0500712198009805, + "learning_rate": 3.096366508688784e-06, + "loss": 0.906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45998141169548035, + "step": 50, + "valid_targets_mean": 14225.1, + "valid_targets_min": 1101 + }, + { + "epoch": 0.043478260869565216, + "grad_norm": 1.4980180017215796, + "learning_rate": 3.412322274881517e-06, + "loss": 0.8887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44382500648498535, + "step": 55, + "valid_targets_mean": 13131.0, + "valid_targets_min": 1728 + }, + { + "epoch": 0.04743083003952569, + "grad_norm": 1.1346739511407258, + "learning_rate": 3.72827804107425e-06, + "loss": 0.8904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4568609893321991, + "step": 60, + "valid_targets_mean": 14177.5, + "valid_targets_min": 1318 + }, + { + "epoch": 0.05138339920948617, + "grad_norm": 0.8214779638704146, + "learning_rate": 4.044233807266983e-06, + "loss": 0.8585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4428967237472534, + "step": 65, + "valid_targets_mean": 15102.9, + "valid_targets_min": 4730 + }, + { + "epoch": 0.05533596837944664, + "grad_norm": 0.6629183734869679, + "learning_rate": 4.360189573459716e-06, + "loss": 0.8467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42459481954574585, + "step": 70, + "valid_targets_mean": 15314.7, + "valid_targets_min": 2033 + }, + { + "epoch": 0.05928853754940711, + "grad_norm": 0.5532313203983071, + "learning_rate": 4.676145339652449e-06, + "loss": 0.8356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45821166038513184, + "step": 75, + "valid_targets_mean": 15278.4, + "valid_targets_min": 4945 + }, + { + "epoch": 0.06324110671936758, + "grad_norm": 0.40683217020894946, + "learning_rate": 4.9921011058451815e-06, + "loss": 0.8339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38208213448524475, + "step": 80, + "valid_targets_mean": 14215.3, + "valid_targets_min": 1248 + }, + { + "epoch": 0.06719367588932806, + "grad_norm": 0.34123655066465597, + "learning_rate": 5.308056872037915e-06, + "loss": 0.8302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3852984607219696, + "step": 85, + "valid_targets_mean": 13611.2, + "valid_targets_min": 1717 + }, + { + "epoch": 0.07114624505928854, + "grad_norm": 0.27968335103513725, + "learning_rate": 5.624012638230648e-06, + "loss": 0.8175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3790891170501709, + "step": 90, + "valid_targets_mean": 13576.4, + "valid_targets_min": 1718 + }, + { + "epoch": 0.07509881422924901, + "grad_norm": 0.27098882385510703, + "learning_rate": 5.939968404423381e-06, + "loss": 0.8073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39501458406448364, + "step": 95, + "valid_targets_mean": 13945.5, + "valid_targets_min": 3805 + }, + { + "epoch": 0.07905138339920949, + "grad_norm": 0.23915035994509312, + "learning_rate": 6.255924170616115e-06, + "loss": 0.8211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41911450028419495, + "step": 100, + "valid_targets_mean": 14423.6, + "valid_targets_min": 3192 + }, + { + "epoch": 0.08300395256916997, + "grad_norm": 0.2266336530365729, + "learning_rate": 6.571879936808847e-06, + "loss": 0.8119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42597052454948425, + "step": 105, + "valid_targets_mean": 14321.1, + "valid_targets_min": 1839 + }, + { + "epoch": 0.08695652173913043, + "grad_norm": 0.20296332452255536, + "learning_rate": 6.8878357030015805e-06, + "loss": 0.8063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38341501355171204, + "step": 110, + "valid_targets_mean": 13383.2, + "valid_targets_min": 2553 + }, + { + "epoch": 0.09090909090909091, + "grad_norm": 0.20416009858119577, + "learning_rate": 7.203791469194313e-06, + "loss": 0.8148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3973555266857147, + "step": 115, + "valid_targets_mean": 13551.6, + "valid_targets_min": 1756 + }, + { + "epoch": 0.09486166007905138, + "grad_norm": 0.20980194798826707, + "learning_rate": 7.519747235387046e-06, + "loss": 0.8119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4005943536758423, + "step": 120, + "valid_targets_mean": 14829.8, + "valid_targets_min": 4704 + }, + { + "epoch": 0.09881422924901186, + "grad_norm": 0.18977201323535636, + "learning_rate": 7.83570300157978e-06, + "loss": 0.8155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37444019317626953, + "step": 125, + "valid_targets_mean": 14023.4, + "valid_targets_min": 1837 + }, + { + "epoch": 0.10276679841897234, + "grad_norm": 0.20900516703544766, + "learning_rate": 8.151658767772512e-06, + "loss": 0.7985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39838480949401855, + "step": 130, + "valid_targets_mean": 13998.8, + "valid_targets_min": 3768 + }, + { + "epoch": 0.1067193675889328, + "grad_norm": 0.20691386676692633, + "learning_rate": 8.467614533965247e-06, + "loss": 0.8008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37638261914253235, + "step": 135, + "valid_targets_mean": 12823.8, + "valid_targets_min": 1059 + }, + { + "epoch": 0.11067193675889328, + "grad_norm": 0.1948351280942195, + "learning_rate": 8.783570300157978e-06, + "loss": 0.8036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40146249532699585, + "step": 140, + "valid_targets_mean": 13631.0, + "valid_targets_min": 1482 + }, + { + "epoch": 0.11462450592885376, + "grad_norm": 0.19803984110298997, + "learning_rate": 9.09952606635071e-06, + "loss": 0.8203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41593778133392334, + "step": 145, + "valid_targets_mean": 13786.5, + "valid_targets_min": 4284 + }, + { + "epoch": 0.11857707509881422, + "grad_norm": 0.19988147962551045, + "learning_rate": 9.415481832543445e-06, + "loss": 0.8046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4205974340438843, + "step": 150, + "valid_targets_mean": 13878.8, + "valid_targets_min": 3289 + }, + { + "epoch": 0.1225296442687747, + "grad_norm": 0.20644768428051893, + "learning_rate": 9.731437598736178e-06, + "loss": 0.7968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4280118942260742, + "step": 155, + "valid_targets_mean": 14374.3, + "valid_targets_min": 865 + }, + { + "epoch": 0.12648221343873517, + "grad_norm": 0.19247241801548193, + "learning_rate": 1.004739336492891e-05, + "loss": 0.7782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3704099655151367, + "step": 160, + "valid_targets_mean": 13264.3, + "valid_targets_min": 1200 + }, + { + "epoch": 0.13043478260869565, + "grad_norm": 0.22411005868837025, + "learning_rate": 1.0363349131121642e-05, + "loss": 0.7809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41674715280532837, + "step": 165, + "valid_targets_mean": 14536.7, + "valid_targets_min": 1593 + }, + { + "epoch": 0.13438735177865613, + "grad_norm": 0.2803768428073471, + "learning_rate": 1.0679304897314377e-05, + "loss": 0.8024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39957907795906067, + "step": 170, + "valid_targets_mean": 15350.7, + "valid_targets_min": 3790 + }, + { + "epoch": 0.1383399209486166, + "grad_norm": 0.2196561510378701, + "learning_rate": 1.099526066350711e-05, + "loss": 0.8, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38922470808029175, + "step": 175, + "valid_targets_mean": 14338.1, + "valid_targets_min": 3290 + }, + { + "epoch": 0.1422924901185771, + "grad_norm": 0.21296903503859665, + "learning_rate": 1.1311216429699843e-05, + "loss": 0.7796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40355920791625977, + "step": 180, + "valid_targets_mean": 15206.8, + "valid_targets_min": 3804 + }, + { + "epoch": 0.14624505928853754, + "grad_norm": 0.21507770835617815, + "learning_rate": 1.1627172195892576e-05, + "loss": 0.7869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36741364002227783, + "step": 185, + "valid_targets_mean": 14233.2, + "valid_targets_min": 712 + }, + { + "epoch": 0.15019762845849802, + "grad_norm": 0.22037390222231343, + "learning_rate": 1.1943127962085309e-05, + "loss": 0.7844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38151100277900696, + "step": 190, + "valid_targets_mean": 14602.6, + "valid_targets_min": 3751 + }, + { + "epoch": 0.1541501976284585, + "grad_norm": 0.21697690714603643, + "learning_rate": 1.2259083728278043e-05, + "loss": 0.808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39278414845466614, + "step": 195, + "valid_targets_mean": 13478.3, + "valid_targets_min": 1452 + }, + { + "epoch": 0.15810276679841898, + "grad_norm": 0.21198457465973647, + "learning_rate": 1.2575039494470776e-05, + "loss": 0.791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4140682816505432, + "step": 200, + "valid_targets_mean": 15036.8, + "valid_targets_min": 3072 + }, + { + "epoch": 0.16205533596837945, + "grad_norm": 0.23330119037148384, + "learning_rate": 1.2890995260663507e-05, + "loss": 0.8068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42678481340408325, + "step": 205, + "valid_targets_mean": 14301.0, + "valid_targets_min": 1367 + }, + { + "epoch": 0.16600790513833993, + "grad_norm": 0.2369425729495493, + "learning_rate": 1.320695102685624e-05, + "loss": 0.7732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3593332767486572, + "step": 210, + "valid_targets_mean": 13734.3, + "valid_targets_min": 1883 + }, + { + "epoch": 0.16996047430830039, + "grad_norm": 0.2363959523577065, + "learning_rate": 1.3522906793048973e-05, + "loss": 0.7934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4053111672401428, + "step": 215, + "valid_targets_mean": 14169.9, + "valid_targets_min": 2496 + }, + { + "epoch": 0.17391304347826086, + "grad_norm": 0.22271629390692346, + "learning_rate": 1.3838862559241708e-05, + "loss": 0.7808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39584285020828247, + "step": 220, + "valid_targets_mean": 14203.3, + "valid_targets_min": 2572 + }, + { + "epoch": 0.17786561264822134, + "grad_norm": 0.24382423181570775, + "learning_rate": 1.415481832543444e-05, + "loss": 0.7968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36962223052978516, + "step": 225, + "valid_targets_mean": 14584.5, + "valid_targets_min": 872 + }, + { + "epoch": 0.18181818181818182, + "grad_norm": 0.246982945069733, + "learning_rate": 1.4470774091627173e-05, + "loss": 0.7814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40980449318885803, + "step": 230, + "valid_targets_mean": 14387.8, + "valid_targets_min": 2529 + }, + { + "epoch": 0.1857707509881423, + "grad_norm": 0.23925339868807888, + "learning_rate": 1.4786729857819906e-05, + "loss": 0.7925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38813668489456177, + "step": 235, + "valid_targets_mean": 14815.8, + "valid_targets_min": 1740 + }, + { + "epoch": 0.18972332015810275, + "grad_norm": 0.24515194753106012, + "learning_rate": 1.510268562401264e-05, + "loss": 0.797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.361288458108902, + "step": 240, + "valid_targets_mean": 13887.4, + "valid_targets_min": 886 + }, + { + "epoch": 0.19367588932806323, + "grad_norm": 0.28493607868326815, + "learning_rate": 1.5418641390205372e-05, + "loss": 0.7953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36796835064888, + "step": 245, + "valid_targets_mean": 12568.4, + "valid_targets_min": 1120 + }, + { + "epoch": 0.1976284584980237, + "grad_norm": 0.38182840282426145, + "learning_rate": 1.5734597156398107e-05, + "loss": 0.8037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4290255904197693, + "step": 250, + "valid_targets_mean": 14424.6, + "valid_targets_min": 1353 + }, + { + "epoch": 0.2015810276679842, + "grad_norm": 0.2585460291583751, + "learning_rate": 1.6050552922590838e-05, + "loss": 0.789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39859646558761597, + "step": 255, + "valid_targets_mean": 14107.2, + "valid_targets_min": 3544 + }, + { + "epoch": 0.20553359683794467, + "grad_norm": 0.2465925771329986, + "learning_rate": 1.6366508688783572e-05, + "loss": 0.8046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39915797114372253, + "step": 260, + "valid_targets_mean": 14384.8, + "valid_targets_min": 3888 + }, + { + "epoch": 0.20948616600790515, + "grad_norm": 0.3095874070087879, + "learning_rate": 1.6682464454976304e-05, + "loss": 0.8014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3771776854991913, + "step": 265, + "valid_targets_mean": 13831.1, + "valid_targets_min": 1207 + }, + { + "epoch": 0.2134387351778656, + "grad_norm": 0.32270093443094927, + "learning_rate": 1.6998420221169038e-05, + "loss": 0.7937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37409839034080505, + "step": 270, + "valid_targets_mean": 12889.0, + "valid_targets_min": 3314 + }, + { + "epoch": 0.21739130434782608, + "grad_norm": 0.31730177488761147, + "learning_rate": 1.731437598736177e-05, + "loss": 0.79, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42949312925338745, + "step": 275, + "valid_targets_mean": 14687.0, + "valid_targets_min": 3559 + }, + { + "epoch": 0.22134387351778656, + "grad_norm": 0.27682367415117176, + "learning_rate": 1.7630331753554504e-05, + "loss": 0.814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39933204650878906, + "step": 280, + "valid_targets_mean": 13689.4, + "valid_targets_min": 2451 + }, + { + "epoch": 0.22529644268774704, + "grad_norm": 0.2752762367043883, + "learning_rate": 1.7946287519747235e-05, + "loss": 0.7915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3993968367576599, + "step": 285, + "valid_targets_mean": 13230.3, + "valid_targets_min": 2206 + }, + { + "epoch": 0.22924901185770752, + "grad_norm": 0.27234119956856456, + "learning_rate": 1.826224328593997e-05, + "loss": 0.7947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3935086727142334, + "step": 290, + "valid_targets_mean": 14591.8, + "valid_targets_min": 3835 + }, + { + "epoch": 0.233201581027668, + "grad_norm": 0.331434865400126, + "learning_rate": 1.8578199052132704e-05, + "loss": 0.8093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4356576204299927, + "step": 295, + "valid_targets_mean": 15313.0, + "valid_targets_min": 3852 + }, + { + "epoch": 0.23715415019762845, + "grad_norm": 0.2974483331764454, + "learning_rate": 1.8894154818325436e-05, + "loss": 0.7934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4072684943675995, + "step": 300, + "valid_targets_mean": 14081.8, + "valid_targets_min": 700 + }, + { + "epoch": 0.24110671936758893, + "grad_norm": 0.2739512384727348, + "learning_rate": 1.921011058451817e-05, + "loss": 0.7758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3961341977119446, + "step": 305, + "valid_targets_mean": 13791.0, + "valid_targets_min": 1813 + }, + { + "epoch": 0.2450592885375494, + "grad_norm": 0.3198899669774751, + "learning_rate": 1.95260663507109e-05, + "loss": 0.7833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42259830236434937, + "step": 310, + "valid_targets_mean": 15069.0, + "valid_targets_min": 3406 + }, + { + "epoch": 0.2490118577075099, + "grad_norm": 0.4184285924405467, + "learning_rate": 1.9842022116903633e-05, + "loss": 0.81, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41391313076019287, + "step": 315, + "valid_targets_mean": 14924.2, + "valid_targets_min": 4655 + }, + { + "epoch": 0.25296442687747034, + "grad_norm": 0.3355322122137892, + "learning_rate": 2.015797788309637e-05, + "loss": 0.792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4101108908653259, + "step": 320, + "valid_targets_mean": 14173.6, + "valid_targets_min": 699 + }, + { + "epoch": 0.25691699604743085, + "grad_norm": 0.3781476226402094, + "learning_rate": 2.04739336492891e-05, + "loss": 0.7858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39666569232940674, + "step": 325, + "valid_targets_mean": 14792.1, + "valid_targets_min": 6241 + }, + { + "epoch": 0.2608695652173913, + "grad_norm": 0.3218237403894484, + "learning_rate": 2.0789889415481833e-05, + "loss": 0.783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39463984966278076, + "step": 330, + "valid_targets_mean": 14079.2, + "valid_targets_min": 770 + }, + { + "epoch": 0.2648221343873518, + "grad_norm": 0.42061133194924843, + "learning_rate": 2.1105845181674568e-05, + "loss": 0.7797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40682515501976013, + "step": 335, + "valid_targets_mean": 15269.3, + "valid_targets_min": 4395 + }, + { + "epoch": 0.26877470355731226, + "grad_norm": 0.3523316775773403, + "learning_rate": 2.14218009478673e-05, + "loss": 0.7813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38079309463500977, + "step": 340, + "valid_targets_mean": 14574.2, + "valid_targets_min": 818 + }, + { + "epoch": 0.2727272727272727, + "grad_norm": 0.3891360041306506, + "learning_rate": 2.1737756714060033e-05, + "loss": 0.8013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4164104759693146, + "step": 345, + "valid_targets_mean": 15599.4, + "valid_targets_min": 817 + }, + { + "epoch": 0.2766798418972332, + "grad_norm": 0.36098107274972835, + "learning_rate": 2.2053712480252765e-05, + "loss": 0.7848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42365938425064087, + "step": 350, + "valid_targets_mean": 15235.5, + "valid_targets_min": 1668 + }, + { + "epoch": 0.28063241106719367, + "grad_norm": 0.3072723692608139, + "learning_rate": 2.23696682464455e-05, + "loss": 0.8054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.381642609834671, + "step": 355, + "valid_targets_mean": 15273.3, + "valid_targets_min": 3720 + }, + { + "epoch": 0.2845849802371542, + "grad_norm": 0.3345552844067019, + "learning_rate": 2.2685624012638234e-05, + "loss": 0.7932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4098958671092987, + "step": 360, + "valid_targets_mean": 14826.7, + "valid_targets_min": 2436 + }, + { + "epoch": 0.2885375494071146, + "grad_norm": 0.40392550487103734, + "learning_rate": 2.3001579778830965e-05, + "loss": 0.7822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3878558278083801, + "step": 365, + "valid_targets_mean": 14499.8, + "valid_targets_min": 951 + }, + { + "epoch": 0.2924901185770751, + "grad_norm": 0.6132594356722687, + "learning_rate": 2.33175355450237e-05, + "loss": 0.7928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4196432828903198, + "step": 370, + "valid_targets_mean": 15053.3, + "valid_targets_min": 3608 + }, + { + "epoch": 0.2964426877470356, + "grad_norm": 0.6780251229740196, + "learning_rate": 2.363349131121643e-05, + "loss": 0.7832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3680266737937927, + "step": 375, + "valid_targets_mean": 13996.0, + "valid_targets_min": 2854 + }, + { + "epoch": 0.30039525691699603, + "grad_norm": 0.46123867283431863, + "learning_rate": 2.3949447077409165e-05, + "loss": 0.7921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3822548985481262, + "step": 380, + "valid_targets_mean": 14095.1, + "valid_targets_min": 1958 + }, + { + "epoch": 0.30434782608695654, + "grad_norm": 0.5872806804795866, + "learning_rate": 2.42654028436019e-05, + "loss": 0.8012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37458640336990356, + "step": 385, + "valid_targets_mean": 14682.3, + "valid_targets_min": 1643 + }, + { + "epoch": 0.308300395256917, + "grad_norm": 0.3508704940361925, + "learning_rate": 2.458135860979463e-05, + "loss": 0.7738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40913331508636475, + "step": 390, + "valid_targets_mean": 14613.8, + "valid_targets_min": 1278 + }, + { + "epoch": 0.31225296442687744, + "grad_norm": 0.4307206025114627, + "learning_rate": 2.4897314375987366e-05, + "loss": 0.792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3657647967338562, + "step": 395, + "valid_targets_mean": 13638.3, + "valid_targets_min": 4324 + }, + { + "epoch": 0.31620553359683795, + "grad_norm": 0.2980121966853022, + "learning_rate": 2.5213270142180094e-05, + "loss": 0.7908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4062190651893616, + "step": 400, + "valid_targets_mean": 14501.8, + "valid_targets_min": 2555 + }, + { + "epoch": 0.3201581027667984, + "grad_norm": 0.4783531889605732, + "learning_rate": 2.552922590837283e-05, + "loss": 0.7925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35953736305236816, + "step": 405, + "valid_targets_mean": 13581.5, + "valid_targets_min": 2674 + }, + { + "epoch": 0.3241106719367589, + "grad_norm": 0.4010383012719532, + "learning_rate": 2.5845181674565566e-05, + "loss": 0.7993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42256343364715576, + "step": 410, + "valid_targets_mean": 14274.3, + "valid_targets_min": 3178 + }, + { + "epoch": 0.32806324110671936, + "grad_norm": 0.421622667913756, + "learning_rate": 2.6161137440758294e-05, + "loss": 0.7882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4276689291000366, + "step": 415, + "valid_targets_mean": 14909.3, + "valid_targets_min": 4794 + }, + { + "epoch": 0.33201581027667987, + "grad_norm": 0.48793281121452164, + "learning_rate": 2.647709320695103e-05, + "loss": 0.7933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4072173833847046, + "step": 420, + "valid_targets_mean": 14238.5, + "valid_targets_min": 468 + }, + { + "epoch": 0.3359683794466403, + "grad_norm": 0.3946704619430819, + "learning_rate": 2.679304897314376e-05, + "loss": 0.7859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39688414335250854, + "step": 425, + "valid_targets_mean": 13883.8, + "valid_targets_min": 1796 + }, + { + "epoch": 0.33992094861660077, + "grad_norm": 0.5181285182371762, + "learning_rate": 2.7109004739336494e-05, + "loss": 0.8074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38468730449676514, + "step": 430, + "valid_targets_mean": 14269.6, + "valid_targets_min": 725 + }, + { + "epoch": 0.3438735177865613, + "grad_norm": 0.4795766582595916, + "learning_rate": 2.742496050552923e-05, + "loss": 0.7674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37411993741989136, + "step": 435, + "valid_targets_mean": 13111.2, + "valid_targets_min": 1218 + }, + { + "epoch": 0.34782608695652173, + "grad_norm": 0.5469916418222083, + "learning_rate": 2.774091627172196e-05, + "loss": 0.7868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40929681062698364, + "step": 440, + "valid_targets_mean": 15077.8, + "valid_targets_min": 5393 + }, + { + "epoch": 0.35177865612648224, + "grad_norm": 0.5203621814206215, + "learning_rate": 2.8056872037914695e-05, + "loss": 0.7909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40027111768722534, + "step": 445, + "valid_targets_mean": 14184.6, + "valid_targets_min": 1467 + }, + { + "epoch": 0.3557312252964427, + "grad_norm": 0.44312786767858603, + "learning_rate": 2.8372827804107426e-05, + "loss": 0.7812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4072280526161194, + "step": 450, + "valid_targets_mean": 12972.9, + "valid_targets_min": 975 + }, + { + "epoch": 0.35968379446640314, + "grad_norm": 0.6722222161031239, + "learning_rate": 2.868878357030016e-05, + "loss": 0.7985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39762067794799805, + "step": 455, + "valid_targets_mean": 14657.1, + "valid_targets_min": 996 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 0.7041016751410396, + "learning_rate": 2.9004739336492895e-05, + "loss": 0.8025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.375191867351532, + "step": 460, + "valid_targets_mean": 13162.3, + "valid_targets_min": 682 + }, + { + "epoch": 0.3675889328063241, + "grad_norm": 0.5556168012006437, + "learning_rate": 2.9320695102685626e-05, + "loss": 0.8032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.416301429271698, + "step": 465, + "valid_targets_mean": 15104.2, + "valid_targets_min": 3713 + }, + { + "epoch": 0.3715415019762846, + "grad_norm": 0.5965966649097171, + "learning_rate": 2.963665086887836e-05, + "loss": 0.8085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3795076310634613, + "step": 470, + "valid_targets_mean": 13908.5, + "valid_targets_min": 2136 + }, + { + "epoch": 0.37549407114624506, + "grad_norm": 0.7865252222200217, + "learning_rate": 2.9952606635071092e-05, + "loss": 0.7823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36410844326019287, + "step": 475, + "valid_targets_mean": 12463.6, + "valid_targets_min": 1219 + }, + { + "epoch": 0.3794466403162055, + "grad_norm": 0.6009875176223476, + "learning_rate": 3.0268562401263827e-05, + "loss": 0.7872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37179064750671387, + "step": 480, + "valid_targets_mean": 13777.6, + "valid_targets_min": 930 + }, + { + "epoch": 0.383399209486166, + "grad_norm": 0.45809851440429467, + "learning_rate": 3.058451816745656e-05, + "loss": 0.8031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.416666716337204, + "step": 485, + "valid_targets_mean": 14587.8, + "valid_targets_min": 833 + }, + { + "epoch": 0.38735177865612647, + "grad_norm": 0.47887299705706526, + "learning_rate": 3.090047393364929e-05, + "loss": 0.792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3661605715751648, + "step": 490, + "valid_targets_mean": 13144.1, + "valid_targets_min": 560 + }, + { + "epoch": 0.391304347826087, + "grad_norm": 0.41140552000528086, + "learning_rate": 3.121642969984203e-05, + "loss": 0.7984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40964192152023315, + "step": 495, + "valid_targets_mean": 13937.6, + "valid_targets_min": 758 + }, + { + "epoch": 0.3952569169960474, + "grad_norm": 0.513909439292748, + "learning_rate": 3.153238546603476e-05, + "loss": 0.7799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39913374185562134, + "step": 500, + "valid_targets_mean": 14791.8, + "valid_targets_min": 1704 + }, + { + "epoch": 0.39920948616600793, + "grad_norm": 0.4666650131511573, + "learning_rate": 3.184834123222749e-05, + "loss": 0.79, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40544551610946655, + "step": 505, + "valid_targets_mean": 14683.1, + "valid_targets_min": 5446 + }, + { + "epoch": 0.4031620553359684, + "grad_norm": 0.43009835065774205, + "learning_rate": 3.216429699842023e-05, + "loss": 0.7783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41245007514953613, + "step": 510, + "valid_targets_mean": 15053.8, + "valid_targets_min": 4667 + }, + { + "epoch": 0.40711462450592883, + "grad_norm": 0.6491389362386009, + "learning_rate": 3.248025276461296e-05, + "loss": 0.7851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35217374563217163, + "step": 515, + "valid_targets_mean": 12829.0, + "valid_targets_min": 575 + }, + { + "epoch": 0.41106719367588934, + "grad_norm": 0.7301880886746906, + "learning_rate": 3.279620853080569e-05, + "loss": 0.8177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41111475229263306, + "step": 520, + "valid_targets_mean": 14770.8, + "valid_targets_min": 1488 + }, + { + "epoch": 0.4150197628458498, + "grad_norm": 0.5786573545320509, + "learning_rate": 3.311216429699842e-05, + "loss": 0.8077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.405892938375473, + "step": 525, + "valid_targets_mean": 14595.5, + "valid_targets_min": 3051 + }, + { + "epoch": 0.4189723320158103, + "grad_norm": 0.5863964424642815, + "learning_rate": 3.342812006319116e-05, + "loss": 0.7902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36269092559814453, + "step": 530, + "valid_targets_mean": 14767.3, + "valid_targets_min": 3732 + }, + { + "epoch": 0.42292490118577075, + "grad_norm": 0.5515446278645201, + "learning_rate": 3.374407582938389e-05, + "loss": 0.7914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3833330273628235, + "step": 535, + "valid_targets_mean": 14716.2, + "valid_targets_min": 3561 + }, + { + "epoch": 0.4268774703557312, + "grad_norm": 0.5055996055476422, + "learning_rate": 3.406003159557662e-05, + "loss": 0.7962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3659287691116333, + "step": 540, + "valid_targets_mean": 13001.2, + "valid_targets_min": 1355 + }, + { + "epoch": 0.4308300395256917, + "grad_norm": 0.4719984353590635, + "learning_rate": 3.437598736176936e-05, + "loss": 0.7923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39592984318733215, + "step": 545, + "valid_targets_mean": 14664.1, + "valid_targets_min": 3116 + }, + { + "epoch": 0.43478260869565216, + "grad_norm": 0.8298865934871378, + "learning_rate": 3.4691943127962084e-05, + "loss": 0.7812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4030647277832031, + "step": 550, + "valid_targets_mean": 15389.4, + "valid_targets_min": 4038 + }, + { + "epoch": 0.43873517786561267, + "grad_norm": 0.541955249049712, + "learning_rate": 3.500789889415482e-05, + "loss": 0.783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3970744013786316, + "step": 555, + "valid_targets_mean": 13010.8, + "valid_targets_min": 463 + }, + { + "epoch": 0.4426877470355731, + "grad_norm": 0.7620960312243735, + "learning_rate": 3.532385466034755e-05, + "loss": 0.7934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3636441230773926, + "step": 560, + "valid_targets_mean": 14652.5, + "valid_targets_min": 862 + }, + { + "epoch": 0.44664031620553357, + "grad_norm": 0.4732151057824115, + "learning_rate": 3.5639810426540284e-05, + "loss": 0.7879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38346779346466064, + "step": 565, + "valid_targets_mean": 12959.7, + "valid_targets_min": 1689 + }, + { + "epoch": 0.4505928853754941, + "grad_norm": 0.6250792977480693, + "learning_rate": 3.595576619273302e-05, + "loss": 0.7789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3828081488609314, + "step": 570, + "valid_targets_mean": 13899.4, + "valid_targets_min": 633 + }, + { + "epoch": 0.45454545454545453, + "grad_norm": 0.49226778210692246, + "learning_rate": 3.6271721958925753e-05, + "loss": 0.7946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37467336654663086, + "step": 575, + "valid_targets_mean": 14017.5, + "valid_targets_min": 1010 + }, + { + "epoch": 0.45849802371541504, + "grad_norm": 0.5507787819155295, + "learning_rate": 3.6587677725118485e-05, + "loss": 0.7807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.385878324508667, + "step": 580, + "valid_targets_mean": 13037.2, + "valid_targets_min": 1378 + }, + { + "epoch": 0.4624505928853755, + "grad_norm": 0.6166976638080929, + "learning_rate": 3.690363349131122e-05, + "loss": 0.7918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4087505042552948, + "step": 585, + "valid_targets_mean": 14844.6, + "valid_targets_min": 4417 + }, + { + "epoch": 0.466403162055336, + "grad_norm": 0.42053432470282953, + "learning_rate": 3.7219589257503954e-05, + "loss": 0.7723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4127066731452942, + "step": 590, + "valid_targets_mean": 14651.8, + "valid_targets_min": 3942 + }, + { + "epoch": 0.47035573122529645, + "grad_norm": 0.5120578819545079, + "learning_rate": 3.7535545023696685e-05, + "loss": 0.7763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3908807039260864, + "step": 595, + "valid_targets_mean": 14643.8, + "valid_targets_min": 1433 + }, + { + "epoch": 0.4743083003952569, + "grad_norm": 0.6151976824309842, + "learning_rate": 3.7851500789889416e-05, + "loss": 0.7911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3684166967868805, + "step": 600, + "valid_targets_mean": 13669.4, + "valid_targets_min": 1836 + }, + { + "epoch": 0.4782608695652174, + "grad_norm": 0.6184357281552708, + "learning_rate": 3.8167456556082154e-05, + "loss": 0.7927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42682188749313354, + "step": 605, + "valid_targets_mean": 14156.0, + "valid_targets_min": 1795 + }, + { + "epoch": 0.48221343873517786, + "grad_norm": 0.754276919568179, + "learning_rate": 3.8483412322274885e-05, + "loss": 0.793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3949351906776428, + "step": 610, + "valid_targets_mean": 13917.3, + "valid_targets_min": 1664 + }, + { + "epoch": 0.48616600790513836, + "grad_norm": 0.5620377931232118, + "learning_rate": 3.8799368088467617e-05, + "loss": 0.7963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3786265254020691, + "step": 615, + "valid_targets_mean": 14800.0, + "valid_targets_min": 976 + }, + { + "epoch": 0.4901185770750988, + "grad_norm": 0.6901900682365142, + "learning_rate": 3.9115323854660355e-05, + "loss": 0.7833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3824901282787323, + "step": 620, + "valid_targets_mean": 14457.2, + "valid_targets_min": 2441 + }, + { + "epoch": 0.49407114624505927, + "grad_norm": 0.6186512940729718, + "learning_rate": 3.943127962085308e-05, + "loss": 0.7879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34835341572761536, + "step": 625, + "valid_targets_mean": 13882.0, + "valid_targets_min": 356 + }, + { + "epoch": 0.4980237154150198, + "grad_norm": 0.6626084217463781, + "learning_rate": 3.974723538704582e-05, + "loss": 0.7942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3973018527030945, + "step": 630, + "valid_targets_mean": 13864.6, + "valid_targets_min": 621 + }, + { + "epoch": 0.5019762845849802, + "grad_norm": 0.73928036036712, + "learning_rate": 3.999999695371908e-05, + "loss": 0.7895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40963608026504517, + "step": 635, + "valid_targets_mean": 15166.8, + "valid_targets_min": 952 + }, + { + "epoch": 0.5059288537549407, + "grad_norm": 0.5505010313309583, + "learning_rate": 3.999989033398402e-05, + "loss": 0.7786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36309218406677246, + "step": 640, + "valid_targets_mean": 13718.0, + "valid_targets_min": 2247 + }, + { + "epoch": 0.5098814229249012, + "grad_norm": 0.5392909701269587, + "learning_rate": 3.9999631401130525e-05, + "loss": 0.7964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4268054664134979, + "step": 645, + "valid_targets_mean": 15139.2, + "valid_targets_min": 3960 + }, + { + "epoch": 0.5138339920948617, + "grad_norm": 0.3844810919439295, + "learning_rate": 3.9999220157130544e-05, + "loss": 0.7828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4113592207431793, + "step": 650, + "valid_targets_mean": 13629.3, + "valid_targets_min": 571 + }, + { + "epoch": 0.5177865612648221, + "grad_norm": 0.4778921773223092, + "learning_rate": 3.999865660511599e-05, + "loss": 0.777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39474910497665405, + "step": 655, + "valid_targets_mean": 14218.6, + "valid_targets_min": 3744 + }, + { + "epoch": 0.5217391304347826, + "grad_norm": 0.4370229206054984, + "learning_rate": 3.99979407493787e-05, + "loss": 0.7843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40925562381744385, + "step": 660, + "valid_targets_mean": 15832.2, + "valid_targets_min": 8691 + }, + { + "epoch": 0.525691699604743, + "grad_norm": 0.40043356929172425, + "learning_rate": 3.999707259537042e-05, + "loss": 0.7961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3883097171783447, + "step": 665, + "valid_targets_mean": 15026.1, + "valid_targets_min": 2070 + }, + { + "epoch": 0.5296442687747036, + "grad_norm": 0.5723389556503627, + "learning_rate": 3.999605214970274e-05, + "loss": 0.8114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3859957456588745, + "step": 670, + "valid_targets_mean": 14954.4, + "valid_targets_min": 1167 + }, + { + "epoch": 0.5335968379446641, + "grad_norm": 0.7853033707009076, + "learning_rate": 3.9994879420147086e-05, + "loss": 0.7857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4288088083267212, + "step": 675, + "valid_targets_mean": 15218.2, + "valid_targets_min": 2101 + }, + { + "epoch": 0.5375494071146245, + "grad_norm": 0.7059654428506722, + "learning_rate": 3.9993554415634585e-05, + "loss": 0.7862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3464420437812805, + "step": 680, + "valid_targets_mean": 13556.8, + "valid_targets_min": 1128 + }, + { + "epoch": 0.541501976284585, + "grad_norm": 0.6595745803548834, + "learning_rate": 3.999207714625609e-05, + "loss": 0.7885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3749304413795471, + "step": 685, + "valid_targets_mean": 12802.3, + "valid_targets_min": 2114 + }, + { + "epoch": 0.5454545454545454, + "grad_norm": 0.4891631039851974, + "learning_rate": 3.999044762326203e-05, + "loss": 0.781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4055309295654297, + "step": 690, + "valid_targets_mean": 14721.1, + "valid_targets_min": 1810 + }, + { + "epoch": 0.549407114624506, + "grad_norm": 0.4436920949315136, + "learning_rate": 3.998866585906236e-05, + "loss": 0.8104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4059298634529114, + "step": 695, + "valid_targets_mean": 14275.2, + "valid_targets_min": 3380 + }, + { + "epoch": 0.5533596837944664, + "grad_norm": 0.4526662139323455, + "learning_rate": 3.9986731867226456e-05, + "loss": 0.7749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34638217091560364, + "step": 700, + "valid_targets_mean": 13954.4, + "valid_targets_min": 2303 + }, + { + "epoch": 0.5573122529644269, + "grad_norm": 0.5823972611642799, + "learning_rate": 3.998464566248303e-05, + "loss": 0.771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37308019399642944, + "step": 705, + "valid_targets_mean": 12741.9, + "valid_targets_min": 1247 + }, + { + "epoch": 0.5612648221343873, + "grad_norm": 0.46048420103939264, + "learning_rate": 3.998240726071996e-05, + "loss": 0.7743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3807825446128845, + "step": 710, + "valid_targets_mean": 13697.3, + "valid_targets_min": 2834 + }, + { + "epoch": 0.5652173913043478, + "grad_norm": 0.4732513451861853, + "learning_rate": 3.998001667898426e-05, + "loss": 0.7912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3580189645290375, + "step": 715, + "valid_targets_mean": 14037.0, + "valid_targets_min": 3489 + }, + { + "epoch": 0.5691699604743083, + "grad_norm": 0.6982548390150592, + "learning_rate": 3.997747393548186e-05, + "loss": 0.7898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38289541006088257, + "step": 720, + "valid_targets_mean": 13588.8, + "valid_targets_min": 1385 + }, + { + "epoch": 0.5731225296442688, + "grad_norm": 1.0661580852977235, + "learning_rate": 3.997477904957754e-05, + "loss": 0.783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3792533278465271, + "step": 725, + "valid_targets_mean": 13941.4, + "valid_targets_min": 1183 + }, + { + "epoch": 0.5770750988142292, + "grad_norm": 1.074037911682697, + "learning_rate": 3.997193204179474e-05, + "loss": 0.7863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4062015414237976, + "step": 730, + "valid_targets_mean": 13801.4, + "valid_targets_min": 1508 + }, + { + "epoch": 0.5810276679841897, + "grad_norm": 0.4319683282287736, + "learning_rate": 3.996893293381539e-05, + "loss": 0.7913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40379539132118225, + "step": 735, + "valid_targets_mean": 14286.9, + "valid_targets_min": 823 + }, + { + "epoch": 0.5849802371541502, + "grad_norm": 0.4723035834255679, + "learning_rate": 3.99657817484798e-05, + "loss": 0.7878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4188079833984375, + "step": 740, + "valid_targets_mean": 15295.0, + "valid_targets_min": 899 + }, + { + "epoch": 0.5889328063241107, + "grad_norm": 0.5095361171490776, + "learning_rate": 3.9962478509786456e-05, + "loss": 0.772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.387433797121048, + "step": 745, + "valid_targets_mean": 14337.9, + "valid_targets_min": 2383 + }, + { + "epoch": 0.5928853754940712, + "grad_norm": 0.5490479682004452, + "learning_rate": 3.9959023242891805e-05, + "loss": 0.7703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33966580033302307, + "step": 750, + "valid_targets_mean": 12771.0, + "valid_targets_min": 1139 + }, + { + "epoch": 0.5968379446640316, + "grad_norm": 0.9661564019541177, + "learning_rate": 3.995541597411014e-05, + "loss": 0.7898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40765658020973206, + "step": 755, + "valid_targets_mean": 15106.8, + "valid_targets_min": 4725 + }, + { + "epoch": 0.6007905138339921, + "grad_norm": 0.6325659874581517, + "learning_rate": 3.995165673091331e-05, + "loss": 0.7872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40939173102378845, + "step": 760, + "valid_targets_mean": 14887.5, + "valid_targets_min": 4587 + }, + { + "epoch": 0.6047430830039525, + "grad_norm": 0.9711835249915316, + "learning_rate": 3.994774554193057e-05, + "loss": 0.7848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.423783540725708, + "step": 765, + "valid_targets_mean": 14937.2, + "valid_targets_min": 3725 + }, + { + "epoch": 0.6086956521739131, + "grad_norm": 0.7219465031782212, + "learning_rate": 3.994368243694837e-05, + "loss": 0.7864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37313663959503174, + "step": 770, + "valid_targets_mean": 14472.3, + "valid_targets_min": 2506 + }, + { + "epoch": 0.6126482213438735, + "grad_norm": 0.6246008604088684, + "learning_rate": 3.993946744691008e-05, + "loss": 0.7823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3944859206676483, + "step": 775, + "valid_targets_mean": 12436.2, + "valid_targets_min": 818 + }, + { + "epoch": 0.616600790513834, + "grad_norm": 0.5902985701502207, + "learning_rate": 3.99351006039158e-05, + "loss": 0.7844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39889413118362427, + "step": 780, + "valid_targets_mean": 14504.0, + "valid_targets_min": 1151 + }, + { + "epoch": 0.6205533596837944, + "grad_norm": 0.594638060269896, + "learning_rate": 3.993058194122207e-05, + "loss": 0.7822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37158966064453125, + "step": 785, + "valid_targets_mean": 13156.5, + "valid_targets_min": 2887 + }, + { + "epoch": 0.6245059288537549, + "grad_norm": 0.5346442397495802, + "learning_rate": 3.9925911493241665e-05, + "loss": 0.7856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4073689579963684, + "step": 790, + "valid_targets_mean": 13670.3, + "valid_targets_min": 754 + }, + { + "epoch": 0.6284584980237155, + "grad_norm": 0.7611803688528466, + "learning_rate": 3.992108929554332e-05, + "loss": 0.7886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3887355327606201, + "step": 795, + "valid_targets_mean": 13751.9, + "valid_targets_min": 2066 + }, + { + "epoch": 0.6324110671936759, + "grad_norm": 0.5266399456137865, + "learning_rate": 3.991611538485141e-05, + "loss": 0.7898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4026590585708618, + "step": 800, + "valid_targets_mean": 14690.9, + "valid_targets_min": 3434 + }, + { + "epoch": 0.6363636363636364, + "grad_norm": 0.6077582205903398, + "learning_rate": 3.991098979904575e-05, + "loss": 0.7825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39754316210746765, + "step": 805, + "valid_targets_mean": 14818.8, + "valid_targets_min": 801 + }, + { + "epoch": 0.6403162055335968, + "grad_norm": 0.465681257613959, + "learning_rate": 3.990571257716124e-05, + "loss": 0.7756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3772549629211426, + "step": 810, + "valid_targets_mean": 14248.6, + "valid_targets_min": 1858 + }, + { + "epoch": 0.6442687747035574, + "grad_norm": 0.5895622068909333, + "learning_rate": 3.9900283759387624e-05, + "loss": 0.7881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3574357032775879, + "step": 815, + "valid_targets_mean": 13824.2, + "valid_targets_min": 1079 + }, + { + "epoch": 0.6482213438735178, + "grad_norm": 0.49052156016418014, + "learning_rate": 3.9894703387069125e-05, + "loss": 0.779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4226961135864258, + "step": 820, + "valid_targets_mean": 15382.5, + "valid_targets_min": 3209 + }, + { + "epoch": 0.6521739130434783, + "grad_norm": 0.5580721917737813, + "learning_rate": 3.988897150270417e-05, + "loss": 0.7879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37427183985710144, + "step": 825, + "valid_targets_mean": 14565.3, + "valid_targets_min": 2070 + }, + { + "epoch": 0.6561264822134387, + "grad_norm": 0.496616584291907, + "learning_rate": 3.988308814994508e-05, + "loss": 0.7735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37059658765792847, + "step": 830, + "valid_targets_mean": 15235.3, + "valid_targets_min": 2500 + }, + { + "epoch": 0.6600790513833992, + "grad_norm": 0.9755811479499964, + "learning_rate": 3.9877053373597654e-05, + "loss": 0.7903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3971031904220581, + "step": 835, + "valid_targets_mean": 14439.3, + "valid_targets_min": 1577 + }, + { + "epoch": 0.6640316205533597, + "grad_norm": 0.6218928330885396, + "learning_rate": 3.9870867219620953e-05, + "loss": 0.7809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3996012806892395, + "step": 840, + "valid_targets_mean": 14535.7, + "valid_targets_min": 1069 + }, + { + "epoch": 0.6679841897233202, + "grad_norm": 0.9348440407366061, + "learning_rate": 3.986452973512684e-05, + "loss": 0.796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41023826599121094, + "step": 845, + "valid_targets_mean": 13577.5, + "valid_targets_min": 930 + }, + { + "epoch": 0.6719367588932806, + "grad_norm": 0.5847520605925773, + "learning_rate": 3.9858040968379687e-05, + "loss": 0.7778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.395527720451355, + "step": 850, + "valid_targets_mean": 14524.3, + "valid_targets_min": 2680 + }, + { + "epoch": 0.6758893280632411, + "grad_norm": 0.6654718314098561, + "learning_rate": 3.985140096879598e-05, + "loss": 0.7965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40291863679885864, + "step": 855, + "valid_targets_mean": 13548.6, + "valid_targets_min": 1796 + }, + { + "epoch": 0.6798418972332015, + "grad_norm": 0.49119262830494537, + "learning_rate": 3.9844609786943955e-05, + "loss": 0.7964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3974599838256836, + "step": 860, + "valid_targets_mean": 14365.3, + "valid_targets_min": 1308 + }, + { + "epoch": 0.6837944664031621, + "grad_norm": 0.4446814971936942, + "learning_rate": 3.983766747454319e-05, + "loss": 0.7934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40060877799987793, + "step": 865, + "valid_targets_mean": 14524.4, + "valid_targets_min": 1065 + }, + { + "epoch": 0.6877470355731226, + "grad_norm": 0.5279549186758704, + "learning_rate": 3.9830574084464235e-05, + "loss": 0.7847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36588966846466064, + "step": 870, + "valid_targets_mean": 12899.2, + "valid_targets_min": 2330 + }, + { + "epoch": 0.691699604743083, + "grad_norm": 0.7099679985303011, + "learning_rate": 3.982332967072822e-05, + "loss": 0.7869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3927813768386841, + "step": 875, + "valid_targets_mean": 14603.6, + "valid_targets_min": 2031 + }, + { + "epoch": 0.6956521739130435, + "grad_norm": 0.4748267293424668, + "learning_rate": 3.9815934288506394e-05, + "loss": 0.7822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3556380867958069, + "step": 880, + "valid_targets_mean": 12900.1, + "valid_targets_min": 2421 + }, + { + "epoch": 0.6996047430830039, + "grad_norm": 0.5186575760912874, + "learning_rate": 3.980838799411975e-05, + "loss": 0.7966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41868293285369873, + "step": 885, + "valid_targets_mean": 14466.9, + "valid_targets_min": 3130 + }, + { + "epoch": 0.7035573122529645, + "grad_norm": 0.588059631438118, + "learning_rate": 3.9800690845038604e-05, + "loss": 0.7786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34142574667930603, + "step": 890, + "valid_targets_mean": 13231.6, + "valid_targets_min": 1101 + }, + { + "epoch": 0.7075098814229249, + "grad_norm": 0.5605694886591978, + "learning_rate": 3.9792842899882095e-05, + "loss": 0.7978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35573869943618774, + "step": 895, + "valid_targets_mean": 12478.5, + "valid_targets_min": 399 + }, + { + "epoch": 0.7114624505928854, + "grad_norm": 0.5133146252777666, + "learning_rate": 3.978484421841782e-05, + "loss": 0.7906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39170870184898376, + "step": 900, + "valid_targets_mean": 13728.7, + "valid_targets_min": 1608 + }, + { + "epoch": 0.7154150197628458, + "grad_norm": 0.5165716185305553, + "learning_rate": 3.97766948615613e-05, + "loss": 0.7984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36306124925613403, + "step": 905, + "valid_targets_mean": 12901.5, + "valid_targets_min": 480 + }, + { + "epoch": 0.7193675889328063, + "grad_norm": 0.5882750247711833, + "learning_rate": 3.976839489137559e-05, + "loss": 0.7893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39885076880455017, + "step": 910, + "valid_targets_mean": 14042.8, + "valid_targets_min": 1376 + }, + { + "epoch": 0.7233201581027668, + "grad_norm": 0.5329856230448209, + "learning_rate": 3.975994437107075e-05, + "loss": 0.7883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3773532509803772, + "step": 915, + "valid_targets_mean": 13963.1, + "valid_targets_min": 2881 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 0.7759023686703217, + "learning_rate": 3.975134336500337e-05, + "loss": 0.7848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4320923686027527, + "step": 920, + "valid_targets_mean": 14500.4, + "valid_targets_min": 1604 + }, + { + "epoch": 0.7312252964426877, + "grad_norm": 0.8518285933434391, + "learning_rate": 3.9742591938676135e-05, + "loss": 0.7851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3973637819290161, + "step": 925, + "valid_targets_mean": 15529.2, + "valid_targets_min": 8887 + }, + { + "epoch": 0.7351778656126482, + "grad_norm": 0.42351847222470007, + "learning_rate": 3.9733690158737255e-05, + "loss": 0.7741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36503350734710693, + "step": 930, + "valid_targets_mean": 12764.6, + "valid_targets_min": 3293 + }, + { + "epoch": 0.7391304347826086, + "grad_norm": 0.6216415707507882, + "learning_rate": 3.972463809297999e-05, + "loss": 0.7934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4125515818595886, + "step": 935, + "valid_targets_mean": 15962.4, + "valid_targets_min": 2908 + }, + { + "epoch": 0.7430830039525692, + "grad_norm": 0.43075038231493296, + "learning_rate": 3.971543581034214e-05, + "loss": 0.7642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39416226744651794, + "step": 940, + "valid_targets_mean": 14361.1, + "valid_targets_min": 1383 + }, + { + "epoch": 0.7470355731225297, + "grad_norm": 0.5186222468112213, + "learning_rate": 3.970608338090551e-05, + "loss": 0.7819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.407385915517807, + "step": 945, + "valid_targets_mean": 14837.0, + "valid_targets_min": 1220 + }, + { + "epoch": 0.7509881422924901, + "grad_norm": 0.6399786551082401, + "learning_rate": 3.9696580875895365e-05, + "loss": 0.7924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41128236055374146, + "step": 950, + "valid_targets_mean": 14297.8, + "valid_targets_min": 2616 + }, + { + "epoch": 0.7549407114624506, + "grad_norm": 0.7582429508272119, + "learning_rate": 3.968692836767992e-05, + "loss": 0.7706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41801831126213074, + "step": 955, + "valid_targets_mean": 14267.3, + "valid_targets_min": 1125 + }, + { + "epoch": 0.758893280632411, + "grad_norm": 0.4419044851926264, + "learning_rate": 3.967712592976976e-05, + "loss": 0.7783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.375307559967041, + "step": 960, + "valid_targets_mean": 15219.3, + "valid_targets_min": 951 + }, + { + "epoch": 0.7628458498023716, + "grad_norm": 0.5027388331711558, + "learning_rate": 3.9667173636817284e-05, + "loss": 0.7838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3631296753883362, + "step": 965, + "valid_targets_mean": 12794.1, + "valid_targets_min": 770 + }, + { + "epoch": 0.766798418972332, + "grad_norm": 0.41095299363682214, + "learning_rate": 3.965707156461615e-05, + "loss": 0.776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40908169746398926, + "step": 970, + "valid_targets_mean": 14210.7, + "valid_targets_min": 3809 + }, + { + "epoch": 0.7707509881422925, + "grad_norm": 0.5293671834638368, + "learning_rate": 3.964681979010068e-05, + "loss": 0.7891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44894111156463623, + "step": 975, + "valid_targets_mean": 15424.8, + "valid_targets_min": 6228 + }, + { + "epoch": 0.7747035573122529, + "grad_norm": 0.8569112475276607, + "learning_rate": 3.9636418391345296e-05, + "loss": 0.7671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3632447421550751, + "step": 980, + "valid_targets_mean": 13947.1, + "valid_targets_min": 974 + }, + { + "epoch": 0.7786561264822134, + "grad_norm": 0.7240124520963909, + "learning_rate": 3.96258674475639e-05, + "loss": 0.785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3693791329860687, + "step": 985, + "valid_targets_mean": 13686.5, + "valid_targets_min": 1014 + }, + { + "epoch": 0.782608695652174, + "grad_norm": 0.7479889399881811, + "learning_rate": 3.96151670391093e-05, + "loss": 0.7887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33649736642837524, + "step": 990, + "valid_targets_mean": 12230.1, + "valid_targets_min": 1053 + }, + { + "epoch": 0.7865612648221344, + "grad_norm": 0.8917188869147429, + "learning_rate": 3.9604317247472564e-05, + "loss": 0.7773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38277196884155273, + "step": 995, + "valid_targets_mean": 14638.1, + "valid_targets_min": 1913 + }, + { + "epoch": 0.7905138339920948, + "grad_norm": 0.6690611088672793, + "learning_rate": 3.9593318155282425e-05, + "loss": 0.7818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3650968670845032, + "step": 1000, + "valid_targets_mean": 13967.0, + "valid_targets_min": 1105 + }, + { + "epoch": 0.7944664031620553, + "grad_norm": 0.5743841501213321, + "learning_rate": 3.9582169846304645e-05, + "loss": 0.7692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40669864416122437, + "step": 1005, + "valid_targets_mean": 13862.1, + "valid_targets_min": 306 + }, + { + "epoch": 0.7984189723320159, + "grad_norm": 0.4525847081586118, + "learning_rate": 3.9570872405441374e-05, + "loss": 0.7829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35990628600120544, + "step": 1010, + "valid_targets_mean": 13374.5, + "valid_targets_min": 1557 + }, + { + "epoch": 0.8023715415019763, + "grad_norm": 0.4924459306260721, + "learning_rate": 3.9559425918730506e-05, + "loss": 0.7886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3812897801399231, + "step": 1015, + "valid_targets_mean": 14516.1, + "valid_targets_min": 3486 + }, + { + "epoch": 0.8063241106719368, + "grad_norm": 1.042252823655229, + "learning_rate": 3.9547830473345026e-05, + "loss": 0.7764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40294745564460754, + "step": 1020, + "valid_targets_mean": 15880.5, + "valid_targets_min": 6156 + }, + { + "epoch": 0.8102766798418972, + "grad_norm": 0.630778060314298, + "learning_rate": 3.953608615759234e-05, + "loss": 0.7936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.393210232257843, + "step": 1025, + "valid_targets_mean": 14065.9, + "valid_targets_min": 744 + }, + { + "epoch": 0.8142292490118577, + "grad_norm": 0.7021186920639693, + "learning_rate": 3.9524193060913606e-05, + "loss": 0.7739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3551902770996094, + "step": 1030, + "valid_targets_mean": 13171.9, + "valid_targets_min": 1156 + }, + { + "epoch": 0.8181818181818182, + "grad_norm": 0.38194275642613024, + "learning_rate": 3.951215127388305e-05, + "loss": 0.7736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38149648904800415, + "step": 1035, + "valid_targets_mean": 14910.2, + "valid_targets_min": 1684 + }, + { + "epoch": 0.8221343873517787, + "grad_norm": 0.8854498569999548, + "learning_rate": 3.9499960888207295e-05, + "loss": 0.7874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38690221309661865, + "step": 1040, + "valid_targets_mean": 13092.2, + "valid_targets_min": 2306 + }, + { + "epoch": 0.8260869565217391, + "grad_norm": 0.6249563813199297, + "learning_rate": 3.948762199672461e-05, + "loss": 0.7685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39367565512657166, + "step": 1045, + "valid_targets_mean": 15153.9, + "valid_targets_min": 4161 + }, + { + "epoch": 0.8300395256916996, + "grad_norm": 0.7311200977914243, + "learning_rate": 3.9475134693404275e-05, + "loss": 0.7779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37699949741363525, + "step": 1050, + "valid_targets_mean": 13396.8, + "valid_targets_min": 471 + }, + { + "epoch": 0.83399209486166, + "grad_norm": 0.4878388549235403, + "learning_rate": 3.946249907334582e-05, + "loss": 0.7797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39908716082572937, + "step": 1055, + "valid_targets_mean": 15654.7, + "valid_targets_min": 5202 + }, + { + "epoch": 0.8379446640316206, + "grad_norm": 0.7813678983425782, + "learning_rate": 3.94497152327783e-05, + "loss": 0.7815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35354092717170715, + "step": 1060, + "valid_targets_mean": 13706.4, + "valid_targets_min": 1443 + }, + { + "epoch": 0.841897233201581, + "grad_norm": 1.1269824155518038, + "learning_rate": 3.9436783269059584e-05, + "loss": 0.7662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37741878628730774, + "step": 1065, + "valid_targets_mean": 14635.4, + "valid_targets_min": 2152 + }, + { + "epoch": 0.8458498023715415, + "grad_norm": 0.4346412326137335, + "learning_rate": 3.94237032806756e-05, + "loss": 0.7811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.371143639087677, + "step": 1070, + "valid_targets_mean": 13869.1, + "valid_targets_min": 3392 + }, + { + "epoch": 0.849802371541502, + "grad_norm": 1.0096217742468876, + "learning_rate": 3.941047536723958e-05, + "loss": 0.797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43017202615737915, + "step": 1075, + "valid_targets_mean": 13605.6, + "valid_targets_min": 1339 + }, + { + "epoch": 0.8537549407114624, + "grad_norm": 0.604907899007041, + "learning_rate": 3.939709962949132e-05, + "loss": 0.7793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.426268994808197, + "step": 1080, + "valid_targets_mean": 14788.2, + "valid_targets_min": 4211 + }, + { + "epoch": 0.857707509881423, + "grad_norm": 0.7266729603581471, + "learning_rate": 3.938357616929638e-05, + "loss": 0.8058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39704304933547974, + "step": 1085, + "valid_targets_mean": 14276.3, + "valid_targets_min": 633 + }, + { + "epoch": 0.8616600790513834, + "grad_norm": 0.6019346220214667, + "learning_rate": 3.936990508964537e-05, + "loss": 0.7675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36863335967063904, + "step": 1090, + "valid_targets_mean": 14468.4, + "valid_targets_min": 4969 + }, + { + "epoch": 0.8656126482213439, + "grad_norm": 0.7377704687433713, + "learning_rate": 3.935608649465308e-05, + "loss": 0.7718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38709306716918945, + "step": 1095, + "valid_targets_mean": 14547.2, + "valid_targets_min": 4364 + }, + { + "epoch": 0.8695652173913043, + "grad_norm": 0.8031350923823615, + "learning_rate": 3.9342120489557754e-05, + "loss": 0.7829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36482474207878113, + "step": 1100, + "valid_targets_mean": 13067.8, + "valid_targets_min": 2196 + }, + { + "epoch": 0.8735177865612648, + "grad_norm": 0.5373480574122789, + "learning_rate": 3.932800718072027e-05, + "loss": 0.7804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41225314140319824, + "step": 1105, + "valid_targets_mean": 13840.8, + "valid_targets_min": 1676 + }, + { + "epoch": 0.8774703557312253, + "grad_norm": 0.7318197588026999, + "learning_rate": 3.9313746675623317e-05, + "loss": 0.7827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39296087622642517, + "step": 1110, + "valid_targets_mean": 14714.1, + "valid_targets_min": 4275 + }, + { + "epoch": 0.8814229249011858, + "grad_norm": 0.6461566695712441, + "learning_rate": 3.92993390828706e-05, + "loss": 0.7967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4027155041694641, + "step": 1115, + "valid_targets_mean": 14502.2, + "valid_targets_min": 2924 + }, + { + "epoch": 0.8853754940711462, + "grad_norm": 0.7779242325786324, + "learning_rate": 3.928478451218597e-05, + "loss": 0.7777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4077022671699524, + "step": 1120, + "valid_targets_mean": 15380.9, + "valid_targets_min": 5404 + }, + { + "epoch": 0.8893280632411067, + "grad_norm": 0.7566872386999974, + "learning_rate": 3.9270083074412667e-05, + "loss": 0.7884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4055677056312561, + "step": 1125, + "valid_targets_mean": 15225.2, + "valid_targets_min": 3784 + }, + { + "epoch": 0.8932806324110671, + "grad_norm": 0.8000934078390987, + "learning_rate": 3.925523488151236e-05, + "loss": 0.8017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35032448172569275, + "step": 1130, + "valid_targets_mean": 12651.5, + "valid_targets_min": 2236 + }, + { + "epoch": 0.8972332015810277, + "grad_norm": 0.5827405853552103, + "learning_rate": 3.924024004656443e-05, + "loss": 0.7926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43031615018844604, + "step": 1135, + "valid_targets_mean": 15264.1, + "valid_targets_min": 4312 + }, + { + "epoch": 0.9011857707509882, + "grad_norm": 0.5616682795123106, + "learning_rate": 3.922509868376499e-05, + "loss": 0.7838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39970850944519043, + "step": 1140, + "valid_targets_mean": 15327.6, + "valid_targets_min": 5057 + }, + { + "epoch": 0.9051383399209486, + "grad_norm": 0.6278551194992027, + "learning_rate": 3.9209810908426084e-05, + "loss": 0.7806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39955222606658936, + "step": 1145, + "valid_targets_mean": 14842.1, + "valid_targets_min": 4555 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 0.6531283079159776, + "learning_rate": 3.919437683697479e-05, + "loss": 0.7756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39462852478027344, + "step": 1150, + "valid_targets_mean": 14640.0, + "valid_targets_min": 3033 + }, + { + "epoch": 0.9130434782608695, + "grad_norm": 0.8359205926001623, + "learning_rate": 3.917879658695232e-05, + "loss": 0.7867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4111984372138977, + "step": 1155, + "valid_targets_mean": 13687.5, + "valid_targets_min": 3446 + }, + { + "epoch": 0.9169960474308301, + "grad_norm": 0.6609928304632963, + "learning_rate": 3.916307027701317e-05, + "loss": 0.7765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36376020312309265, + "step": 1160, + "valid_targets_mean": 13455.4, + "valid_targets_min": 2630 + }, + { + "epoch": 0.9209486166007905, + "grad_norm": 0.9258709588866835, + "learning_rate": 3.914719802692413e-05, + "loss": 0.7766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3639974296092987, + "step": 1165, + "valid_targets_mean": 14447.8, + "valid_targets_min": 2803 + }, + { + "epoch": 0.924901185770751, + "grad_norm": 0.7363430282912075, + "learning_rate": 3.9131179957563494e-05, + "loss": 0.7741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.373590350151062, + "step": 1170, + "valid_targets_mean": 13315.7, + "valid_targets_min": 793 + }, + { + "epoch": 0.9288537549407114, + "grad_norm": 0.4693544289518966, + "learning_rate": 3.911501619092001e-05, + "loss": 0.7933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38991236686706543, + "step": 1175, + "valid_targets_mean": 14675.7, + "valid_targets_min": 1153 + }, + { + "epoch": 0.932806324110672, + "grad_norm": 0.5514669717088364, + "learning_rate": 3.909870685009205e-05, + "loss": 0.7807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43705636262893677, + "step": 1180, + "valid_targets_mean": 15243.2, + "valid_targets_min": 1477 + }, + { + "epoch": 0.9367588932806324, + "grad_norm": 0.5087842217827133, + "learning_rate": 3.908225205928661e-05, + "loss": 0.7799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4050707519054413, + "step": 1185, + "valid_targets_mean": 14066.0, + "valid_targets_min": 1354 + }, + { + "epoch": 0.9407114624505929, + "grad_norm": 0.46146125628360024, + "learning_rate": 3.906565194381842e-05, + "loss": 0.7716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3831913471221924, + "step": 1190, + "valid_targets_mean": 13686.8, + "valid_targets_min": 675 + }, + { + "epoch": 0.9446640316205533, + "grad_norm": 0.4970468063299326, + "learning_rate": 3.904890663010893e-05, + "loss": 0.7819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.337127685546875, + "step": 1195, + "valid_targets_mean": 13225.9, + "valid_targets_min": 2664 + }, + { + "epoch": 0.9486166007905138, + "grad_norm": 0.6065018214201138, + "learning_rate": 3.903201624568539e-05, + "loss": 0.7776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3826074004173279, + "step": 1200, + "valid_targets_mean": 13653.7, + "valid_targets_min": 1416 + }, + { + "epoch": 0.9525691699604744, + "grad_norm": 0.5917545832047351, + "learning_rate": 3.901498091917986e-05, + "loss": 0.7801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.404086172580719, + "step": 1205, + "valid_targets_mean": 13904.3, + "valid_targets_min": 749 + }, + { + "epoch": 0.9565217391304348, + "grad_norm": 0.47642824214640456, + "learning_rate": 3.8997800780328237e-05, + "loss": 0.797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40527474880218506, + "step": 1210, + "valid_targets_mean": 13919.1, + "valid_targets_min": 1741 + }, + { + "epoch": 0.9604743083003953, + "grad_norm": 0.5333067098076941, + "learning_rate": 3.898047595996927e-05, + "loss": 0.7695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4114024043083191, + "step": 1215, + "valid_targets_mean": 14451.1, + "valid_targets_min": 1581 + }, + { + "epoch": 0.9644268774703557, + "grad_norm": 0.7213145144510875, + "learning_rate": 3.896300659004355e-05, + "loss": 0.7825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40361258387565613, + "step": 1220, + "valid_targets_mean": 14150.2, + "valid_targets_min": 3886 + }, + { + "epoch": 0.9683794466403162, + "grad_norm": 0.5342742826775776, + "learning_rate": 3.89453928035925e-05, + "loss": 0.7879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42812681198120117, + "step": 1225, + "valid_targets_mean": 14423.6, + "valid_targets_min": 1638 + }, + { + "epoch": 0.9723320158102767, + "grad_norm": 0.6695987816412164, + "learning_rate": 3.892763473475742e-05, + "loss": 0.7792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39430859684944153, + "step": 1230, + "valid_targets_mean": 14692.5, + "valid_targets_min": 4159 + }, + { + "epoch": 0.9762845849802372, + "grad_norm": 0.6580351950918835, + "learning_rate": 3.890973251877838e-05, + "loss": 0.7749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3833167850971222, + "step": 1235, + "valid_targets_mean": 13599.8, + "valid_targets_min": 736 + }, + { + "epoch": 0.9802371541501976, + "grad_norm": 0.5272927882404158, + "learning_rate": 3.8891686291993244e-05, + "loss": 0.7657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42486029863357544, + "step": 1240, + "valid_targets_mean": 15179.8, + "valid_targets_min": 3789 + }, + { + "epoch": 0.9841897233201581, + "grad_norm": 0.5086849462318614, + "learning_rate": 3.887349619183662e-05, + "loss": 0.7857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4005928635597229, + "step": 1245, + "valid_targets_mean": 13824.0, + "valid_targets_min": 1995 + }, + { + "epoch": 0.9881422924901185, + "grad_norm": 0.5621690345892484, + "learning_rate": 3.8855162356838816e-05, + "loss": 0.7725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.394489586353302, + "step": 1250, + "valid_targets_mean": 14316.6, + "valid_targets_min": 1380 + }, + { + "epoch": 0.9920948616600791, + "grad_norm": 0.5386706641463076, + "learning_rate": 3.8836684926624774e-05, + "loss": 0.7883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37907958030700684, + "step": 1255, + "valid_targets_mean": 13220.4, + "valid_targets_min": 1674 + }, + { + "epoch": 0.9960474308300395, + "grad_norm": 0.4868295746643737, + "learning_rate": 3.8818064041913015e-05, + "loss": 0.7937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38478803634643555, + "step": 1260, + "valid_targets_mean": 15525.2, + "valid_targets_min": 4422 + }, + { + "epoch": 1.0, + "grad_norm": 0.4739979405181958, + "learning_rate": 3.8799299844514566e-05, + "loss": 0.7924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4166604280471802, + "step": 1265, + "valid_targets_mean": 14266.3, + "valid_targets_min": 3169 + }, + { + "epoch": 1.0039525691699605, + "grad_norm": 0.5342357619414959, + "learning_rate": 3.878039247733189e-05, + "loss": 0.7824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3798103630542755, + "step": 1270, + "valid_targets_mean": 14547.9, + "valid_targets_min": 2897 + }, + { + "epoch": 1.007905138339921, + "grad_norm": 0.590962259249142, + "learning_rate": 3.876134208435777e-05, + "loss": 0.7691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36821886897087097, + "step": 1275, + "valid_targets_mean": 13532.7, + "valid_targets_min": 1544 + }, + { + "epoch": 1.0118577075098814, + "grad_norm": 0.5669115684178365, + "learning_rate": 3.874214881067425e-05, + "loss": 0.7879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39285504817962646, + "step": 1280, + "valid_targets_mean": 13420.7, + "valid_targets_min": 1473 + }, + { + "epoch": 1.0158102766798418, + "grad_norm": 0.4965887715656634, + "learning_rate": 3.872281280245149e-05, + "loss": 0.7643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39528465270996094, + "step": 1285, + "valid_targets_mean": 13819.2, + "valid_targets_min": 3537 + }, + { + "epoch": 1.0197628458498025, + "grad_norm": 0.44549375768947397, + "learning_rate": 3.870333420694668e-05, + "loss": 0.7636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3964688777923584, + "step": 1290, + "valid_targets_mean": 13786.1, + "valid_targets_min": 3753 + }, + { + "epoch": 1.023715415019763, + "grad_norm": 0.5294111864354341, + "learning_rate": 3.868371317250293e-05, + "loss": 0.7902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3844342827796936, + "step": 1295, + "valid_targets_mean": 14608.9, + "valid_targets_min": 2146 + }, + { + "epoch": 1.0276679841897234, + "grad_norm": 0.5207714933915234, + "learning_rate": 3.8663949848548094e-05, + "loss": 0.7733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3812286853790283, + "step": 1300, + "valid_targets_mean": 13842.3, + "valid_targets_min": 1348 + }, + { + "epoch": 1.0316205533596838, + "grad_norm": 0.46679727265952536, + "learning_rate": 3.8644044385593675e-05, + "loss": 0.7892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39822548627853394, + "step": 1305, + "valid_targets_mean": 14319.5, + "valid_targets_min": 3823 + }, + { + "epoch": 1.0355731225296443, + "grad_norm": 0.5260285672661564, + "learning_rate": 3.862399693523366e-05, + "loss": 0.7832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39706867933273315, + "step": 1310, + "valid_targets_mean": 14132.3, + "valid_targets_min": 3236 + }, + { + "epoch": 1.0395256916996047, + "grad_norm": 0.5740474720627513, + "learning_rate": 3.8603807650143375e-05, + "loss": 0.7856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3991788625717163, + "step": 1315, + "valid_targets_mean": 14290.8, + "valid_targets_min": 4249 + }, + { + "epoch": 1.0434782608695652, + "grad_norm": 0.631979155842724, + "learning_rate": 3.858347668407831e-05, + "loss": 0.7853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42490431666374207, + "step": 1320, + "valid_targets_mean": 14606.3, + "valid_targets_min": 3237 + }, + { + "epoch": 1.0474308300395256, + "grad_norm": 0.5017338756007949, + "learning_rate": 3.8563004191872953e-05, + "loss": 0.7925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4062742590904236, + "step": 1325, + "valid_targets_mean": 14140.8, + "valid_targets_min": 1536 + }, + { + "epoch": 1.051383399209486, + "grad_norm": 0.5777696138846387, + "learning_rate": 3.854239032943962e-05, + "loss": 0.7801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4129505157470703, + "step": 1330, + "valid_targets_mean": 14036.2, + "valid_targets_min": 2683 + }, + { + "epoch": 1.0553359683794465, + "grad_norm": 0.5552618753898639, + "learning_rate": 3.8521635253767245e-05, + "loss": 0.7757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37739551067352295, + "step": 1335, + "valid_targets_mean": 14732.6, + "valid_targets_min": 4628 + }, + { + "epoch": 1.0592885375494072, + "grad_norm": 0.49905376845539867, + "learning_rate": 3.850073912292023e-05, + "loss": 0.7769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3903096914291382, + "step": 1340, + "valid_targets_mean": 14263.4, + "valid_targets_min": 2383 + }, + { + "epoch": 1.0632411067193677, + "grad_norm": 0.6989397994882822, + "learning_rate": 3.847970209603717e-05, + "loss": 0.7856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.365909606218338, + "step": 1345, + "valid_targets_mean": 13407.2, + "valid_targets_min": 1246 + }, + { + "epoch": 1.0671936758893281, + "grad_norm": 0.4760166977374061, + "learning_rate": 3.845852433332971e-05, + "loss": 0.7755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3669053316116333, + "step": 1350, + "valid_targets_mean": 14156.5, + "valid_targets_min": 608 + }, + { + "epoch": 1.0711462450592886, + "grad_norm": 0.5453657777192888, + "learning_rate": 3.843720599608128e-05, + "loss": 0.7735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4230594038963318, + "step": 1355, + "valid_targets_mean": 15405.5, + "valid_targets_min": 5342 + }, + { + "epoch": 1.075098814229249, + "grad_norm": 0.6196594242304612, + "learning_rate": 3.841574724664591e-05, + "loss": 0.7818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39519044756889343, + "step": 1360, + "valid_targets_mean": 14196.0, + "valid_targets_min": 2915 + }, + { + "epoch": 1.0790513833992095, + "grad_norm": 0.4807718657133351, + "learning_rate": 3.839414824844694e-05, + "loss": 0.7905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4080059230327606, + "step": 1365, + "valid_targets_mean": 15577.4, + "valid_targets_min": 6290 + }, + { + "epoch": 1.08300395256917, + "grad_norm": 0.8598795086324652, + "learning_rate": 3.83724091659758e-05, + "loss": 0.7727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4065036475658417, + "step": 1370, + "valid_targets_mean": 14032.3, + "valid_targets_min": 2395 + }, + { + "epoch": 1.0869565217391304, + "grad_norm": 0.679328087267936, + "learning_rate": 3.835053016479078e-05, + "loss": 0.776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42030614614486694, + "step": 1375, + "valid_targets_mean": 14986.0, + "valid_targets_min": 1954 + }, + { + "epoch": 1.0909090909090908, + "grad_norm": 0.8793461353848334, + "learning_rate": 3.832851141151574e-05, + "loss": 0.7803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35925623774528503, + "step": 1380, + "valid_targets_mean": 15015.3, + "valid_targets_min": 1839 + }, + { + "epoch": 1.0948616600790513, + "grad_norm": 0.6637809632873164, + "learning_rate": 3.830635307383884e-05, + "loss": 0.7666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3886744976043701, + "step": 1385, + "valid_targets_mean": 15059.2, + "valid_targets_min": 3927 + }, + { + "epoch": 1.098814229249012, + "grad_norm": 0.5980546754322869, + "learning_rate": 3.828405532051129e-05, + "loss": 0.7773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3916899859905243, + "step": 1390, + "valid_targets_mean": 14748.9, + "valid_targets_min": 3311 + }, + { + "epoch": 1.1027667984189724, + "grad_norm": 0.8491687192891921, + "learning_rate": 3.8261618321346034e-05, + "loss": 0.7556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3707411289215088, + "step": 1395, + "valid_targets_mean": 13844.5, + "valid_targets_min": 1984 + }, + { + "epoch": 1.1067193675889329, + "grad_norm": 0.6527546239740953, + "learning_rate": 3.823904224721647e-05, + "loss": 0.7791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.387393981218338, + "step": 1400, + "valid_targets_mean": 14572.8, + "valid_targets_min": 2146 + }, + { + "epoch": 1.1106719367588933, + "grad_norm": 0.9156929548641958, + "learning_rate": 3.821632727005516e-05, + "loss": 0.7926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38959160447120667, + "step": 1405, + "valid_targets_mean": 13833.4, + "valid_targets_min": 1107 + }, + { + "epoch": 1.1146245059288538, + "grad_norm": 0.6372553115241735, + "learning_rate": 3.8193473562852505e-05, + "loss": 0.7777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35072803497314453, + "step": 1410, + "valid_targets_mean": 13390.4, + "valid_targets_min": 2436 + }, + { + "epoch": 1.1185770750988142, + "grad_norm": 0.5108547796836694, + "learning_rate": 3.8170481299655426e-05, + "loss": 0.7886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38577091693878174, + "step": 1415, + "valid_targets_mean": 13795.2, + "valid_targets_min": 951 + }, + { + "epoch": 1.1225296442687747, + "grad_norm": 0.5233363383111848, + "learning_rate": 3.8147350655566045e-05, + "loss": 0.7739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37083107233047485, + "step": 1420, + "valid_targets_mean": 13460.2, + "valid_targets_min": 1339 + }, + { + "epoch": 1.1264822134387351, + "grad_norm": 0.482913229671558, + "learning_rate": 3.8124081806740353e-05, + "loss": 0.785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3840934932231903, + "step": 1425, + "valid_targets_mean": 14141.5, + "valid_targets_min": 949 + }, + { + "epoch": 1.1304347826086956, + "grad_norm": 0.5777422873912571, + "learning_rate": 3.8100674930386886e-05, + "loss": 0.7804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40525323152542114, + "step": 1430, + "valid_targets_mean": 14248.3, + "valid_targets_min": 1635 + }, + { + "epoch": 1.1343873517786562, + "grad_norm": 0.45523103758404176, + "learning_rate": 3.807713020476531e-05, + "loss": 0.7933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4087633192539215, + "step": 1435, + "valid_targets_mean": 14744.6, + "valid_targets_min": 4369 + }, + { + "epoch": 1.1383399209486167, + "grad_norm": 0.49797011095676763, + "learning_rate": 3.805344780918516e-05, + "loss": 0.7631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36382752656936646, + "step": 1440, + "valid_targets_mean": 14064.8, + "valid_targets_min": 1762 + }, + { + "epoch": 1.1422924901185771, + "grad_norm": 0.6635776703974982, + "learning_rate": 3.8029627924004396e-05, + "loss": 0.7804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36858490109443665, + "step": 1445, + "valid_targets_mean": 13216.6, + "valid_targets_min": 1005 + }, + { + "epoch": 1.1462450592885376, + "grad_norm": 0.6506173808188533, + "learning_rate": 3.800567073062806e-05, + "loss": 0.776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3638190031051636, + "step": 1450, + "valid_targets_mean": 13619.9, + "valid_targets_min": 1603 + }, + { + "epoch": 1.150197628458498, + "grad_norm": 0.446881345895381, + "learning_rate": 3.798157641150689e-05, + "loss": 0.7839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3571556806564331, + "step": 1455, + "valid_targets_mean": 13176.4, + "valid_targets_min": 570 + }, + { + "epoch": 1.1541501976284585, + "grad_norm": 0.5168604408266534, + "learning_rate": 3.795734515013594e-05, + "loss": 0.783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3646943271160126, + "step": 1460, + "valid_targets_mean": 14028.9, + "valid_targets_min": 1888 + }, + { + "epoch": 1.158102766798419, + "grad_norm": 0.53414849627983, + "learning_rate": 3.793297713105317e-05, + "loss": 0.7917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4441913366317749, + "step": 1465, + "valid_targets_mean": 15127.8, + "valid_targets_min": 3861 + }, + { + "epoch": 1.1620553359683794, + "grad_norm": 0.7672964622818446, + "learning_rate": 3.790847253983805e-05, + "loss": 0.7721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3654632866382599, + "step": 1470, + "valid_targets_mean": 13929.2, + "valid_targets_min": 1610 + }, + { + "epoch": 1.1660079051383399, + "grad_norm": 0.5275894590336688, + "learning_rate": 3.7883831563110135e-05, + "loss": 0.7755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4024370610713959, + "step": 1475, + "valid_targets_mean": 14021.5, + "valid_targets_min": 3227 + }, + { + "epoch": 1.1699604743083003, + "grad_norm": 0.5415038255129413, + "learning_rate": 3.7859054388527656e-05, + "loss": 0.786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39524945616722107, + "step": 1480, + "valid_targets_mean": 14051.8, + "valid_targets_min": 4730 + }, + { + "epoch": 1.1739130434782608, + "grad_norm": 0.9453312460972945, + "learning_rate": 3.783414120478608e-05, + "loss": 0.7841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3677988052368164, + "step": 1485, + "valid_targets_mean": 14003.8, + "valid_targets_min": 2901 + }, + { + "epoch": 1.1778656126482214, + "grad_norm": 0.7418554658732741, + "learning_rate": 3.780909220161669e-05, + "loss": 0.7814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4135068655014038, + "step": 1490, + "valid_targets_mean": 14559.0, + "valid_targets_min": 2371 + }, + { + "epoch": 1.1818181818181819, + "grad_norm": 0.5723971678177596, + "learning_rate": 3.778390756978512e-05, + "loss": 0.7839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37410277128219604, + "step": 1495, + "valid_targets_mean": 14224.0, + "valid_targets_min": 2930 + }, + { + "epoch": 1.1857707509881423, + "grad_norm": 0.5887552749339141, + "learning_rate": 3.7758587501089905e-05, + "loss": 0.7663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3459744453430176, + "step": 1500, + "valid_targets_mean": 13188.5, + "valid_targets_min": 1037 + }, + { + "epoch": 1.1897233201581028, + "grad_norm": 1.0255525810059505, + "learning_rate": 3.773313218836104e-05, + "loss": 0.7805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38787323236465454, + "step": 1505, + "valid_targets_mean": 15016.4, + "valid_targets_min": 4689 + }, + { + "epoch": 1.1936758893280632, + "grad_norm": 0.6233100480322117, + "learning_rate": 3.7707541825458486e-05, + "loss": 0.7766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36896324157714844, + "step": 1510, + "valid_targets_mean": 13540.9, + "valid_targets_min": 1320 + }, + { + "epoch": 1.1976284584980237, + "grad_norm": 0.43397678046535226, + "learning_rate": 3.7681816607270716e-05, + "loss": 0.7861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42983898520469666, + "step": 1515, + "valid_targets_mean": 13939.3, + "valid_targets_min": 1452 + }, + { + "epoch": 1.2015810276679841, + "grad_norm": 0.7578919398450119, + "learning_rate": 3.765595672971321e-05, + "loss": 0.7842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37220197916030884, + "step": 1520, + "valid_targets_mean": 14838.2, + "valid_targets_min": 2874 + }, + { + "epoch": 1.2055335968379446, + "grad_norm": 0.6410271012223142, + "learning_rate": 3.762996238972698e-05, + "loss": 0.779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41356992721557617, + "step": 1525, + "valid_targets_mean": 14048.7, + "valid_targets_min": 1030 + }, + { + "epoch": 1.2094861660079053, + "grad_norm": 0.6993493633996323, + "learning_rate": 3.760383378527707e-05, + "loss": 0.784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36171770095825195, + "step": 1530, + "valid_targets_mean": 14227.4, + "valid_targets_min": 1677 + }, + { + "epoch": 1.2134387351778657, + "grad_norm": 0.5033586106724561, + "learning_rate": 3.7577571115351005e-05, + "loss": 0.781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4071463644504547, + "step": 1535, + "valid_targets_mean": 14409.3, + "valid_targets_min": 1801 + }, + { + "epoch": 1.2173913043478262, + "grad_norm": 0.7010092292333622, + "learning_rate": 3.755117457995737e-05, + "loss": 0.782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40729638934135437, + "step": 1540, + "valid_targets_mean": 14081.2, + "valid_targets_min": 732 + }, + { + "epoch": 1.2213438735177866, + "grad_norm": 0.6356823618999157, + "learning_rate": 3.7524644380124186e-05, + "loss": 0.77, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3809888958930969, + "step": 1545, + "valid_targets_mean": 14221.7, + "valid_targets_min": 885 + }, + { + "epoch": 1.225296442687747, + "grad_norm": 0.588171705258684, + "learning_rate": 3.7497980717897426e-05, + "loss": 0.7878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3842422664165497, + "step": 1550, + "valid_targets_mean": 14680.2, + "valid_targets_min": 1793 + }, + { + "epoch": 1.2292490118577075, + "grad_norm": 0.6264178439438822, + "learning_rate": 3.747118379633949e-05, + "loss": 0.7908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35870692133903503, + "step": 1555, + "valid_targets_mean": 12557.0, + "valid_targets_min": 1414 + }, + { + "epoch": 1.233201581027668, + "grad_norm": 0.41483528773042505, + "learning_rate": 3.7444253819527634e-05, + "loss": 0.7855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38512998819351196, + "step": 1560, + "valid_targets_mean": 15098.4, + "valid_targets_min": 2046 + }, + { + "epoch": 1.2371541501976284, + "grad_norm": 0.6600107844211872, + "learning_rate": 3.741719099255241e-05, + "loss": 0.784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3800917863845825, + "step": 1565, + "valid_targets_mean": 14874.0, + "valid_targets_min": 4374 + }, + { + "epoch": 1.2411067193675889, + "grad_norm": 0.5228175226408358, + "learning_rate": 3.7389995521516145e-05, + "loss": 0.778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3650703430175781, + "step": 1570, + "valid_targets_mean": 13051.7, + "valid_targets_min": 1653 + }, + { + "epoch": 1.2450592885375493, + "grad_norm": 0.480270066147165, + "learning_rate": 3.73626676135313e-05, + "loss": 0.7753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3526269197463989, + "step": 1575, + "valid_targets_mean": 12921.1, + "valid_targets_min": 503 + }, + { + "epoch": 1.2490118577075098, + "grad_norm": 0.49357168276451185, + "learning_rate": 3.733520747671897e-05, + "loss": 0.7664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35789427161216736, + "step": 1580, + "valid_targets_mean": 13685.4, + "valid_targets_min": 2750 + }, + { + "epoch": 1.2529644268774702, + "grad_norm": 0.6650843991131056, + "learning_rate": 3.730761532020727e-05, + "loss": 0.7894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36531496047973633, + "step": 1585, + "valid_targets_mean": 13332.9, + "valid_targets_min": 1428 + }, + { + "epoch": 1.256916996047431, + "grad_norm": 0.6324833713265132, + "learning_rate": 3.72798913541297e-05, + "loss": 0.7838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3598484694957733, + "step": 1590, + "valid_targets_mean": 13785.1, + "valid_targets_min": 730 + }, + { + "epoch": 1.2608695652173914, + "grad_norm": 0.7407410013810838, + "learning_rate": 3.7252035789623627e-05, + "loss": 0.7803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4599645733833313, + "step": 1595, + "valid_targets_mean": 14319.5, + "valid_targets_min": 2869 + }, + { + "epoch": 1.2648221343873518, + "grad_norm": 0.6492261762231131, + "learning_rate": 3.722404883882858e-05, + "loss": 0.7794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3990551233291626, + "step": 1600, + "valid_targets_mean": 15387.3, + "valid_targets_min": 967 + }, + { + "epoch": 1.2687747035573123, + "grad_norm": 0.5464122937288279, + "learning_rate": 3.719593071488474e-05, + "loss": 0.7958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38981708884239197, + "step": 1605, + "valid_targets_mean": 14666.3, + "valid_targets_min": 1719 + }, + { + "epoch": 1.2727272727272727, + "grad_norm": 0.6023546941182465, + "learning_rate": 3.716768163193123e-05, + "loss": 0.7913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4197324514389038, + "step": 1610, + "valid_targets_mean": 14900.7, + "valid_targets_min": 4574 + }, + { + "epoch": 1.2766798418972332, + "grad_norm": 0.44316303224116205, + "learning_rate": 3.7139301805104526e-05, + "loss": 0.7728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37879401445388794, + "step": 1615, + "valid_targets_mean": 14493.4, + "valid_targets_min": 4086 + }, + { + "epoch": 1.2806324110671936, + "grad_norm": 0.514119783972632, + "learning_rate": 3.711079145053681e-05, + "loss": 0.772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37435469031333923, + "step": 1620, + "valid_targets_mean": 14006.3, + "valid_targets_min": 300 + }, + { + "epoch": 1.2845849802371543, + "grad_norm": 0.5736090993151349, + "learning_rate": 3.708215078535432e-05, + "loss": 0.774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38007426261901855, + "step": 1625, + "valid_targets_mean": 14081.3, + "valid_targets_min": 2940 + }, + { + "epoch": 1.2885375494071147, + "grad_norm": 0.6806353271649632, + "learning_rate": 3.70533800276757e-05, + "loss": 0.7947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4096287786960602, + "step": 1630, + "valid_targets_mean": 14706.3, + "valid_targets_min": 1441 + }, + { + "epoch": 1.2924901185770752, + "grad_norm": 0.5087871628044215, + "learning_rate": 3.702447939661036e-05, + "loss": 0.773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3534351587295532, + "step": 1635, + "valid_targets_mean": 13892.4, + "valid_targets_min": 5252 + }, + { + "epoch": 1.2964426877470356, + "grad_norm": 0.529237107941588, + "learning_rate": 3.699544911225675e-05, + "loss": 0.7765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40586304664611816, + "step": 1640, + "valid_targets_mean": 14478.2, + "valid_targets_min": 899 + }, + { + "epoch": 1.300395256916996, + "grad_norm": 0.6184748460065325, + "learning_rate": 3.696628939570074e-05, + "loss": 0.7771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35570770502090454, + "step": 1645, + "valid_targets_mean": 12987.0, + "valid_targets_min": 2496 + }, + { + "epoch": 1.3043478260869565, + "grad_norm": 0.5114007414210887, + "learning_rate": 3.693700046901393e-05, + "loss": 0.7811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3768381178379059, + "step": 1650, + "valid_targets_mean": 13878.3, + "valid_targets_min": 3093 + }, + { + "epoch": 1.308300395256917, + "grad_norm": 0.8427829265619798, + "learning_rate": 3.690758255525192e-05, + "loss": 0.7819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3798404932022095, + "step": 1655, + "valid_targets_mean": 13994.7, + "valid_targets_min": 4669 + }, + { + "epoch": 1.3122529644268774, + "grad_norm": 0.6468049711711297, + "learning_rate": 3.6878035878452656e-05, + "loss": 0.7678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3959360122680664, + "step": 1660, + "valid_targets_mean": 13465.2, + "valid_targets_min": 1092 + }, + { + "epoch": 1.316205533596838, + "grad_norm": 0.49779034955769463, + "learning_rate": 3.684836066363468e-05, + "loss": 0.7719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41004955768585205, + "step": 1665, + "valid_targets_mean": 14744.8, + "valid_targets_min": 736 + }, + { + "epoch": 1.3201581027667983, + "grad_norm": 0.5376896369327292, + "learning_rate": 3.6818557136795485e-05, + "loss": 0.7764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3893268406391144, + "step": 1670, + "valid_targets_mean": 14042.2, + "valid_targets_min": 4555 + }, + { + "epoch": 1.3241106719367588, + "grad_norm": 0.4392176849944792, + "learning_rate": 3.678862552490968e-05, + "loss": 0.7857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3976079821586609, + "step": 1675, + "valid_targets_mean": 14111.8, + "valid_targets_min": 2787 + }, + { + "epoch": 1.3280632411067192, + "grad_norm": 0.4660316079214555, + "learning_rate": 3.6758566055927415e-05, + "loss": 0.766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38921529054641724, + "step": 1680, + "valid_targets_mean": 14248.9, + "valid_targets_min": 770 + }, + { + "epoch": 1.33201581027668, + "grad_norm": 0.44783813733984434, + "learning_rate": 3.6728378958772496e-05, + "loss": 0.7842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4203168749809265, + "step": 1685, + "valid_targets_mean": 14920.5, + "valid_targets_min": 5513 + }, + { + "epoch": 1.3359683794466404, + "grad_norm": 0.4798241137219911, + "learning_rate": 3.6698064463340726e-05, + "loss": 0.7744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38677555322647095, + "step": 1690, + "valid_targets_mean": 13793.9, + "valid_targets_min": 1319 + }, + { + "epoch": 1.3399209486166008, + "grad_norm": 0.5766653224769768, + "learning_rate": 3.6667622800498144e-05, + "loss": 0.7806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3811759948730469, + "step": 1695, + "valid_targets_mean": 15180.7, + "valid_targets_min": 832 + }, + { + "epoch": 1.3438735177865613, + "grad_norm": 0.8947576382199715, + "learning_rate": 3.6637054202079255e-05, + "loss": 0.792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3813202977180481, + "step": 1700, + "valid_targets_mean": 14062.2, + "valid_targets_min": 2456 + }, + { + "epoch": 1.3478260869565217, + "grad_norm": 0.8643349314761717, + "learning_rate": 3.6606358900885256e-05, + "loss": 0.7819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3720206618309021, + "step": 1705, + "valid_targets_mean": 13215.6, + "valid_targets_min": 1566 + }, + { + "epoch": 1.3517786561264822, + "grad_norm": 0.5453011608686016, + "learning_rate": 3.6575537130682286e-05, + "loss": 0.775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39780330657958984, + "step": 1710, + "valid_targets_mean": 14432.9, + "valid_targets_min": 1221 + }, + { + "epoch": 1.3557312252964426, + "grad_norm": 0.8194366601708298, + "learning_rate": 3.654458912619962e-05, + "loss": 0.7809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3681338429450989, + "step": 1715, + "valid_targets_mean": 13720.8, + "valid_targets_min": 3980 + }, + { + "epoch": 1.359683794466403, + "grad_norm": 0.5972984559047401, + "learning_rate": 3.651351512312792e-05, + "loss": 0.7873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39449793100357056, + "step": 1720, + "valid_targets_mean": 14763.7, + "valid_targets_min": 5959 + }, + { + "epoch": 1.3636363636363638, + "grad_norm": 0.6065795728492546, + "learning_rate": 3.648231535811738e-05, + "loss": 0.7847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35031813383102417, + "step": 1725, + "valid_targets_mean": 14084.4, + "valid_targets_min": 4440 + }, + { + "epoch": 1.3675889328063242, + "grad_norm": 0.47804721378149484, + "learning_rate": 3.645099006877599e-05, + "loss": 0.7683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4125348925590515, + "step": 1730, + "valid_targets_mean": 15571.3, + "valid_targets_min": 4274 + }, + { + "epoch": 1.3715415019762847, + "grad_norm": 0.6173039148058694, + "learning_rate": 3.641953949366769e-05, + "loss": 0.7824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3862411677837372, + "step": 1735, + "valid_targets_mean": 14265.9, + "valid_targets_min": 2384 + }, + { + "epoch": 1.3754940711462451, + "grad_norm": 0.4489997168249982, + "learning_rate": 3.638796387231053e-05, + "loss": 0.7863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4159923195838928, + "step": 1740, + "valid_targets_mean": 14699.1, + "valid_targets_min": 1566 + }, + { + "epoch": 1.3794466403162056, + "grad_norm": 0.4469178994183297, + "learning_rate": 3.635626344517492e-05, + "loss": 0.7802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36166131496429443, + "step": 1745, + "valid_targets_mean": 12857.9, + "valid_targets_min": 674 + }, + { + "epoch": 1.383399209486166, + "grad_norm": 0.638598977463782, + "learning_rate": 3.6324438453681714e-05, + "loss": 0.7819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41108885407447815, + "step": 1750, + "valid_targets_mean": 14514.6, + "valid_targets_min": 3282 + }, + { + "epoch": 1.3873517786561265, + "grad_norm": 0.5121480648821587, + "learning_rate": 3.629248914020043e-05, + "loss": 0.782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3955515921115875, + "step": 1755, + "valid_targets_mean": 14528.0, + "valid_targets_min": 2983 + }, + { + "epoch": 1.391304347826087, + "grad_norm": 0.5541822301706387, + "learning_rate": 3.626041574804738e-05, + "loss": 0.7805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4199274480342865, + "step": 1760, + "valid_targets_mean": 14284.5, + "valid_targets_min": 3789 + }, + { + "epoch": 1.3952569169960474, + "grad_norm": 0.5952746435913819, + "learning_rate": 3.6228218521483826e-05, + "loss": 0.7826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38731759786605835, + "step": 1765, + "valid_targets_mean": 13294.1, + "valid_targets_min": 4041 + }, + { + "epoch": 1.3992094861660078, + "grad_norm": 0.669369051423579, + "learning_rate": 3.619589770571411e-05, + "loss": 0.7836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3596285879611969, + "step": 1770, + "valid_targets_mean": 13052.8, + "valid_targets_min": 2343 + }, + { + "epoch": 1.4031620553359683, + "grad_norm": 0.8544161872618881, + "learning_rate": 3.616345354688379e-05, + "loss": 0.7733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39350444078445435, + "step": 1775, + "valid_targets_mean": 15320.4, + "valid_targets_min": 1075 + }, + { + "epoch": 1.4071146245059287, + "grad_norm": 0.6876301505757669, + "learning_rate": 3.613088629207778e-05, + "loss": 0.774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41779065132141113, + "step": 1780, + "valid_targets_mean": 15027.2, + "valid_targets_min": 5278 + }, + { + "epoch": 1.4110671936758894, + "grad_norm": 0.8273299735079659, + "learning_rate": 3.609819618931844e-05, + "loss": 0.7725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38137397170066833, + "step": 1785, + "valid_targets_mean": 14282.2, + "valid_targets_min": 1630 + }, + { + "epoch": 1.4150197628458498, + "grad_norm": 0.6799054119525976, + "learning_rate": 3.6065383487563707e-05, + "loss": 0.7723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3785322904586792, + "step": 1790, + "valid_targets_mean": 13775.6, + "valid_targets_min": 1334 + }, + { + "epoch": 1.4189723320158103, + "grad_norm": 0.4762819566098888, + "learning_rate": 3.60324484367052e-05, + "loss": 0.7749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40682607889175415, + "step": 1795, + "valid_targets_mean": 13758.6, + "valid_targets_min": 2279 + }, + { + "epoch": 1.4229249011857708, + "grad_norm": 0.5435955861103067, + "learning_rate": 3.599939128756631e-05, + "loss": 0.7854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3830779790878296, + "step": 1800, + "valid_targets_mean": 14477.2, + "valid_targets_min": 1766 + }, + { + "epoch": 1.4268774703557312, + "grad_norm": 0.41646771074544, + "learning_rate": 3.59662122919003e-05, + "loss": 0.7732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36587437987327576, + "step": 1805, + "valid_targets_mean": 13297.4, + "valid_targets_min": 1337 + }, + { + "epoch": 1.4308300395256917, + "grad_norm": 0.6268771226574205, + "learning_rate": 3.5932911702388356e-05, + "loss": 0.805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39879414439201355, + "step": 1810, + "valid_targets_mean": 15132.9, + "valid_targets_min": 9242 + }, + { + "epoch": 1.434782608695652, + "grad_norm": 0.43991140177565985, + "learning_rate": 3.5899489772637705e-05, + "loss": 0.7682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3434188663959503, + "step": 1815, + "valid_targets_mean": 12731.4, + "valid_targets_min": 1752 + }, + { + "epoch": 1.4387351778656128, + "grad_norm": 0.41506225441374556, + "learning_rate": 3.586594675717967e-05, + "loss": 0.7917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40633416175842285, + "step": 1820, + "valid_targets_mean": 14778.1, + "valid_targets_min": 2144 + }, + { + "epoch": 1.4426877470355732, + "grad_norm": 0.657568594161029, + "learning_rate": 3.58322829114677e-05, + "loss": 0.7852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3211144804954529, + "step": 1825, + "valid_targets_mean": 11677.2, + "valid_targets_min": 700 + }, + { + "epoch": 1.4466403162055337, + "grad_norm": 0.5571201691284725, + "learning_rate": 3.579849849187548e-05, + "loss": 0.7722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37913063168525696, + "step": 1830, + "valid_targets_mean": 14531.2, + "valid_targets_min": 1927 + }, + { + "epoch": 1.4505928853754941, + "grad_norm": 0.8587792542596606, + "learning_rate": 3.576459375569493e-05, + "loss": 0.7892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40625035762786865, + "step": 1835, + "valid_targets_mean": 13792.2, + "valid_targets_min": 1030 + }, + { + "epoch": 1.4545454545454546, + "grad_norm": 0.5852903462014718, + "learning_rate": 3.573056896113427e-05, + "loss": 0.7761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38232237100601196, + "step": 1840, + "valid_targets_mean": 13894.4, + "valid_targets_min": 1125 + }, + { + "epoch": 1.458498023715415, + "grad_norm": 0.7325821889009455, + "learning_rate": 3.569642436731604e-05, + "loss": 0.776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3642297387123108, + "step": 1845, + "valid_targets_mean": 14161.0, + "valid_targets_min": 1106 + }, + { + "epoch": 1.4624505928853755, + "grad_norm": 0.4541050079437419, + "learning_rate": 3.566216023427517e-05, + "loss": 0.7678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3948119878768921, + "step": 1850, + "valid_targets_mean": 14587.7, + "valid_targets_min": 1817 + }, + { + "epoch": 1.466403162055336, + "grad_norm": 0.8096533023170913, + "learning_rate": 3.562777682295691e-05, + "loss": 0.7746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3675066828727722, + "step": 1855, + "valid_targets_mean": 12776.5, + "valid_targets_min": 1070 + }, + { + "epoch": 1.4703557312252964, + "grad_norm": 0.6205881935725822, + "learning_rate": 3.559327439521495e-05, + "loss": 0.7907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39876341819763184, + "step": 1860, + "valid_targets_mean": 15423.2, + "valid_targets_min": 5051 + }, + { + "epoch": 1.4743083003952568, + "grad_norm": 0.7056403628502835, + "learning_rate": 3.5558653213809324e-05, + "loss": 0.7739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3892015814781189, + "step": 1865, + "valid_targets_mean": 15138.2, + "valid_targets_min": 4709 + }, + { + "epoch": 1.4782608695652173, + "grad_norm": 0.7853390733319443, + "learning_rate": 3.552391354240451e-05, + "loss": 0.7751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4014977216720581, + "step": 1870, + "valid_targets_mean": 13906.6, + "valid_targets_min": 1108 + }, + { + "epoch": 1.4822134387351777, + "grad_norm": 0.6025018157671609, + "learning_rate": 3.548905564556732e-05, + "loss": 0.7914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37064599990844727, + "step": 1875, + "valid_targets_mean": 14411.2, + "valid_targets_min": 2506 + }, + { + "epoch": 1.4861660079051384, + "grad_norm": 0.4943977547510394, + "learning_rate": 3.545407978876497e-05, + "loss": 0.7893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3685780465602875, + "step": 1880, + "valid_targets_mean": 14093.8, + "valid_targets_min": 4601 + }, + { + "epoch": 1.4901185770750989, + "grad_norm": 0.5152065770750309, + "learning_rate": 3.5418986238363e-05, + "loss": 0.7766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39420703053474426, + "step": 1885, + "valid_targets_mean": 13917.6, + "valid_targets_min": 1624 + }, + { + "epoch": 1.4940711462450593, + "grad_norm": 0.3635896407104615, + "learning_rate": 3.538377526162331e-05, + "loss": 0.775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36466044187545776, + "step": 1890, + "valid_targets_mean": 14679.4, + "valid_targets_min": 1684 + }, + { + "epoch": 1.4980237154150198, + "grad_norm": 0.5420151357111238, + "learning_rate": 3.534844712670202e-05, + "loss": 0.7742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38826605677604675, + "step": 1895, + "valid_targets_mean": 15189.0, + "valid_targets_min": 2051 + }, + { + "epoch": 1.5019762845849802, + "grad_norm": 0.509313746556035, + "learning_rate": 3.5313002102647536e-05, + "loss": 0.7705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4002588987350464, + "step": 1900, + "valid_targets_mean": 14220.8, + "valid_targets_min": 3704 + }, + { + "epoch": 1.5059288537549407, + "grad_norm": 0.6311098405759559, + "learning_rate": 3.527744045939845e-05, + "loss": 0.7731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3740570545196533, + "step": 1905, + "valid_targets_mean": 13989.8, + "valid_targets_min": 1737 + }, + { + "epoch": 1.5098814229249014, + "grad_norm": 0.6881825488594006, + "learning_rate": 3.524176246778148e-05, + "loss": 0.7898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4014437198638916, + "step": 1910, + "valid_targets_mean": 14972.8, + "valid_targets_min": 4384 + }, + { + "epoch": 1.5138339920948618, + "grad_norm": 0.8022782014167682, + "learning_rate": 3.520596839950943e-05, + "loss": 0.7939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4084741473197937, + "step": 1915, + "valid_targets_mean": 15067.7, + "valid_targets_min": 3089 + }, + { + "epoch": 1.5177865612648223, + "grad_norm": 0.6659346494231555, + "learning_rate": 3.517005852717911e-05, + "loss": 0.7838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3769366443157196, + "step": 1920, + "valid_targets_mean": 13914.8, + "valid_targets_min": 1780 + }, + { + "epoch": 1.5217391304347827, + "grad_norm": 0.477708479543674, + "learning_rate": 3.513403312426924e-05, + "loss": 0.7831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35111361742019653, + "step": 1925, + "valid_targets_mean": 13257.1, + "valid_targets_min": 1692 + }, + { + "epoch": 1.5256916996047432, + "grad_norm": 0.4423031102505199, + "learning_rate": 3.509789246513839e-05, + "loss": 0.7815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43305057287216187, + "step": 1930, + "valid_targets_mean": 14562.7, + "valid_targets_min": 2204 + }, + { + "epoch": 1.5296442687747036, + "grad_norm": 0.4640530612500586, + "learning_rate": 3.5061636825022906e-05, + "loss": 0.7698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39777421951293945, + "step": 1935, + "valid_targets_mean": 13672.5, + "valid_targets_min": 1926 + }, + { + "epoch": 1.533596837944664, + "grad_norm": 0.5016138355128864, + "learning_rate": 3.5025266480034786e-05, + "loss": 0.8063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4066345691680908, + "step": 1940, + "valid_targets_mean": 14598.0, + "valid_targets_min": 5758 + }, + { + "epoch": 1.5375494071146245, + "grad_norm": 0.4779775168278641, + "learning_rate": 3.498878170715957e-05, + "loss": 0.7888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3779437243938446, + "step": 1945, + "valid_targets_mean": 13770.6, + "valid_targets_min": 2469 + }, + { + "epoch": 1.541501976284585, + "grad_norm": 0.5141279389880361, + "learning_rate": 3.4952182784254265e-05, + "loss": 0.7823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41048893332481384, + "step": 1950, + "valid_targets_mean": 14889.8, + "valid_targets_min": 746 + }, + { + "epoch": 1.5454545454545454, + "grad_norm": 0.7818695014900082, + "learning_rate": 3.491546999004521e-05, + "loss": 0.7956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3636508584022522, + "step": 1955, + "valid_targets_mean": 14574.7, + "valid_targets_min": 2062 + }, + { + "epoch": 1.5494071146245059, + "grad_norm": 0.6086682136223084, + "learning_rate": 3.487864360412593e-05, + "loss": 0.7877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3359108567237854, + "step": 1960, + "valid_targets_mean": 12826.9, + "valid_targets_min": 897 + }, + { + "epoch": 1.5533596837944663, + "grad_norm": 0.66959428588689, + "learning_rate": 3.4841703906955064e-05, + "loss": 0.7873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3806391954421997, + "step": 1965, + "valid_targets_mean": 13913.0, + "valid_targets_min": 1186 + }, + { + "epoch": 1.5573122529644268, + "grad_norm": 0.6094175060741776, + "learning_rate": 3.480465117985416e-05, + "loss": 0.7838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3978649973869324, + "step": 1970, + "valid_targets_mean": 14434.3, + "valid_targets_min": 2012 + }, + { + "epoch": 1.5612648221343872, + "grad_norm": 0.5216627994410538, + "learning_rate": 3.47674857050056e-05, + "loss": 0.7913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3964079022407532, + "step": 1975, + "valid_targets_mean": 15286.6, + "valid_targets_min": 1839 + }, + { + "epoch": 1.5652173913043477, + "grad_norm": 0.4426966853343233, + "learning_rate": 3.47302077654504e-05, + "loss": 0.7813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3889954686164856, + "step": 1980, + "valid_targets_mean": 14059.7, + "valid_targets_min": 1069 + }, + { + "epoch": 1.5691699604743083, + "grad_norm": 0.7393176437337569, + "learning_rate": 3.4692817645086084e-05, + "loss": 0.7922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37355679273605347, + "step": 1985, + "valid_targets_mean": 13672.5, + "valid_targets_min": 958 + }, + { + "epoch": 1.5731225296442688, + "grad_norm": 0.7014405631850327, + "learning_rate": 3.4655315628664494e-05, + "loss": 0.7858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4012637734413147, + "step": 1990, + "valid_targets_mean": 14786.7, + "valid_targets_min": 2111 + }, + { + "epoch": 1.5770750988142292, + "grad_norm": 0.49343333744392104, + "learning_rate": 3.461770200178965e-05, + "loss": 0.7834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36578860878944397, + "step": 1995, + "valid_targets_mean": 13461.8, + "valid_targets_min": 1404 + }, + { + "epoch": 1.5810276679841897, + "grad_norm": 0.46350336579253865, + "learning_rate": 3.457997705091558e-05, + "loss": 0.7661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36132892966270447, + "step": 2000, + "valid_targets_mean": 12974.8, + "valid_targets_min": 2100 + }, + { + "epoch": 1.5849802371541502, + "grad_norm": 0.6905787792730621, + "learning_rate": 3.454214106334408e-05, + "loss": 0.7786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43557900190353394, + "step": 2005, + "valid_targets_mean": 14944.0, + "valid_targets_min": 6066 + }, + { + "epoch": 1.5889328063241108, + "grad_norm": 0.5125968353197242, + "learning_rate": 3.450419432722261e-05, + "loss": 0.7802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37530091404914856, + "step": 2010, + "valid_targets_mean": 13644.8, + "valid_targets_min": 2822 + }, + { + "epoch": 1.5928853754940713, + "grad_norm": 0.5160553875112762, + "learning_rate": 3.4466137131542035e-05, + "loss": 0.7872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4086306095123291, + "step": 2015, + "valid_targets_mean": 15325.6, + "valid_targets_min": 3981 + }, + { + "epoch": 1.5968379446640317, + "grad_norm": 0.5424605030921852, + "learning_rate": 3.442796976613446e-05, + "loss": 0.774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4186684787273407, + "step": 2020, + "valid_targets_mean": 15360.2, + "valid_targets_min": 3428 + }, + { + "epoch": 1.6007905138339922, + "grad_norm": 0.41692425688769086, + "learning_rate": 3.438969252167098e-05, + "loss": 0.7806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4237048923969269, + "step": 2025, + "valid_targets_mean": 15740.9, + "valid_targets_min": 1131 + }, + { + "epoch": 1.6047430830039526, + "grad_norm": 0.4891500171341543, + "learning_rate": 3.435130568965954e-05, + "loss": 0.7713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3993532955646515, + "step": 2030, + "valid_targets_mean": 14252.5, + "valid_targets_min": 496 + }, + { + "epoch": 1.608695652173913, + "grad_norm": 0.4471844431487332, + "learning_rate": 3.4312809562442636e-05, + "loss": 0.7731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3738534152507782, + "step": 2035, + "valid_targets_mean": 13023.1, + "valid_targets_min": 2783 + }, + { + "epoch": 1.6126482213438735, + "grad_norm": 0.48395648414274695, + "learning_rate": 3.427420443319514e-05, + "loss": 0.7728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4212624132633209, + "step": 2040, + "valid_targets_mean": 14978.6, + "valid_targets_min": 1390 + }, + { + "epoch": 1.616600790513834, + "grad_norm": 0.5410671012742138, + "learning_rate": 3.423549059592204e-05, + "loss": 0.7859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4304890036582947, + "step": 2045, + "valid_targets_mean": 15055.6, + "valid_targets_min": 4603 + }, + { + "epoch": 1.6205533596837944, + "grad_norm": 0.44801006652852465, + "learning_rate": 3.4196668345456224e-05, + "loss": 0.7942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3621789216995239, + "step": 2050, + "valid_targets_mean": 13749.3, + "valid_targets_min": 2182 + }, + { + "epoch": 1.6245059288537549, + "grad_norm": 0.5873084208663081, + "learning_rate": 3.41577379774562e-05, + "loss": 0.7724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34086209535598755, + "step": 2055, + "valid_targets_mean": 13140.2, + "valid_targets_min": 3044 + }, + { + "epoch": 1.6284584980237153, + "grad_norm": 0.47596878794445907, + "learning_rate": 3.4118699788403895e-05, + "loss": 0.7812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3554530739784241, + "step": 2060, + "valid_targets_mean": 13533.8, + "valid_targets_min": 770 + }, + { + "epoch": 1.6324110671936758, + "grad_norm": 0.7948734707588258, + "learning_rate": 3.407955407560236e-05, + "loss": 0.7764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3993421196937561, + "step": 2065, + "valid_targets_mean": 13568.9, + "valid_targets_min": 1750 + }, + { + "epoch": 1.6363636363636362, + "grad_norm": 0.49155438781883515, + "learning_rate": 3.4040301137173485e-05, + "loss": 0.7765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3757030963897705, + "step": 2070, + "valid_targets_mean": 13965.2, + "valid_targets_min": 560 + }, + { + "epoch": 1.6403162055335967, + "grad_norm": 0.9290962141178459, + "learning_rate": 3.400094127205581e-05, + "loss": 0.7747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4002494513988495, + "step": 2075, + "valid_targets_mean": 14091.7, + "valid_targets_min": 1500 + }, + { + "epoch": 1.6442687747035574, + "grad_norm": 0.4977308123139238, + "learning_rate": 3.396147478000215e-05, + "loss": 0.7905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4093455672264099, + "step": 2080, + "valid_targets_mean": 15227.9, + "valid_targets_min": 4478 + }, + { + "epoch": 1.6482213438735178, + "grad_norm": 1.4819604547912453, + "learning_rate": 3.392190196157738e-05, + "loss": 0.7652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38078370690345764, + "step": 2085, + "valid_targets_mean": 15566.4, + "valid_targets_min": 687 + }, + { + "epoch": 1.6521739130434783, + "grad_norm": 1.3544495209576286, + "learning_rate": 3.388222311815614e-05, + "loss": 0.7784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38564109802246094, + "step": 2090, + "valid_targets_mean": 14513.3, + "valid_targets_min": 1685 + }, + { + "epoch": 1.6561264822134387, + "grad_norm": 0.8411483827902816, + "learning_rate": 3.384243855192047e-05, + "loss": 0.7942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4128496050834656, + "step": 2095, + "valid_targets_mean": 14866.0, + "valid_targets_min": 4673 + }, + { + "epoch": 1.6600790513833992, + "grad_norm": 1.607407493409883, + "learning_rate": 3.380254856585763e-05, + "loss": 0.7819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39513111114501953, + "step": 2100, + "valid_targets_mean": 14421.3, + "valid_targets_min": 1419 + }, + { + "epoch": 1.6640316205533598, + "grad_norm": 0.8534950622433303, + "learning_rate": 3.376255346375768e-05, + "loss": 0.7727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37644681334495544, + "step": 2105, + "valid_targets_mean": 13447.2, + "valid_targets_min": 644 + }, + { + "epoch": 1.6679841897233203, + "grad_norm": 0.45113845506656797, + "learning_rate": 3.3722453550211236e-05, + "loss": 0.7824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3672454357147217, + "step": 2110, + "valid_targets_mean": 12994.0, + "valid_targets_min": 739 + }, + { + "epoch": 1.6719367588932808, + "grad_norm": 1.1963527809622616, + "learning_rate": 3.36822491306071e-05, + "loss": 0.7823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3866955041885376, + "step": 2115, + "valid_targets_mean": 13629.6, + "valid_targets_min": 1669 + }, + { + "epoch": 1.6758893280632412, + "grad_norm": 1.1187339643283005, + "learning_rate": 3.3641940511129994e-05, + "loss": 0.7801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3650205135345459, + "step": 2120, + "valid_targets_mean": 13348.2, + "valid_targets_min": 1766 + }, + { + "epoch": 1.6798418972332017, + "grad_norm": 1.3327886647754081, + "learning_rate": 3.3601527998758165e-05, + "loss": 0.7873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4317617416381836, + "step": 2125, + "valid_targets_mean": 15053.7, + "valid_targets_min": 1095 + }, + { + "epoch": 1.683794466403162, + "grad_norm": 0.9099328078572083, + "learning_rate": 3.3561011901261104e-05, + "loss": 0.7744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3942001760005951, + "step": 2130, + "valid_targets_mean": 13941.5, + "valid_targets_min": 1954 + }, + { + "epoch": 1.6877470355731226, + "grad_norm": 0.8310573553242454, + "learning_rate": 3.352039252719715e-05, + "loss": 0.7673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39484018087387085, + "step": 2135, + "valid_targets_mean": 15296.1, + "valid_targets_min": 2935 + }, + { + "epoch": 1.691699604743083, + "grad_norm": 0.6593253329908947, + "learning_rate": 3.347967018591118e-05, + "loss": 0.7602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3869093358516693, + "step": 2140, + "valid_targets_mean": 13737.6, + "valid_targets_min": 1576 + }, + { + "epoch": 1.6956521739130435, + "grad_norm": 0.514017138985256, + "learning_rate": 3.343884518753223e-05, + "loss": 0.78, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37932732701301575, + "step": 2145, + "valid_targets_mean": 14139.6, + "valid_targets_min": 2912 + }, + { + "epoch": 1.699604743083004, + "grad_norm": 0.736703147010078, + "learning_rate": 3.3397917842971154e-05, + "loss": 0.7694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4088629484176636, + "step": 2150, + "valid_targets_mean": 13998.8, + "valid_targets_min": 991 + }, + { + "epoch": 1.7035573122529644, + "grad_norm": 0.38982584208698545, + "learning_rate": 3.335688846391823e-05, + "loss": 0.7747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33452633023262024, + "step": 2155, + "valid_targets_mean": 13357.8, + "valid_targets_min": 3230 + }, + { + "epoch": 1.7075098814229248, + "grad_norm": 0.5505083723609786, + "learning_rate": 3.33157573628408e-05, + "loss": 0.7799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35151559114456177, + "step": 2160, + "valid_targets_mean": 13708.8, + "valid_targets_min": 892 + }, + { + "epoch": 1.7114624505928853, + "grad_norm": 0.4466844472727718, + "learning_rate": 3.3274524852980894e-05, + "loss": 0.7871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3605594038963318, + "step": 2165, + "valid_targets_mean": 13209.8, + "valid_targets_min": 617 + }, + { + "epoch": 1.7154150197628457, + "grad_norm": 0.5766970416910411, + "learning_rate": 3.323319124835286e-05, + "loss": 0.7977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3874567151069641, + "step": 2170, + "valid_targets_mean": 14664.7, + "valid_targets_min": 3023 + }, + { + "epoch": 1.7193675889328062, + "grad_norm": 0.6917723073245257, + "learning_rate": 3.319175686374092e-05, + "loss": 0.7922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4032149910926819, + "step": 2175, + "valid_targets_mean": 14720.7, + "valid_targets_min": 2150 + }, + { + "epoch": 1.7233201581027668, + "grad_norm": 0.46467356717825314, + "learning_rate": 3.3150222014696814e-05, + "loss": 0.7758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3941926956176758, + "step": 2180, + "valid_targets_mean": 15159.6, + "valid_targets_min": 4900 + }, + { + "epoch": 1.7272727272727273, + "grad_norm": 0.5842004174967416, + "learning_rate": 3.3108587017537405e-05, + "loss": 0.7723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38824644684791565, + "step": 2185, + "valid_targets_mean": 14435.0, + "valid_targets_min": 714 + }, + { + "epoch": 1.7312252964426877, + "grad_norm": 0.5828117740128022, + "learning_rate": 3.306685218934225e-05, + "loss": 0.7692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3723229765892029, + "step": 2190, + "valid_targets_mean": 14177.8, + "valid_targets_min": 2185 + }, + { + "epoch": 1.7351778656126482, + "grad_norm": 0.42829348064319517, + "learning_rate": 3.3025017847951175e-05, + "loss": 0.7746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35972219705581665, + "step": 2195, + "valid_targets_mean": 13176.8, + "valid_targets_min": 1776 + }, + { + "epoch": 1.7391304347826086, + "grad_norm": 0.4943314082068932, + "learning_rate": 3.298308431196188e-05, + "loss": 0.7743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39035218954086304, + "step": 2200, + "valid_targets_mean": 13944.2, + "valid_targets_min": 1586 + }, + { + "epoch": 1.7430830039525693, + "grad_norm": 0.4391466719425551, + "learning_rate": 3.294105190072749e-05, + "loss": 0.7882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.396766722202301, + "step": 2205, + "valid_targets_mean": 15132.7, + "valid_targets_min": 2408 + }, + { + "epoch": 1.7470355731225298, + "grad_norm": 0.5461486097862359, + "learning_rate": 3.2898920934354156e-05, + "loss": 0.7817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43438276648521423, + "step": 2210, + "valid_targets_mean": 14593.0, + "valid_targets_min": 3052 + }, + { + "epoch": 1.7509881422924902, + "grad_norm": 0.5227240374603596, + "learning_rate": 3.285669173369857e-05, + "loss": 0.7714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3938679099082947, + "step": 2215, + "valid_targets_mean": 13718.2, + "valid_targets_min": 2592 + }, + { + "epoch": 1.7549407114624507, + "grad_norm": 0.5551894565884691, + "learning_rate": 3.281436462036556e-05, + "loss": 0.7909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3801150321960449, + "step": 2220, + "valid_targets_mean": 13784.2, + "valid_targets_min": 865 + }, + { + "epoch": 1.7588932806324111, + "grad_norm": 0.48483430296675173, + "learning_rate": 3.2771939916705626e-05, + "loss": 0.7791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37417951226234436, + "step": 2225, + "valid_targets_mean": 14134.8, + "valid_targets_min": 2479 + }, + { + "epoch": 1.7628458498023716, + "grad_norm": 0.48638423764320865, + "learning_rate": 3.272941794581249e-05, + "loss": 0.7922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3843648433685303, + "step": 2230, + "valid_targets_mean": 13112.1, + "valid_targets_min": 1157 + }, + { + "epoch": 1.766798418972332, + "grad_norm": 0.5028051196481695, + "learning_rate": 3.2686799031520614e-05, + "loss": 0.7635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3484877347946167, + "step": 2235, + "valid_targets_mean": 12108.2, + "valid_targets_min": 700 + }, + { + "epoch": 1.7707509881422925, + "grad_norm": 0.4773052543288179, + "learning_rate": 3.264408349840278e-05, + "loss": 0.7703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.395057737827301, + "step": 2240, + "valid_targets_mean": 14436.4, + "valid_targets_min": 4885 + }, + { + "epoch": 1.774703557312253, + "grad_norm": 0.5286778448423883, + "learning_rate": 3.260127167176757e-05, + "loss": 0.7789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37170901894569397, + "step": 2245, + "valid_targets_mean": 13796.2, + "valid_targets_min": 1876 + }, + { + "epoch": 1.7786561264822134, + "grad_norm": 0.49836097412128133, + "learning_rate": 3.25583638776569e-05, + "loss": 0.7904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40207380056381226, + "step": 2250, + "valid_targets_mean": 13784.2, + "valid_targets_min": 1477 + }, + { + "epoch": 1.7826086956521738, + "grad_norm": 0.5068874768848025, + "learning_rate": 3.251536044284358e-05, + "loss": 0.7782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4178197383880615, + "step": 2255, + "valid_targets_mean": 13946.6, + "valid_targets_min": 3609 + }, + { + "epoch": 1.7865612648221343, + "grad_norm": 0.4750359385648404, + "learning_rate": 3.247226169482875e-05, + "loss": 0.785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39203977584838867, + "step": 2260, + "valid_targets_mean": 14571.8, + "valid_targets_min": 1610 + }, + { + "epoch": 1.7905138339920947, + "grad_norm": 0.6074773355256092, + "learning_rate": 3.242906796183944e-05, + "loss": 0.7898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3988493084907532, + "step": 2265, + "valid_targets_mean": 14587.6, + "valid_targets_min": 3445 + }, + { + "epoch": 1.7944664031620552, + "grad_norm": 0.4215774828826137, + "learning_rate": 3.2385779572826095e-05, + "loss": 0.7734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3582175374031067, + "step": 2270, + "valid_targets_mean": 13541.9, + "valid_targets_min": 1975 + }, + { + "epoch": 1.7984189723320159, + "grad_norm": 0.5829406093004404, + "learning_rate": 3.234239685745997e-05, + "loss": 0.7831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40310510993003845, + "step": 2275, + "valid_targets_mean": 14074.1, + "valid_targets_min": 1916 + }, + { + "epoch": 1.8023715415019763, + "grad_norm": 0.5811920843907805, + "learning_rate": 3.229892014613071e-05, + "loss": 0.7847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36158254742622375, + "step": 2280, + "valid_targets_mean": 12062.0, + "valid_targets_min": 1249 + }, + { + "epoch": 1.8063241106719368, + "grad_norm": 0.4403937418558115, + "learning_rate": 3.225534976994383e-05, + "loss": 0.778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42345383763313293, + "step": 2285, + "valid_targets_mean": 14364.4, + "valid_targets_min": 3286 + }, + { + "epoch": 1.8102766798418972, + "grad_norm": 0.7297568486746766, + "learning_rate": 3.221168606071811e-05, + "loss": 0.7793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3851701319217682, + "step": 2290, + "valid_targets_mean": 14304.0, + "valid_targets_min": 5277 + }, + { + "epoch": 1.8142292490118577, + "grad_norm": 0.7232758517786192, + "learning_rate": 3.2167929350983186e-05, + "loss": 0.78, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4021092653274536, + "step": 2295, + "valid_targets_mean": 14755.4, + "valid_targets_min": 991 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 0.7720495101286113, + "learning_rate": 3.2124079973976923e-05, + "loss": 0.7657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3899635374546051, + "step": 2300, + "valid_targets_mean": 14178.6, + "valid_targets_min": 1431 + }, + { + "epoch": 1.8221343873517788, + "grad_norm": 0.4797198905957224, + "learning_rate": 3.20801382636429e-05, + "loss": 0.7854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39665156602859497, + "step": 2305, + "valid_targets_mean": 14781.4, + "valid_targets_min": 2306 + }, + { + "epoch": 1.8260869565217392, + "grad_norm": 0.5412187715737803, + "learning_rate": 3.203610455462793e-05, + "loss": 0.7756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3560769557952881, + "step": 2310, + "valid_targets_mean": 13941.9, + "valid_targets_min": 2708 + }, + { + "epoch": 1.8300395256916997, + "grad_norm": 0.570862852504943, + "learning_rate": 3.199197918227941e-05, + "loss": 0.7768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37252092361450195, + "step": 2315, + "valid_targets_mean": 12519.1, + "valid_targets_min": 1213 + }, + { + "epoch": 1.8339920948616601, + "grad_norm": 0.5465147909794545, + "learning_rate": 3.194776248264284e-05, + "loss": 0.7635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4125135540962219, + "step": 2320, + "valid_targets_mean": 15514.3, + "valid_targets_min": 5364 + }, + { + "epoch": 1.8379446640316206, + "grad_norm": 0.43541261553777666, + "learning_rate": 3.190345479245923e-05, + "loss": 0.7819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35826700925827026, + "step": 2325, + "valid_targets_mean": 12470.0, + "valid_targets_min": 434 + }, + { + "epoch": 1.841897233201581, + "grad_norm": 0.45091525628295465, + "learning_rate": 3.1859056449162565e-05, + "loss": 0.7745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.421520471572876, + "step": 2330, + "valid_targets_mean": 14724.1, + "valid_targets_min": 1264 + }, + { + "epoch": 1.8458498023715415, + "grad_norm": 0.4531714421378381, + "learning_rate": 3.18145677908772e-05, + "loss": 0.7708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39088737964630127, + "step": 2335, + "valid_targets_mean": 13819.8, + "valid_targets_min": 1148 + }, + { + "epoch": 1.849802371541502, + "grad_norm": 0.8933119895297064, + "learning_rate": 3.17699891564153e-05, + "loss": 0.7771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3776126503944397, + "step": 2340, + "valid_targets_mean": 14269.5, + "valid_targets_min": 657 + }, + { + "epoch": 1.8537549407114624, + "grad_norm": 0.7431114921909793, + "learning_rate": 3.172532088527428e-05, + "loss": 0.7889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4277322292327881, + "step": 2345, + "valid_targets_mean": 15481.1, + "valid_targets_min": 6394 + }, + { + "epoch": 1.8577075098814229, + "grad_norm": 0.7700977310955982, + "learning_rate": 3.1680563317634186e-05, + "loss": 0.78, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36739635467529297, + "step": 2350, + "valid_targets_mean": 13804.7, + "valid_targets_min": 1533 + }, + { + "epoch": 1.8616600790513833, + "grad_norm": 0.5370660598443255, + "learning_rate": 3.163571679435512e-05, + "loss": 0.7797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4114643931388855, + "step": 2355, + "valid_targets_mean": 15109.3, + "valid_targets_min": 1821 + }, + { + "epoch": 1.8656126482213438, + "grad_norm": 0.6003241902066953, + "learning_rate": 3.159078165697464e-05, + "loss": 0.7845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.394725501537323, + "step": 2360, + "valid_targets_mean": 14324.3, + "valid_targets_min": 2389 + }, + { + "epoch": 1.8695652173913042, + "grad_norm": 0.6731672237950322, + "learning_rate": 3.1545758247705174e-05, + "loss": 0.7751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35223299264907837, + "step": 2365, + "valid_targets_mean": 13368.6, + "valid_targets_min": 1476 + }, + { + "epoch": 1.8735177865612647, + "grad_norm": 0.42502504933877033, + "learning_rate": 3.150064690943139e-05, + "loss": 0.776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39197036623954773, + "step": 2370, + "valid_targets_mean": 14463.5, + "valid_targets_min": 2627 + }, + { + "epoch": 1.8774703557312253, + "grad_norm": 0.4098436104459243, + "learning_rate": 3.1455447985707615e-05, + "loss": 0.7633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3527672290802002, + "step": 2375, + "valid_targets_mean": 13793.5, + "valid_targets_min": 2126 + }, + { + "epoch": 1.8814229249011858, + "grad_norm": 0.38216514859219586, + "learning_rate": 3.141016182075517e-05, + "loss": 0.7678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40441587567329407, + "step": 2380, + "valid_targets_mean": 14450.8, + "valid_targets_min": 882 + }, + { + "epoch": 1.8853754940711462, + "grad_norm": 0.45231603624026157, + "learning_rate": 3.136478875945982e-05, + "loss": 0.789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41786277294158936, + "step": 2385, + "valid_targets_mean": 15112.4, + "valid_targets_min": 3922 + }, + { + "epoch": 1.8893280632411067, + "grad_norm": 0.4581279241184952, + "learning_rate": 3.131932914736906e-05, + "loss": 0.7648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35269004106521606, + "step": 2390, + "valid_targets_mean": 13759.9, + "valid_targets_min": 931 + }, + { + "epoch": 1.8932806324110671, + "grad_norm": 0.45112494667296804, + "learning_rate": 3.1273783330689556e-05, + "loss": 0.767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.365069180727005, + "step": 2395, + "valid_targets_mean": 13577.6, + "valid_targets_min": 3942 + }, + { + "epoch": 1.8972332015810278, + "grad_norm": 0.5106312953957868, + "learning_rate": 3.122815165628448e-05, + "loss": 0.7731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41991156339645386, + "step": 2400, + "valid_targets_mean": 15856.0, + "valid_targets_min": 4564 + }, + { + "epoch": 1.9011857707509883, + "grad_norm": 0.515192707809807, + "learning_rate": 3.118243447167089e-05, + "loss": 0.7722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.385560542345047, + "step": 2405, + "valid_targets_mean": 15823.2, + "valid_targets_min": 3002 + }, + { + "epoch": 1.9051383399209487, + "grad_norm": 0.5036344245047077, + "learning_rate": 3.1136632125017005e-05, + "loss": 0.7796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42015203833580017, + "step": 2410, + "valid_targets_mean": 15338.5, + "valid_targets_min": 3305 + }, + { + "epoch": 1.9090909090909092, + "grad_norm": 0.4537803367963703, + "learning_rate": 3.109074496513969e-05, + "loss": 0.7696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.399087131023407, + "step": 2415, + "valid_targets_mean": 13994.5, + "valid_targets_min": 562 + }, + { + "epoch": 1.9130434782608696, + "grad_norm": 0.6214320624370854, + "learning_rate": 3.1044773341501645e-05, + "loss": 0.7893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42496049404144287, + "step": 2420, + "valid_targets_mean": 14510.5, + "valid_targets_min": 356 + }, + { + "epoch": 1.91699604743083, + "grad_norm": 0.5959345292267265, + "learning_rate": 3.099871760420888e-05, + "loss": 0.7802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45387113094329834, + "step": 2425, + "valid_targets_mean": 16231.0, + "valid_targets_min": 11225 + }, + { + "epoch": 1.9209486166007905, + "grad_norm": 0.4930368767909245, + "learning_rate": 3.095257810400796e-05, + "loss": 0.7721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41533786058425903, + "step": 2430, + "valid_targets_mean": 15024.8, + "valid_targets_min": 1252 + }, + { + "epoch": 1.924901185770751, + "grad_norm": 0.5068639431001682, + "learning_rate": 3.090635519228338e-05, + "loss": 0.7734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4463202655315399, + "step": 2435, + "valid_targets_mean": 15115.5, + "valid_targets_min": 1941 + }, + { + "epoch": 1.9288537549407114, + "grad_norm": 0.6609350847955431, + "learning_rate": 3.086004922105484e-05, + "loss": 0.774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37650012969970703, + "step": 2440, + "valid_targets_mean": 13691.5, + "valid_targets_min": 4112 + }, + { + "epoch": 1.9328063241106719, + "grad_norm": 0.4477979418848163, + "learning_rate": 3.081366054297464e-05, + "loss": 0.7665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3817542791366577, + "step": 2445, + "valid_targets_mean": 14549.2, + "valid_targets_min": 2510 + }, + { + "epoch": 1.9367588932806323, + "grad_norm": 0.4548825750541578, + "learning_rate": 3.0767189511324905e-05, + "loss": 0.7786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3989569842815399, + "step": 2450, + "valid_targets_mean": 14887.2, + "valid_targets_min": 6489 + }, + { + "epoch": 1.9407114624505928, + "grad_norm": 0.5431891540631055, + "learning_rate": 3.072063648001499e-05, + "loss": 0.7843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4193345904350281, + "step": 2455, + "valid_targets_mean": 13804.6, + "valid_targets_min": 2096 + }, + { + "epoch": 1.9446640316205532, + "grad_norm": 0.5036764590196405, + "learning_rate": 3.0674001803578694e-05, + "loss": 0.7826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40994542837142944, + "step": 2460, + "valid_targets_mean": 15205.8, + "valid_targets_min": 4845 + }, + { + "epoch": 1.9486166007905137, + "grad_norm": 0.4483988567092992, + "learning_rate": 3.06272858371716e-05, + "loss": 0.774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41489845514297485, + "step": 2465, + "valid_targets_mean": 15220.3, + "valid_targets_min": 1038 + }, + { + "epoch": 1.9525691699604744, + "grad_norm": 0.5819729681461071, + "learning_rate": 3.058048893656841e-05, + "loss": 0.7819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3868977427482605, + "step": 2470, + "valid_targets_mean": 14653.4, + "valid_targets_min": 1856 + }, + { + "epoch": 1.9565217391304348, + "grad_norm": 0.49720392433075855, + "learning_rate": 3.0533611458160174e-05, + "loss": 0.7806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35273289680480957, + "step": 2475, + "valid_targets_mean": 13677.1, + "valid_targets_min": 985 + }, + { + "epoch": 1.9604743083003953, + "grad_norm": 0.5374283044595166, + "learning_rate": 3.0486653758951562e-05, + "loss": 0.7748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4247947931289673, + "step": 2480, + "valid_targets_mean": 15753.1, + "valid_targets_min": 5894 + }, + { + "epoch": 1.9644268774703557, + "grad_norm": 0.46620303812133773, + "learning_rate": 3.043961619655824e-05, + "loss": 0.7792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4042903184890747, + "step": 2485, + "valid_targets_mean": 14244.9, + "valid_targets_min": 1779 + }, + { + "epoch": 1.9683794466403162, + "grad_norm": 0.6482369587809752, + "learning_rate": 3.039249912920406e-05, + "loss": 0.7763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41087111830711365, + "step": 2490, + "valid_targets_mean": 13954.1, + "valid_targets_min": 1052 + }, + { + "epoch": 1.9723320158102768, + "grad_norm": 0.5931502637055073, + "learning_rate": 3.034530291571836e-05, + "loss": 0.7833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39423125982284546, + "step": 2495, + "valid_targets_mean": 14447.0, + "valid_targets_min": 562 + }, + { + "epoch": 1.9762845849802373, + "grad_norm": 0.5886717592408556, + "learning_rate": 3.0298027915533238e-05, + "loss": 0.7718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37848570942878723, + "step": 2500, + "valid_targets_mean": 14780.4, + "valid_targets_min": 3228 + }, + { + "epoch": 1.9802371541501977, + "grad_norm": 0.5737904763817373, + "learning_rate": 3.02506744886808e-05, + "loss": 0.767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43090033531188965, + "step": 2505, + "valid_targets_mean": 15594.3, + "valid_targets_min": 4512 + }, + { + "epoch": 1.9841897233201582, + "grad_norm": 0.500508012660206, + "learning_rate": 3.020324299579042e-05, + "loss": 0.7693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41233742237091064, + "step": 2510, + "valid_targets_mean": 14884.6, + "valid_targets_min": 3953 + }, + { + "epoch": 1.9881422924901186, + "grad_norm": 0.48490774623090044, + "learning_rate": 3.015573379808602e-05, + "loss": 0.7638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40514707565307617, + "step": 2515, + "valid_targets_mean": 14247.9, + "valid_targets_min": 1326 + }, + { + "epoch": 1.992094861660079, + "grad_norm": 0.5136639238225086, + "learning_rate": 3.0108147257383286e-05, + "loss": 0.7738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38142019510269165, + "step": 2520, + "valid_targets_mean": 14660.7, + "valid_targets_min": 2734 + }, + { + "epoch": 1.9960474308300395, + "grad_norm": 0.5065435666891152, + "learning_rate": 3.0060483736086927e-05, + "loss": 0.7686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40591907501220703, + "step": 2525, + "valid_targets_mean": 13588.5, + "valid_targets_min": 1079 + }, + { + "epoch": 2.0, + "grad_norm": 0.5298551492341886, + "learning_rate": 3.0012743597187897e-05, + "loss": 0.7707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3959786295890808, + "step": 2530, + "valid_targets_mean": 13213.2, + "valid_targets_min": 2682 + }, + { + "epoch": 2.0039525691699605, + "grad_norm": 0.4169287887561478, + "learning_rate": 2.9964927204260683e-05, + "loss": 0.7754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3849204182624817, + "step": 2535, + "valid_targets_mean": 14702.0, + "valid_targets_min": 3741 + }, + { + "epoch": 2.007905138339921, + "grad_norm": 0.5960756503333358, + "learning_rate": 2.9917034921460467e-05, + "loss": 0.7685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37172481417655945, + "step": 2540, + "valid_targets_mean": 15118.0, + "valid_targets_min": 1473 + }, + { + "epoch": 2.0118577075098814, + "grad_norm": 0.43572476344856903, + "learning_rate": 2.986906711352039e-05, + "loss": 0.7891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40657711029052734, + "step": 2545, + "valid_targets_mean": 14333.6, + "valid_targets_min": 3249 + }, + { + "epoch": 2.015810276679842, + "grad_norm": 0.4454983095158851, + "learning_rate": 2.982102414574879e-05, + "loss": 0.7755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4099913537502289, + "step": 2550, + "valid_targets_mean": 14453.1, + "valid_targets_min": 1284 + }, + { + "epoch": 2.0197628458498023, + "grad_norm": 0.446759206466815, + "learning_rate": 2.977290638402637e-05, + "loss": 0.7881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3887191414833069, + "step": 2555, + "valid_targets_mean": 13262.8, + "valid_targets_min": 1085 + }, + { + "epoch": 2.0237154150197627, + "grad_norm": 0.5423896095764535, + "learning_rate": 2.9724714194803465e-05, + "loss": 0.7598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40957966446876526, + "step": 2560, + "valid_targets_mean": 13452.8, + "valid_targets_min": 1757 + }, + { + "epoch": 2.027667984189723, + "grad_norm": 0.6093339487034539, + "learning_rate": 2.967644794509722e-05, + "loss": 0.7861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40051162242889404, + "step": 2565, + "valid_targets_mean": 15309.1, + "valid_targets_min": 4258 + }, + { + "epoch": 2.0316205533596836, + "grad_norm": 0.567767907618077, + "learning_rate": 2.962810800248879e-05, + "loss": 0.7769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3648568391799927, + "step": 2570, + "valid_targets_mean": 13071.8, + "valid_targets_min": 1537 + }, + { + "epoch": 2.035573122529644, + "grad_norm": 0.6312676544913319, + "learning_rate": 2.9579694735120577e-05, + "loss": 0.7634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3606482148170471, + "step": 2575, + "valid_targets_mean": 13983.4, + "valid_targets_min": 2254 + }, + { + "epoch": 2.039525691699605, + "grad_norm": 0.8690095071159801, + "learning_rate": 2.9531208511693384e-05, + "loss": 0.7822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3941802978515625, + "step": 2580, + "valid_targets_mean": 14741.5, + "valid_targets_min": 3404 + }, + { + "epoch": 2.0434782608695654, + "grad_norm": 0.6439496401910794, + "learning_rate": 2.9482649701463636e-05, + "loss": 0.7815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4165646731853485, + "step": 2585, + "valid_targets_mean": 15408.8, + "valid_targets_min": 4169 + }, + { + "epoch": 2.047430830039526, + "grad_norm": 0.6494136765214837, + "learning_rate": 2.9434018674240536e-05, + "loss": 0.7731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4068681001663208, + "step": 2590, + "valid_targets_mean": 13632.5, + "valid_targets_min": 1784 + }, + { + "epoch": 2.0513833992094863, + "grad_norm": 0.5302675520822073, + "learning_rate": 2.9385315800383298e-05, + "loss": 0.7737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3832058310508728, + "step": 2595, + "valid_targets_mean": 14221.0, + "valid_targets_min": 895 + }, + { + "epoch": 2.0553359683794468, + "grad_norm": 0.49306906927052924, + "learning_rate": 2.9336541450798285e-05, + "loss": 0.7785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36830419301986694, + "step": 2600, + "valid_targets_mean": 13693.2, + "valid_targets_min": 2052 + }, + { + "epoch": 2.059288537549407, + "grad_norm": 0.6125912241432147, + "learning_rate": 2.9287695996936194e-05, + "loss": 0.7897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3708136975765228, + "step": 2605, + "valid_targets_mean": 12739.2, + "valid_targets_min": 916 + }, + { + "epoch": 2.0632411067193677, + "grad_norm": 0.6535033330469237, + "learning_rate": 2.9238779810789232e-05, + "loss": 0.7577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4186321198940277, + "step": 2610, + "valid_targets_mean": 14649.5, + "valid_targets_min": 1973 + }, + { + "epoch": 2.067193675889328, + "grad_norm": 0.6500652360909569, + "learning_rate": 2.9189793264888294e-05, + "loss": 0.7747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38051989674568176, + "step": 2615, + "valid_targets_mean": 14633.7, + "valid_targets_min": 4341 + }, + { + "epoch": 2.0711462450592886, + "grad_norm": 0.5348173014115049, + "learning_rate": 2.91407367323001e-05, + "loss": 0.7826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3990240693092346, + "step": 2620, + "valid_targets_mean": 13771.0, + "valid_targets_min": 2924 + }, + { + "epoch": 2.075098814229249, + "grad_norm": 0.5820736284853023, + "learning_rate": 2.909161058662437e-05, + "loss": 0.7597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3583071231842041, + "step": 2625, + "valid_targets_mean": 14548.5, + "valid_targets_min": 4555 + }, + { + "epoch": 2.0790513833992095, + "grad_norm": 0.421686794150997, + "learning_rate": 2.9042415201990974e-05, + "loss": 0.7653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34147346019744873, + "step": 2630, + "valid_targets_mean": 14615.4, + "valid_targets_min": 1645 + }, + { + "epoch": 2.08300395256917, + "grad_norm": 0.5397133873938808, + "learning_rate": 2.899315095305709e-05, + "loss": 0.7751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37592965364456177, + "step": 2635, + "valid_targets_mean": 14447.8, + "valid_targets_min": 4050 + }, + { + "epoch": 2.0869565217391304, + "grad_norm": 0.47984781381906033, + "learning_rate": 2.894381821500435e-05, + "loss": 0.7921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40896934270858765, + "step": 2640, + "valid_targets_mean": 14065.4, + "valid_targets_min": 2603 + }, + { + "epoch": 2.090909090909091, + "grad_norm": 0.4479820445238131, + "learning_rate": 2.8894417363535972e-05, + "loss": 0.7584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41852182149887085, + "step": 2645, + "valid_targets_mean": 14449.1, + "valid_targets_min": 2772 + }, + { + "epoch": 2.0948616600790513, + "grad_norm": 0.4929008677408695, + "learning_rate": 2.8844948774873902e-05, + "loss": 0.7894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39323076605796814, + "step": 2650, + "valid_targets_mean": 14426.2, + "valid_targets_min": 824 + }, + { + "epoch": 2.0988142292490117, + "grad_norm": 0.6345718642962392, + "learning_rate": 2.879541282575596e-05, + "loss": 0.7792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3867238461971283, + "step": 2655, + "valid_targets_mean": 14960.3, + "valid_targets_min": 2220 + }, + { + "epoch": 2.102766798418972, + "grad_norm": 0.387740165417871, + "learning_rate": 2.874580989343295e-05, + "loss": 0.7729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3884007930755615, + "step": 2660, + "valid_targets_mean": 14824.6, + "valid_targets_min": 1668 + }, + { + "epoch": 2.1067193675889326, + "grad_norm": 0.4920413766498231, + "learning_rate": 2.869614035566582e-05, + "loss": 0.7666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3464805483818054, + "step": 2665, + "valid_targets_mean": 12768.9, + "valid_targets_min": 1391 + }, + { + "epoch": 2.110671936758893, + "grad_norm": 0.4432268856594169, + "learning_rate": 2.864640459072275e-05, + "loss": 0.768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3539339005947113, + "step": 2670, + "valid_targets_mean": 13034.3, + "valid_targets_min": 770 + }, + { + "epoch": 2.1146245059288535, + "grad_norm": 0.5006709600909526, + "learning_rate": 2.8596602977376283e-05, + "loss": 0.7808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3748158812522888, + "step": 2675, + "valid_targets_mean": 14039.1, + "valid_targets_min": 1499 + }, + { + "epoch": 2.1185770750988144, + "grad_norm": 0.586392794699354, + "learning_rate": 2.8546735894900464e-05, + "loss": 0.7754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.408595472574234, + "step": 2680, + "valid_targets_mean": 14679.0, + "valid_targets_min": 961 + }, + { + "epoch": 2.122529644268775, + "grad_norm": 0.47952625489983935, + "learning_rate": 2.8496803723067903e-05, + "loss": 0.7596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3928757607936859, + "step": 2685, + "valid_targets_mean": 14999.3, + "valid_targets_min": 819 + }, + { + "epoch": 2.1264822134387353, + "grad_norm": 0.4443879687192432, + "learning_rate": 2.844680684214693e-05, + "loss": 0.775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36783796548843384, + "step": 2690, + "valid_targets_mean": 14107.2, + "valid_targets_min": 873 + }, + { + "epoch": 2.130434782608696, + "grad_norm": 0.465004425099754, + "learning_rate": 2.839674563289868e-05, + "loss": 0.7762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3629588186740875, + "step": 2695, + "valid_targets_mean": 13031.2, + "valid_targets_min": 2192 + }, + { + "epoch": 2.1343873517786562, + "grad_norm": 0.5218918396986245, + "learning_rate": 2.8346620476574186e-05, + "loss": 0.7769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3603062033653259, + "step": 2700, + "valid_targets_mean": 14139.9, + "valid_targets_min": 2616 + }, + { + "epoch": 2.1383399209486167, + "grad_norm": 0.4803290109434963, + "learning_rate": 2.829643175491148e-05, + "loss": 0.7824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42037665843963623, + "step": 2705, + "valid_targets_mean": 13970.3, + "valid_targets_min": 1787 + }, + { + "epoch": 2.142292490118577, + "grad_norm": 0.44540565678337163, + "learning_rate": 2.8246179850132695e-05, + "loss": 0.7814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3908318877220154, + "step": 2710, + "valid_targets_mean": 13487.9, + "valid_targets_min": 1319 + }, + { + "epoch": 2.1462450592885376, + "grad_norm": 0.5087220266684651, + "learning_rate": 2.8195865144941147e-05, + "loss": 0.7665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3947524428367615, + "step": 2715, + "valid_targets_mean": 14721.2, + "valid_targets_min": 1803 + }, + { + "epoch": 2.150197628458498, + "grad_norm": 0.4109427509879984, + "learning_rate": 2.8145488022518422e-05, + "loss": 0.7761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3968765139579773, + "step": 2720, + "valid_targets_mean": 14526.8, + "valid_targets_min": 3738 + }, + { + "epoch": 2.1541501976284585, + "grad_norm": 0.47136360026247426, + "learning_rate": 2.8095048866521453e-05, + "loss": 0.775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37698715925216675, + "step": 2725, + "valid_targets_mean": 13393.1, + "valid_targets_min": 2564 + }, + { + "epoch": 2.158102766798419, + "grad_norm": 0.5786929677949275, + "learning_rate": 2.8044548061079604e-05, + "loss": 0.7862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36528095602989197, + "step": 2730, + "valid_targets_mean": 14267.6, + "valid_targets_min": 4847 + }, + { + "epoch": 2.1620553359683794, + "grad_norm": 0.5255704778071933, + "learning_rate": 2.7993985990791737e-05, + "loss": 0.7875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39502614736557007, + "step": 2735, + "valid_targets_mean": 13798.3, + "valid_targets_min": 1576 + }, + { + "epoch": 2.16600790513834, + "grad_norm": 0.5206998372780713, + "learning_rate": 2.7943363040723297e-05, + "loss": 0.7755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4103739261627197, + "step": 2740, + "valid_targets_mean": 14130.4, + "valid_targets_min": 2314 + }, + { + "epoch": 2.1699604743083003, + "grad_norm": 0.5190850581917796, + "learning_rate": 2.7892679596403373e-05, + "loss": 0.7793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39672452211380005, + "step": 2745, + "valid_targets_mean": 15329.2, + "valid_targets_min": 5242 + }, + { + "epoch": 2.1739130434782608, + "grad_norm": 0.4546696334767682, + "learning_rate": 2.7841936043821745e-05, + "loss": 0.774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37974822521209717, + "step": 2750, + "valid_targets_mean": 13620.5, + "valid_targets_min": 666 + }, + { + "epoch": 2.177865612648221, + "grad_norm": 0.474615324766684, + "learning_rate": 2.779113276942597e-05, + "loss": 0.7605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39854341745376587, + "step": 2755, + "valid_targets_mean": 14447.7, + "valid_targets_min": 1729 + }, + { + "epoch": 2.1818181818181817, + "grad_norm": 0.4963002746144172, + "learning_rate": 2.7740270160118433e-05, + "loss": 0.7803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4384486973285675, + "step": 2760, + "valid_targets_mean": 15380.9, + "valid_targets_min": 4554 + }, + { + "epoch": 2.185770750988142, + "grad_norm": 0.48990731644372254, + "learning_rate": 2.768934860325339e-05, + "loss": 0.7763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3670347332954407, + "step": 2765, + "valid_targets_mean": 12658.2, + "valid_targets_min": 712 + }, + { + "epoch": 2.1897233201581026, + "grad_norm": 0.47114585584184226, + "learning_rate": 2.7638368486634017e-05, + "loss": 0.7852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39595267176628113, + "step": 2770, + "valid_targets_mean": 15210.9, + "valid_targets_min": 4438 + }, + { + "epoch": 2.1936758893280635, + "grad_norm": 0.5245144069846917, + "learning_rate": 2.7587330198509464e-05, + "loss": 0.784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39933067560195923, + "step": 2775, + "valid_targets_mean": 14473.9, + "valid_targets_min": 3136 + }, + { + "epoch": 2.197628458498024, + "grad_norm": 0.46986728778163084, + "learning_rate": 2.7536234127571922e-05, + "loss": 0.7901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40675270557403564, + "step": 2780, + "valid_targets_mean": 14757.8, + "valid_targets_min": 2457 + }, + { + "epoch": 2.2015810276679844, + "grad_norm": 0.5628808570352416, + "learning_rate": 2.7485080662953607e-05, + "loss": 0.7815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3919416666030884, + "step": 2785, + "valid_targets_mean": 15176.4, + "valid_targets_min": 3589 + }, + { + "epoch": 2.205533596837945, + "grad_norm": 0.48090796521694057, + "learning_rate": 2.743387019422385e-05, + "loss": 0.7793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33060842752456665, + "step": 2790, + "valid_targets_mean": 12114.5, + "valid_targets_min": 501 + }, + { + "epoch": 2.2094861660079053, + "grad_norm": 0.5357573929097531, + "learning_rate": 2.738260311138609e-05, + "loss": 0.7705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3580405116081238, + "step": 2795, + "valid_targets_mean": 13285.9, + "valid_targets_min": 1984 + }, + { + "epoch": 2.2134387351778657, + "grad_norm": 0.47897737553942105, + "learning_rate": 2.7331279804874928e-05, + "loss": 0.7663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40160953998565674, + "step": 2800, + "valid_targets_mean": 13824.4, + "valid_targets_min": 3293 + }, + { + "epoch": 2.217391304347826, + "grad_norm": 0.49934764324564673, + "learning_rate": 2.7279900665553163e-05, + "loss": 0.7676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36619892716407776, + "step": 2805, + "valid_targets_mean": 13679.8, + "valid_targets_min": 468 + }, + { + "epoch": 2.2213438735177866, + "grad_norm": 0.6336789883076881, + "learning_rate": 2.722846608470878e-05, + "loss": 0.7565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36743390560150146, + "step": 2810, + "valid_targets_mean": 13802.2, + "valid_targets_min": 1008 + }, + { + "epoch": 2.225296442687747, + "grad_norm": 0.5848638922362013, + "learning_rate": 2.717697645405199e-05, + "loss": 0.7638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.356411337852478, + "step": 2815, + "valid_targets_mean": 12592.2, + "valid_targets_min": 1733 + }, + { + "epoch": 2.2292490118577075, + "grad_norm": 0.41038225298126885, + "learning_rate": 2.7125432165712273e-05, + "loss": 0.7584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4140254259109497, + "step": 2820, + "valid_targets_mean": 14403.9, + "valid_targets_min": 4363 + }, + { + "epoch": 2.233201581027668, + "grad_norm": 0.3735375491816951, + "learning_rate": 2.7073833612235328e-05, + "loss": 0.7685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.359930157661438, + "step": 2825, + "valid_targets_mean": 13710.1, + "valid_targets_min": 752 + }, + { + "epoch": 2.2371541501976284, + "grad_norm": 0.4620211967785695, + "learning_rate": 2.7022181186580147e-05, + "loss": 0.7822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3945060968399048, + "step": 2830, + "valid_targets_mean": 13790.4, + "valid_targets_min": 1132 + }, + { + "epoch": 2.241106719367589, + "grad_norm": 0.47404072130591185, + "learning_rate": 2.6970475282115985e-05, + "loss": 0.7705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40418922901153564, + "step": 2835, + "valid_targets_mean": 13433.0, + "valid_targets_min": 1947 + }, + { + "epoch": 2.2450592885375493, + "grad_norm": 0.437929630000758, + "learning_rate": 2.691871629261938e-05, + "loss": 0.7729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3925638794898987, + "step": 2840, + "valid_targets_mean": 12788.6, + "valid_targets_min": 531 + }, + { + "epoch": 2.2490118577075098, + "grad_norm": 0.591050840071583, + "learning_rate": 2.6866904612271155e-05, + "loss": 0.7603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4136330485343933, + "step": 2845, + "valid_targets_mean": 15991.8, + "valid_targets_min": 3323 + }, + { + "epoch": 2.2529644268774702, + "grad_norm": 0.5081629351720508, + "learning_rate": 2.6815040635653394e-05, + "loss": 0.7786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40411069989204407, + "step": 2850, + "valid_targets_mean": 14765.1, + "valid_targets_min": 1802 + }, + { + "epoch": 2.2569169960474307, + "grad_norm": 0.5661907695640184, + "learning_rate": 2.676312475774647e-05, + "loss": 0.7784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42903634905815125, + "step": 2855, + "valid_targets_mean": 13972.7, + "valid_targets_min": 1867 + }, + { + "epoch": 2.260869565217391, + "grad_norm": 0.5215051848077786, + "learning_rate": 2.6711157373926003e-05, + "loss": 0.7793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3678131699562073, + "step": 2860, + "valid_targets_mean": 13212.8, + "valid_targets_min": 742 + }, + { + "epoch": 2.2648221343873516, + "grad_norm": 0.5676367516802306, + "learning_rate": 2.6659138879959897e-05, + "loss": 0.7541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37292495369911194, + "step": 2865, + "valid_targets_mean": 14328.2, + "valid_targets_min": 3521 + }, + { + "epoch": 2.2687747035573125, + "grad_norm": 0.4523927333982824, + "learning_rate": 2.6607069672005257e-05, + "loss": 0.7764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38148459792137146, + "step": 2870, + "valid_targets_mean": 13187.8, + "valid_targets_min": 698 + }, + { + "epoch": 2.2727272727272725, + "grad_norm": 0.45742544564850507, + "learning_rate": 2.6554950146605444e-05, + "loss": 0.7673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.373684287071228, + "step": 2875, + "valid_targets_mean": 12670.2, + "valid_targets_min": 1616 + }, + { + "epoch": 2.2766798418972334, + "grad_norm": 0.60733401048542, + "learning_rate": 2.6502780700686987e-05, + "loss": 0.7805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41696086525917053, + "step": 2880, + "valid_targets_mean": 14828.2, + "valid_targets_min": 4640 + }, + { + "epoch": 2.280632411067194, + "grad_norm": 0.5616022162577262, + "learning_rate": 2.6450561731556638e-05, + "loss": 0.7726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39648258686065674, + "step": 2885, + "valid_targets_mean": 14729.2, + "valid_targets_min": 1177 + }, + { + "epoch": 2.2845849802371543, + "grad_norm": 0.5563530011573588, + "learning_rate": 2.6398293636898268e-05, + "loss": 0.7802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35375475883483887, + "step": 2890, + "valid_targets_mean": 14128.0, + "valid_targets_min": 1403 + }, + { + "epoch": 2.2885375494071147, + "grad_norm": 0.4761374885924966, + "learning_rate": 2.6345976814769878e-05, + "loss": 0.7747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4456695318222046, + "step": 2895, + "valid_targets_mean": 14666.6, + "valid_targets_min": 2459 + }, + { + "epoch": 2.292490118577075, + "grad_norm": 0.40073637654436284, + "learning_rate": 2.629361166360058e-05, + "loss": 0.7586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37085291743278503, + "step": 2900, + "valid_targets_mean": 14502.6, + "valid_targets_min": 2732 + }, + { + "epoch": 2.2964426877470356, + "grad_norm": 0.4304754144871653, + "learning_rate": 2.6241198582187527e-05, + "loss": 0.7877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38260188698768616, + "step": 2905, + "valid_targets_mean": 13996.9, + "valid_targets_min": 3526 + }, + { + "epoch": 2.300395256916996, + "grad_norm": 0.5674224054546282, + "learning_rate": 2.6188737969692906e-05, + "loss": 0.7751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38618165254592896, + "step": 2910, + "valid_targets_mean": 14646.7, + "valid_targets_min": 2717 + }, + { + "epoch": 2.3043478260869565, + "grad_norm": 0.50786553454062, + "learning_rate": 2.613623022564088e-05, + "loss": 0.7695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38750573992729187, + "step": 2915, + "valid_targets_mean": 13732.9, + "valid_targets_min": 2231 + }, + { + "epoch": 2.308300395256917, + "grad_norm": 0.4302937511243403, + "learning_rate": 2.6083675749914556e-05, + "loss": 0.7787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36327022314071655, + "step": 2920, + "valid_targets_mean": 14749.5, + "valid_targets_min": 3547 + }, + { + "epoch": 2.3122529644268774, + "grad_norm": 0.46660809805484366, + "learning_rate": 2.6031074942752926e-05, + "loss": 0.7646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3552829921245575, + "step": 2925, + "valid_targets_mean": 14222.4, + "valid_targets_min": 2609 + }, + { + "epoch": 2.316205533596838, + "grad_norm": 0.5129248618662533, + "learning_rate": 2.597842820474784e-05, + "loss": 0.7877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3468460142612457, + "step": 2930, + "valid_targets_mean": 11743.0, + "valid_targets_min": 792 + }, + { + "epoch": 2.3201581027667983, + "grad_norm": 0.5770597622434369, + "learning_rate": 2.5925735936840948e-05, + "loss": 0.7804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4005228281021118, + "step": 2935, + "valid_targets_mean": 14178.2, + "valid_targets_min": 1791 + }, + { + "epoch": 2.324110671936759, + "grad_norm": 0.42550749581190894, + "learning_rate": 2.5872998540320612e-05, + "loss": 0.7927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42413461208343506, + "step": 2940, + "valid_targets_mean": 15498.1, + "valid_targets_min": 7631 + }, + { + "epoch": 2.3280632411067192, + "grad_norm": 0.529008274287526, + "learning_rate": 2.582021641681891e-05, + "loss": 0.7844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4053764045238495, + "step": 2945, + "valid_targets_mean": 14982.1, + "valid_targets_min": 1378 + }, + { + "epoch": 2.3320158102766797, + "grad_norm": 0.47439591013818666, + "learning_rate": 2.5767389968308546e-05, + "loss": 0.7734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39970991015434265, + "step": 2950, + "valid_targets_mean": 13691.3, + "valid_targets_min": 966 + }, + { + "epoch": 2.33596837944664, + "grad_norm": 0.4405428550311648, + "learning_rate": 2.571451959709977e-05, + "loss": 0.7757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39155566692352295, + "step": 2955, + "valid_targets_mean": 14856.5, + "valid_targets_min": 5130 + }, + { + "epoch": 2.3399209486166006, + "grad_norm": 0.46115242433522696, + "learning_rate": 2.566160570583735e-05, + "loss": 0.7635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38406071066856384, + "step": 2960, + "valid_targets_mean": 14170.6, + "valid_targets_min": 2530 + }, + { + "epoch": 2.3438735177865615, + "grad_norm": 0.6158963568061337, + "learning_rate": 2.560864869749749e-05, + "loss": 0.763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38242655992507935, + "step": 2965, + "valid_targets_mean": 13946.3, + "valid_targets_min": 2185 + }, + { + "epoch": 2.3478260869565215, + "grad_norm": 0.61624919777822, + "learning_rate": 2.5555648975384745e-05, + "loss": 0.7823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41033002734184265, + "step": 2970, + "valid_targets_mean": 14589.4, + "valid_targets_min": 808 + }, + { + "epoch": 2.3517786561264824, + "grad_norm": 0.4363469801202001, + "learning_rate": 2.5502606943128983e-05, + "loss": 0.7848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3739359378814697, + "step": 2975, + "valid_targets_mean": 14850.2, + "valid_targets_min": 3456 + }, + { + "epoch": 2.355731225296443, + "grad_norm": 0.6716682470237659, + "learning_rate": 2.5449523004682277e-05, + "loss": 0.7652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34200263023376465, + "step": 2980, + "valid_targets_mean": 14220.8, + "valid_targets_min": 1324 + }, + { + "epoch": 2.3596837944664033, + "grad_norm": 0.5647771222227309, + "learning_rate": 2.5396397564315872e-05, + "loss": 0.7969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.405070960521698, + "step": 2985, + "valid_targets_mean": 15537.4, + "valid_targets_min": 1318 + }, + { + "epoch": 2.3636363636363638, + "grad_norm": 0.4712840491911607, + "learning_rate": 2.5343231026617048e-05, + "loss": 0.7911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4044535458087921, + "step": 2990, + "valid_targets_mean": 15163.1, + "valid_targets_min": 2100 + }, + { + "epoch": 2.367588932806324, + "grad_norm": 0.41459123432215206, + "learning_rate": 2.529002379648608e-05, + "loss": 0.7778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3829897940158844, + "step": 2995, + "valid_targets_mean": 15354.6, + "valid_targets_min": 9768 + }, + { + "epoch": 2.3715415019762847, + "grad_norm": 0.5179369565061838, + "learning_rate": 2.523677627913315e-05, + "loss": 0.7746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3789197504520416, + "step": 3000, + "valid_targets_mean": 13091.9, + "valid_targets_min": 1576 + }, + { + "epoch": 2.375494071146245, + "grad_norm": 0.4007214748027483, + "learning_rate": 2.518348888007526e-05, + "loss": 0.7535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35102590918540955, + "step": 3005, + "valid_targets_mean": 13046.9, + "valid_targets_min": 1568 + }, + { + "epoch": 2.3794466403162056, + "grad_norm": 0.528318417108864, + "learning_rate": 2.5130162005133126e-05, + "loss": 0.7687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4005259573459625, + "step": 3010, + "valid_targets_mean": 15061.2, + "valid_targets_min": 4294 + }, + { + "epoch": 2.383399209486166, + "grad_norm": 0.4868786250162592, + "learning_rate": 2.5076796060428102e-05, + "loss": 0.7535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3294867277145386, + "step": 3015, + "valid_targets_mean": 11834.1, + "valid_targets_min": 909 + }, + { + "epoch": 2.3873517786561265, + "grad_norm": 0.5579563631748424, + "learning_rate": 2.50233914523791e-05, + "loss": 0.77, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4102717936038971, + "step": 3020, + "valid_targets_mean": 14926.7, + "valid_targets_min": 2795 + }, + { + "epoch": 2.391304347826087, + "grad_norm": 0.5077429211343327, + "learning_rate": 2.4969948587699456e-05, + "loss": 0.7795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3741700053215027, + "step": 3025, + "valid_targets_mean": 13182.3, + "valid_targets_min": 3533 + }, + { + "epoch": 2.3952569169960474, + "grad_norm": 0.49739538217809975, + "learning_rate": 2.491646787339388e-05, + "loss": 0.764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36394309997558594, + "step": 3030, + "valid_targets_mean": 12717.4, + "valid_targets_min": 1484 + }, + { + "epoch": 2.399209486166008, + "grad_norm": 0.4532261537412374, + "learning_rate": 2.4862949716755322e-05, + "loss": 0.7801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3491114377975464, + "step": 3035, + "valid_targets_mean": 13040.3, + "valid_targets_min": 2138 + }, + { + "epoch": 2.4031620553359683, + "grad_norm": 0.5127495686322161, + "learning_rate": 2.4809394525361886e-05, + "loss": 0.7649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36517781019210815, + "step": 3040, + "valid_targets_mean": 13130.9, + "valid_targets_min": 1369 + }, + { + "epoch": 2.4071146245059287, + "grad_norm": 0.419036910247364, + "learning_rate": 2.475580270707371e-05, + "loss": 0.7799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36680206656455994, + "step": 3045, + "valid_targets_mean": 13187.7, + "valid_targets_min": 681 + }, + { + "epoch": 2.411067193675889, + "grad_norm": 0.5311368101168255, + "learning_rate": 2.470217467002989e-05, + "loss": 0.7782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4209502935409546, + "step": 3050, + "valid_targets_mean": 15390.3, + "valid_targets_min": 4196 + }, + { + "epoch": 2.4150197628458496, + "grad_norm": 0.38822870648786895, + "learning_rate": 2.4648510822645334e-05, + "loss": 0.7906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38081133365631104, + "step": 3055, + "valid_targets_mean": 14898.2, + "valid_targets_min": 1727 + }, + { + "epoch": 2.4189723320158105, + "grad_norm": 0.44141486079903686, + "learning_rate": 2.4594811573607683e-05, + "loss": 0.7707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3718807101249695, + "step": 3060, + "valid_targets_mean": 13844.3, + "valid_targets_min": 1836 + }, + { + "epoch": 2.4229249011857705, + "grad_norm": 0.46773498604064284, + "learning_rate": 2.4541077331874195e-05, + "loss": 0.7658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3834363520145416, + "step": 3065, + "valid_targets_mean": 13998.0, + "valid_targets_min": 683 + }, + { + "epoch": 2.4268774703557314, + "grad_norm": 0.4972066447683509, + "learning_rate": 2.4487308506668597e-05, + "loss": 0.7599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3603547215461731, + "step": 3070, + "valid_targets_mean": 13882.3, + "valid_targets_min": 1363 + }, + { + "epoch": 2.430830039525692, + "grad_norm": 0.4152156139176059, + "learning_rate": 2.443350550747801e-05, + "loss": 0.7803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40987148880958557, + "step": 3075, + "valid_targets_mean": 14715.6, + "valid_targets_min": 3093 + }, + { + "epoch": 2.4347826086956523, + "grad_norm": 0.5519930274257293, + "learning_rate": 2.4379668744049806e-05, + "loss": 0.7869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4132250249385834, + "step": 3080, + "valid_targets_mean": 13525.5, + "valid_targets_min": 912 + }, + { + "epoch": 2.438735177865613, + "grad_norm": 0.5115728949303668, + "learning_rate": 2.432579862638851e-05, + "loss": 0.7651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34897032380104065, + "step": 3085, + "valid_targets_mean": 12643.7, + "valid_targets_min": 902 + }, + { + "epoch": 2.4426877470355732, + "grad_norm": 0.3995233727226593, + "learning_rate": 2.4271895564752645e-05, + "loss": 0.7681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37149572372436523, + "step": 3090, + "valid_targets_mean": 13063.9, + "valid_targets_min": 1306 + }, + { + "epoch": 2.4466403162055337, + "grad_norm": 0.5679582616675863, + "learning_rate": 2.4217959969651634e-05, + "loss": 0.7774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3629094660282135, + "step": 3095, + "valid_targets_mean": 13026.8, + "valid_targets_min": 773 + }, + { + "epoch": 2.450592885375494, + "grad_norm": 0.3853569323146549, + "learning_rate": 2.4163992251842664e-05, + "loss": 0.7831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4154769778251648, + "step": 3100, + "valid_targets_mean": 15460.8, + "valid_targets_min": 5594 + }, + { + "epoch": 2.4545454545454546, + "grad_norm": 0.4613945619878422, + "learning_rate": 2.410999282232755e-05, + "loss": 0.7756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4161749482154846, + "step": 3105, + "valid_targets_mean": 15344.5, + "valid_targets_min": 1684 + }, + { + "epoch": 2.458498023715415, + "grad_norm": 0.3850639525000737, + "learning_rate": 2.4055962092349635e-05, + "loss": 0.7724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36445170640945435, + "step": 3110, + "valid_targets_mean": 14100.6, + "valid_targets_min": 5101 + }, + { + "epoch": 2.4624505928853755, + "grad_norm": 0.45596889364521126, + "learning_rate": 2.400190047339062e-05, + "loss": 0.7809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4280589818954468, + "step": 3115, + "valid_targets_mean": 14173.2, + "valid_targets_min": 4212 + }, + { + "epoch": 2.466403162055336, + "grad_norm": 0.4543007349452076, + "learning_rate": 2.3947808377167444e-05, + "loss": 0.7717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3385325074195862, + "step": 3120, + "valid_targets_mean": 13442.8, + "valid_targets_min": 2297 + }, + { + "epoch": 2.4703557312252964, + "grad_norm": 0.5258919792025657, + "learning_rate": 2.3893686215629167e-05, + "loss": 0.7796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4407327473163605, + "step": 3125, + "valid_targets_mean": 14045.7, + "valid_targets_min": 707 + }, + { + "epoch": 2.474308300395257, + "grad_norm": 0.5300113894869891, + "learning_rate": 2.3839534400953807e-05, + "loss": 0.7844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3823537528514862, + "step": 3130, + "valid_targets_mean": 13427.0, + "valid_targets_min": 1220 + }, + { + "epoch": 2.4782608695652173, + "grad_norm": 0.4641918658631923, + "learning_rate": 2.378535334554522e-05, + "loss": 0.7628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3772657513618469, + "step": 3135, + "valid_targets_mean": 13356.5, + "valid_targets_min": 749 + }, + { + "epoch": 2.4822134387351777, + "grad_norm": 0.4840484752598125, + "learning_rate": 2.3731143462029928e-05, + "loss": 0.7547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4121960997581482, + "step": 3140, + "valid_targets_mean": 14708.7, + "valid_targets_min": 1409 + }, + { + "epoch": 2.486166007905138, + "grad_norm": 0.42607424037809116, + "learning_rate": 2.3676905163254027e-05, + "loss": 0.7767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4084748327732086, + "step": 3145, + "valid_targets_mean": 14709.5, + "valid_targets_min": 540 + }, + { + "epoch": 2.4901185770750986, + "grad_norm": 0.46080982813476135, + "learning_rate": 2.362263886228e-05, + "loss": 0.7646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3771706223487854, + "step": 3150, + "valid_targets_mean": 13509.6, + "valid_targets_min": 2087 + }, + { + "epoch": 2.494071146245059, + "grad_norm": 0.4285510968293881, + "learning_rate": 2.356834497238359e-05, + "loss": 0.7596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3943946659564972, + "step": 3155, + "valid_targets_mean": 15724.3, + "valid_targets_min": 3980 + }, + { + "epoch": 2.4980237154150196, + "grad_norm": 0.4152235483662349, + "learning_rate": 2.3514023907050654e-05, + "loss": 0.7705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35571491718292236, + "step": 3160, + "valid_targets_mean": 13888.0, + "valid_targets_min": 3744 + }, + { + "epoch": 2.5019762845849804, + "grad_norm": 0.4136193496466122, + "learning_rate": 2.345967607997399e-05, + "loss": 0.7786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34815075993537903, + "step": 3165, + "valid_targets_mean": 13528.8, + "valid_targets_min": 1038 + }, + { + "epoch": 2.5059288537549405, + "grad_norm": 0.47508409889548203, + "learning_rate": 2.3405301905050232e-05, + "loss": 0.7906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3998268246650696, + "step": 3170, + "valid_targets_mean": 14043.3, + "valid_targets_min": 700 + }, + { + "epoch": 2.5098814229249014, + "grad_norm": 0.4526997441324089, + "learning_rate": 2.3350901796376657e-05, + "loss": 0.7759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.379347562789917, + "step": 3175, + "valid_targets_mean": 13549.9, + "valid_targets_min": 1722 + }, + { + "epoch": 2.513833992094862, + "grad_norm": 0.6062196454967189, + "learning_rate": 2.329647616824803e-05, + "loss": 0.7823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3931977152824402, + "step": 3180, + "valid_targets_mean": 14306.0, + "valid_targets_min": 5255 + }, + { + "epoch": 2.5177865612648223, + "grad_norm": 0.36310143065912426, + "learning_rate": 2.3242025435153494e-05, + "loss": 0.7901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4255726933479309, + "step": 3185, + "valid_targets_mean": 14880.9, + "valid_targets_min": 4730 + }, + { + "epoch": 2.5217391304347827, + "grad_norm": 0.5599835441786823, + "learning_rate": 2.3187550011773373e-05, + "loss": 0.7897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37870922684669495, + "step": 3190, + "valid_targets_mean": 14443.4, + "valid_targets_min": 3227 + }, + { + "epoch": 2.525691699604743, + "grad_norm": 0.39762519577914746, + "learning_rate": 2.313305031297602e-05, + "loss": 0.7602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36241644620895386, + "step": 3195, + "valid_targets_mean": 13915.4, + "valid_targets_min": 717 + }, + { + "epoch": 2.5296442687747036, + "grad_norm": 0.48132755864820015, + "learning_rate": 2.3078526753814665e-05, + "loss": 0.7645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38540637493133545, + "step": 3200, + "valid_targets_mean": 14341.5, + "valid_targets_min": 2541 + }, + { + "epoch": 2.533596837944664, + "grad_norm": 0.4281788800004535, + "learning_rate": 2.3023979749524243e-05, + "loss": 0.7567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40059348940849304, + "step": 3205, + "valid_targets_mean": 15041.0, + "valid_targets_min": 2556 + }, + { + "epoch": 2.5375494071146245, + "grad_norm": 0.40973480350144065, + "learning_rate": 2.296940971551826e-05, + "loss": 0.7602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3770953416824341, + "step": 3210, + "valid_targets_mean": 14014.0, + "valid_targets_min": 448 + }, + { + "epoch": 2.541501976284585, + "grad_norm": 0.39546756615351086, + "learning_rate": 2.291481706738559e-05, + "loss": 0.7761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3814648985862732, + "step": 3215, + "valid_targets_mean": 13767.8, + "valid_targets_min": 2002 + }, + { + "epoch": 2.5454545454545454, + "grad_norm": 0.5546227148212878, + "learning_rate": 2.2860202220887338e-05, + "loss": 0.7656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3916764259338379, + "step": 3220, + "valid_targets_mean": 13389.4, + "valid_targets_min": 1139 + }, + { + "epoch": 2.549407114624506, + "grad_norm": 0.5127418069284794, + "learning_rate": 2.2805565591953666e-05, + "loss": 0.7791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39631062746047974, + "step": 3225, + "valid_targets_mean": 15600.1, + "valid_targets_min": 7144 + }, + { + "epoch": 2.5533596837944663, + "grad_norm": 0.4761084941539279, + "learning_rate": 2.275090759668062e-05, + "loss": 0.7831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3691408038139343, + "step": 3230, + "valid_targets_mean": 14435.3, + "valid_targets_min": 1821 + }, + { + "epoch": 2.5573122529644268, + "grad_norm": 0.5313023937268718, + "learning_rate": 2.2696228651326972e-05, + "loss": 0.7822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3804587125778198, + "step": 3235, + "valid_targets_mean": 14721.4, + "valid_targets_min": 3138 + }, + { + "epoch": 2.561264822134387, + "grad_norm": 0.5751525096682131, + "learning_rate": 2.2641529172311038e-05, + "loss": 0.7899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4019581377506256, + "step": 3240, + "valid_targets_mean": 15334.9, + "valid_targets_min": 3723 + }, + { + "epoch": 2.5652173913043477, + "grad_norm": 0.4399037886910513, + "learning_rate": 2.2586809576207505e-05, + "loss": 0.7778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3565340042114258, + "step": 3245, + "valid_targets_mean": 14067.0, + "valid_targets_min": 4249 + }, + { + "epoch": 2.5691699604743086, + "grad_norm": 0.48322303615753115, + "learning_rate": 2.2532070279744285e-05, + "loss": 0.7684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37612611055374146, + "step": 3250, + "valid_targets_mean": 13474.5, + "valid_targets_min": 884 + }, + { + "epoch": 2.5731225296442686, + "grad_norm": 0.5325857011168093, + "learning_rate": 2.2477311699799308e-05, + "loss": 0.7854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35244980454444885, + "step": 3255, + "valid_targets_mean": 13132.6, + "valid_targets_min": 2351 + }, + { + "epoch": 2.5770750988142295, + "grad_norm": 0.4867662130517175, + "learning_rate": 2.2422534253397364e-05, + "loss": 0.7686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3674978017807007, + "step": 3260, + "valid_targets_mean": 13640.5, + "valid_targets_min": 1891 + }, + { + "epoch": 2.5810276679841895, + "grad_norm": 0.43631816708368226, + "learning_rate": 2.2367738357706923e-05, + "loss": 0.7699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40214335918426514, + "step": 3265, + "valid_targets_mean": 14857.3, + "valid_targets_min": 1780 + }, + { + "epoch": 2.5849802371541504, + "grad_norm": 0.48094190050745467, + "learning_rate": 2.231292443003696e-05, + "loss": 0.7837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4277827739715576, + "step": 3270, + "valid_targets_mean": 14642.7, + "valid_targets_min": 1185 + }, + { + "epoch": 2.588932806324111, + "grad_norm": 0.36714030447834595, + "learning_rate": 2.2258092887833777e-05, + "loss": 0.771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.400951623916626, + "step": 3275, + "valid_targets_mean": 14002.3, + "valid_targets_min": 2355 + }, + { + "epoch": 2.5928853754940713, + "grad_norm": 0.46295872031884366, + "learning_rate": 2.2203244148677828e-05, + "loss": 0.7874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37999242544174194, + "step": 3280, + "valid_targets_mean": 14308.8, + "valid_targets_min": 1600 + }, + { + "epoch": 2.5968379446640317, + "grad_norm": 0.6638524842944484, + "learning_rate": 2.2148378630280526e-05, + "loss": 0.7666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.365829199552536, + "step": 3285, + "valid_targets_mean": 14155.2, + "valid_targets_min": 458 + }, + { + "epoch": 2.600790513833992, + "grad_norm": 0.6867809142385938, + "learning_rate": 2.2093496750481064e-05, + "loss": 0.7713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41077136993408203, + "step": 3290, + "valid_targets_mean": 14465.1, + "valid_targets_min": 3136 + }, + { + "epoch": 2.6047430830039526, + "grad_norm": 0.5066859958928227, + "learning_rate": 2.2038598927243268e-05, + "loss": 0.7855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.352563738822937, + "step": 3295, + "valid_targets_mean": 13875.2, + "valid_targets_min": 733 + }, + { + "epoch": 2.608695652173913, + "grad_norm": 0.3897598232206385, + "learning_rate": 2.198368557865234e-05, + "loss": 0.7792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3914433717727661, + "step": 3300, + "valid_targets_mean": 14937.5, + "valid_targets_min": 1494 + }, + { + "epoch": 2.6126482213438735, + "grad_norm": 0.4677758995543082, + "learning_rate": 2.1928757122911746e-05, + "loss": 0.7679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37029552459716797, + "step": 3305, + "valid_targets_mean": 14287.1, + "valid_targets_min": 1101 + }, + { + "epoch": 2.616600790513834, + "grad_norm": 0.4241605938088883, + "learning_rate": 2.1873813978340008e-05, + "loss": 0.7709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3976457118988037, + "step": 3310, + "valid_targets_mean": 14457.2, + "valid_targets_min": 1459 + }, + { + "epoch": 2.6205533596837944, + "grad_norm": 0.3996432613008201, + "learning_rate": 2.1818856563367504e-05, + "loss": 0.7746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4157434105873108, + "step": 3315, + "valid_targets_mean": 14275.3, + "valid_targets_min": 2279 + }, + { + "epoch": 2.624505928853755, + "grad_norm": 0.502368018740462, + "learning_rate": 2.1763885296533288e-05, + "loss": 0.7851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36270299553871155, + "step": 3320, + "valid_targets_mean": 14104.2, + "valid_targets_min": 3668 + }, + { + "epoch": 2.6284584980237153, + "grad_norm": 0.4676288626976099, + "learning_rate": 2.1708900596481904e-05, + "loss": 0.7591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3598445653915405, + "step": 3325, + "valid_targets_mean": 13801.2, + "valid_targets_min": 1476 + }, + { + "epoch": 2.632411067193676, + "grad_norm": 0.6956162081279936, + "learning_rate": 2.165390288196021e-05, + "loss": 0.7872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3917972445487976, + "step": 3330, + "valid_targets_mean": 15591.9, + "valid_targets_min": 2471 + }, + { + "epoch": 2.6363636363636362, + "grad_norm": 0.576842121298828, + "learning_rate": 2.1598892571814174e-05, + "loss": 0.7889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34919118881225586, + "step": 3335, + "valid_targets_mean": 13128.1, + "valid_targets_min": 300 + }, + { + "epoch": 2.6403162055335967, + "grad_norm": 0.6968979154864338, + "learning_rate": 2.154387008498569e-05, + "loss": 0.7653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44420677423477173, + "step": 3340, + "valid_targets_mean": 15688.2, + "valid_targets_min": 6783 + }, + { + "epoch": 2.6442687747035576, + "grad_norm": 0.7051179975628324, + "learning_rate": 2.148883584050938e-05, + "loss": 0.7842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.377674400806427, + "step": 3345, + "valid_targets_mean": 13249.9, + "valid_targets_min": 750 + }, + { + "epoch": 2.6482213438735176, + "grad_norm": 0.5653756835534364, + "learning_rate": 2.1433790257509408e-05, + "loss": 0.7816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38926032185554504, + "step": 3350, + "valid_targets_mean": 13908.6, + "valid_targets_min": 1200 + }, + { + "epoch": 2.6521739130434785, + "grad_norm": 0.4719331327354875, + "learning_rate": 2.1378733755196296e-05, + "loss": 0.7771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39561331272125244, + "step": 3355, + "valid_targets_mean": 15235.2, + "valid_targets_min": 3886 + }, + { + "epoch": 2.6561264822134385, + "grad_norm": 0.43635015744789746, + "learning_rate": 2.1323666752863725e-05, + "loss": 0.7825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39227962493896484, + "step": 3360, + "valid_targets_mean": 14076.7, + "valid_targets_min": 724 + }, + { + "epoch": 2.6600790513833994, + "grad_norm": 0.5539511531556188, + "learning_rate": 2.1268589669885332e-05, + "loss": 0.7825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3739437460899353, + "step": 3365, + "valid_targets_mean": 13485.3, + "valid_targets_min": 2620 + }, + { + "epoch": 2.66403162055336, + "grad_norm": 0.5013129980402862, + "learning_rate": 2.121350292571153e-05, + "loss": 0.7742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3966044783592224, + "step": 3370, + "valid_targets_mean": 15133.3, + "valid_targets_min": 2381 + }, + { + "epoch": 2.6679841897233203, + "grad_norm": 0.4058170696735073, + "learning_rate": 2.1158406939866315e-05, + "loss": 0.7705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3730071187019348, + "step": 3375, + "valid_targets_mean": 14696.4, + "valid_targets_min": 2020 + }, + { + "epoch": 2.6719367588932808, + "grad_norm": 0.5535854931538408, + "learning_rate": 2.110330213194405e-05, + "loss": 0.7586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41047123074531555, + "step": 3380, + "valid_targets_mean": 14903.7, + "valid_targets_min": 3626 + }, + { + "epoch": 2.675889328063241, + "grad_norm": 0.43473761151312595, + "learning_rate": 2.1048188921606304e-05, + "loss": 0.7763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3984124958515167, + "step": 3385, + "valid_targets_mean": 13542.1, + "valid_targets_min": 1701 + }, + { + "epoch": 2.6798418972332017, + "grad_norm": 0.7122893975934059, + "learning_rate": 2.099306772857861e-05, + "loss": 0.7887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3803366720676422, + "step": 3390, + "valid_targets_mean": 13682.2, + "valid_targets_min": 412 + }, + { + "epoch": 2.683794466403162, + "grad_norm": 0.49723901012454685, + "learning_rate": 2.0937938972647322e-05, + "loss": 0.7848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4008433222770691, + "step": 3395, + "valid_targets_mean": 14694.3, + "valid_targets_min": 764 + }, + { + "epoch": 2.6877470355731226, + "grad_norm": 0.4105938779888717, + "learning_rate": 2.0882803073656377e-05, + "loss": 0.7743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39382225275039673, + "step": 3400, + "valid_targets_mean": 14304.1, + "valid_targets_min": 1381 + }, + { + "epoch": 2.691699604743083, + "grad_norm": 0.5150400937304715, + "learning_rate": 2.082766045150411e-05, + "loss": 0.7771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.385837584733963, + "step": 3405, + "valid_targets_mean": 13070.4, + "valid_targets_min": 1729 + }, + { + "epoch": 2.6956521739130435, + "grad_norm": 0.46808804628950607, + "learning_rate": 2.077251152614006e-05, + "loss": 0.7669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3578745722770691, + "step": 3410, + "valid_targets_mean": 13102.6, + "valid_targets_min": 1991 + }, + { + "epoch": 2.699604743083004, + "grad_norm": 0.5266338648489687, + "learning_rate": 2.0717356717561768e-05, + "loss": 0.7645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39410364627838135, + "step": 3415, + "valid_targets_mean": 13567.2, + "valid_targets_min": 1433 + }, + { + "epoch": 2.7035573122529644, + "grad_norm": 0.484270788549827, + "learning_rate": 2.0662196445811596e-05, + "loss": 0.7666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40018320083618164, + "step": 3420, + "valid_targets_mean": 14990.0, + "valid_targets_min": 3726 + }, + { + "epoch": 2.707509881422925, + "grad_norm": 0.3843804144362952, + "learning_rate": 2.0607031130973482e-05, + "loss": 0.773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37156492471694946, + "step": 3425, + "valid_targets_mean": 14401.7, + "valid_targets_min": 4430 + }, + { + "epoch": 2.7114624505928853, + "grad_norm": 0.48580319832374746, + "learning_rate": 2.0551861193169786e-05, + "loss": 0.7684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41892027854919434, + "step": 3430, + "valid_targets_mean": 14842.3, + "valid_targets_min": 1801 + }, + { + "epoch": 2.7154150197628457, + "grad_norm": 0.5329784091220491, + "learning_rate": 2.0496687052558087e-05, + "loss": 0.7962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4082415699958801, + "step": 3435, + "valid_targets_mean": 15750.7, + "valid_targets_min": 4803 + }, + { + "epoch": 2.719367588932806, + "grad_norm": 0.41060582794767664, + "learning_rate": 2.0441509129327954e-05, + "loss": 0.7608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40854722261428833, + "step": 3440, + "valid_targets_mean": 15378.0, + "valid_targets_min": 4575 + }, + { + "epoch": 2.7233201581027666, + "grad_norm": 0.5515160087872353, + "learning_rate": 2.0386327843697767e-05, + "loss": 0.7814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4084497094154358, + "step": 3445, + "valid_targets_mean": 15253.1, + "valid_targets_min": 3554 + }, + { + "epoch": 2.7272727272727275, + "grad_norm": 0.5423948018364084, + "learning_rate": 2.0331143615911514e-05, + "loss": 0.772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39729198813438416, + "step": 3450, + "valid_targets_mean": 13414.1, + "valid_targets_min": 3511 + }, + { + "epoch": 2.7312252964426875, + "grad_norm": 0.4398666662587915, + "learning_rate": 2.0275956866235598e-05, + "loss": 0.7822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3724115490913391, + "step": 3455, + "valid_targets_mean": 14661.4, + "valid_targets_min": 4953 + }, + { + "epoch": 2.7351778656126484, + "grad_norm": 0.521919898421308, + "learning_rate": 2.0220768014955614e-05, + "loss": 0.7824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.421092689037323, + "step": 3460, + "valid_targets_mean": 13954.8, + "valid_targets_min": 850 + }, + { + "epoch": 2.7391304347826084, + "grad_norm": 0.45400094417860254, + "learning_rate": 2.0165577482373167e-05, + "loss": 0.7896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39997124671936035, + "step": 3465, + "valid_targets_mean": 13964.5, + "valid_targets_min": 1765 + }, + { + "epoch": 2.7430830039525693, + "grad_norm": 0.45291961034237666, + "learning_rate": 2.011038568880267e-05, + "loss": 0.7778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37030526995658875, + "step": 3470, + "valid_targets_mean": 14179.7, + "valid_targets_min": 1383 + }, + { + "epoch": 2.7470355731225298, + "grad_norm": 0.5258059718402178, + "learning_rate": 2.0055193054568148e-05, + "loss": 0.7689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36693617701530457, + "step": 3475, + "valid_targets_mean": 13204.1, + "valid_targets_min": 2015 + }, + { + "epoch": 2.7509881422924902, + "grad_norm": 0.47427141962898633, + "learning_rate": 2e-05, + "loss": 0.7862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3728561997413635, + "step": 3480, + "valid_targets_mean": 13166.8, + "valid_targets_min": 2233 + }, + { + "epoch": 2.7549407114624507, + "grad_norm": 0.418670521413244, + "learning_rate": 1.9944806945431862e-05, + "loss": 0.7698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3651885390281677, + "step": 3485, + "valid_targets_mean": 14787.6, + "valid_targets_min": 926 + }, + { + "epoch": 2.758893280632411, + "grad_norm": 0.556863922554222, + "learning_rate": 1.9889614311197335e-05, + "loss": 0.7799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3731737732887268, + "step": 3490, + "valid_targets_mean": 13949.6, + "valid_targets_min": 1185 + }, + { + "epoch": 2.7628458498023716, + "grad_norm": 0.46497422242927816, + "learning_rate": 1.983442251762684e-05, + "loss": 0.7792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3922172784805298, + "step": 3495, + "valid_targets_mean": 15890.0, + "valid_targets_min": 3519 + }, + { + "epoch": 2.766798418972332, + "grad_norm": 0.49825416047722326, + "learning_rate": 1.9779231985044393e-05, + "loss": 0.7552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35347694158554077, + "step": 3500, + "valid_targets_mean": 14829.6, + "valid_targets_min": 1861 + }, + { + "epoch": 2.7707509881422925, + "grad_norm": 0.47315058414881683, + "learning_rate": 1.9724043133764416e-05, + "loss": 0.7728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39114874601364136, + "step": 3505, + "valid_targets_mean": 15113.3, + "valid_targets_min": 3908 + }, + { + "epoch": 2.774703557312253, + "grad_norm": 0.41780863056124573, + "learning_rate": 1.9668856384088493e-05, + "loss": 0.7801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39700210094451904, + "step": 3510, + "valid_targets_mean": 13817.3, + "valid_targets_min": 2911 + }, + { + "epoch": 2.7786561264822134, + "grad_norm": 0.5365995043048214, + "learning_rate": 1.9613672156302243e-05, + "loss": 0.7596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4168865978717804, + "step": 3515, + "valid_targets_mean": 15364.8, + "valid_targets_min": 5993 + }, + { + "epoch": 2.782608695652174, + "grad_norm": 0.4636744974074533, + "learning_rate": 1.955849087067205e-05, + "loss": 0.768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36195653676986694, + "step": 3520, + "valid_targets_mean": 14295.5, + "valid_targets_min": 3395 + }, + { + "epoch": 2.7865612648221343, + "grad_norm": 0.40998266855738624, + "learning_rate": 1.950331294744192e-05, + "loss": 0.7774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3889957666397095, + "step": 3525, + "valid_targets_mean": 14496.5, + "valid_targets_min": 550 + }, + { + "epoch": 2.7905138339920947, + "grad_norm": 0.4394157400590593, + "learning_rate": 1.9448138806830217e-05, + "loss": 0.7732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3660573661327362, + "step": 3530, + "valid_targets_mean": 12966.8, + "valid_targets_min": 544 + }, + { + "epoch": 2.794466403162055, + "grad_norm": 0.48311526685355016, + "learning_rate": 1.939296886902653e-05, + "loss": 0.7934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4422834515571594, + "step": 3535, + "valid_targets_mean": 15669.3, + "valid_targets_min": 8125 + }, + { + "epoch": 2.7984189723320156, + "grad_norm": 0.4179445582154484, + "learning_rate": 1.933780355418841e-05, + "loss": 0.7621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3492257297039032, + "step": 3540, + "valid_targets_mean": 13286.4, + "valid_targets_min": 1579 + }, + { + "epoch": 2.8023715415019765, + "grad_norm": 0.39911111161573287, + "learning_rate": 1.9282643282438235e-05, + "loss": 0.7792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.400195449590683, + "step": 3545, + "valid_targets_mean": 14460.6, + "valid_targets_min": 3487 + }, + { + "epoch": 2.8063241106719365, + "grad_norm": 0.4368164750749288, + "learning_rate": 1.9227488473859947e-05, + "loss": 0.7682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38179901242256165, + "step": 3550, + "valid_targets_mean": 14099.4, + "valid_targets_min": 1143 + }, + { + "epoch": 2.8102766798418974, + "grad_norm": 0.4173085582327926, + "learning_rate": 1.9172339548495896e-05, + "loss": 0.7697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39176684617996216, + "step": 3555, + "valid_targets_mean": 14099.2, + "valid_targets_min": 2427 + }, + { + "epoch": 2.8142292490118574, + "grad_norm": 0.44616252599744827, + "learning_rate": 1.9117196926343627e-05, + "loss": 0.7877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.413191020488739, + "step": 3560, + "valid_targets_mean": 15905.2, + "valid_targets_min": 7046 + }, + { + "epoch": 2.8181818181818183, + "grad_norm": 0.4179463767348179, + "learning_rate": 1.9062061027352684e-05, + "loss": 0.7705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36210858821868896, + "step": 3565, + "valid_targets_mean": 13612.7, + "valid_targets_min": 3744 + }, + { + "epoch": 2.822134387351779, + "grad_norm": 0.5776239592978338, + "learning_rate": 1.900693227142139e-05, + "loss": 0.771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3831004500389099, + "step": 3570, + "valid_targets_mean": 14160.1, + "valid_targets_min": 5449 + }, + { + "epoch": 2.8260869565217392, + "grad_norm": 0.4065501443361682, + "learning_rate": 1.8951811078393703e-05, + "loss": 0.7701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.431177020072937, + "step": 3575, + "valid_targets_mean": 15103.4, + "valid_targets_min": 1176 + }, + { + "epoch": 2.8300395256916997, + "grad_norm": 0.5265037901501562, + "learning_rate": 1.889669786805595e-05, + "loss": 0.7627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3293297290802002, + "step": 3580, + "valid_targets_mean": 13212.9, + "valid_targets_min": 485 + }, + { + "epoch": 2.83399209486166, + "grad_norm": 0.6263485877330873, + "learning_rate": 1.884159306013369e-05, + "loss": 0.7871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3698626160621643, + "step": 3585, + "valid_targets_mean": 13478.8, + "valid_targets_min": 1003 + }, + { + "epoch": 2.8379446640316206, + "grad_norm": 0.43801477237062486, + "learning_rate": 1.878649707428847e-05, + "loss": 0.7789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37653183937072754, + "step": 3590, + "valid_targets_mean": 13727.9, + "valid_targets_min": 1255 + }, + { + "epoch": 2.841897233201581, + "grad_norm": 0.391747949668885, + "learning_rate": 1.873141033011467e-05, + "loss": 0.7635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3786509335041046, + "step": 3595, + "valid_targets_mean": 15016.3, + "valid_targets_min": 621 + }, + { + "epoch": 2.8458498023715415, + "grad_norm": 0.474974686113166, + "learning_rate": 1.8676333247136275e-05, + "loss": 0.7493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4067635238170624, + "step": 3600, + "valid_targets_mean": 14381.7, + "valid_targets_min": 2033 + }, + { + "epoch": 2.849802371541502, + "grad_norm": 0.4126686647629223, + "learning_rate": 1.862126624480371e-05, + "loss": 0.7781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40464115142822266, + "step": 3605, + "valid_targets_mean": 14189.7, + "valid_targets_min": 1033 + }, + { + "epoch": 2.8537549407114624, + "grad_norm": 0.4525039004880486, + "learning_rate": 1.8566209742490603e-05, + "loss": 0.7764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3622155785560608, + "step": 3610, + "valid_targets_mean": 12560.0, + "valid_targets_min": 1512 + }, + { + "epoch": 2.857707509881423, + "grad_norm": 0.37716983310394325, + "learning_rate": 1.8511164159490627e-05, + "loss": 0.7783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3754638135433197, + "step": 3615, + "valid_targets_mean": 12792.7, + "valid_targets_min": 639 + }, + { + "epoch": 2.8616600790513833, + "grad_norm": 0.455677282432222, + "learning_rate": 1.845612991501432e-05, + "loss": 0.7805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39505481719970703, + "step": 3620, + "valid_targets_mean": 14408.1, + "valid_targets_min": 2698 + }, + { + "epoch": 2.8656126482213438, + "grad_norm": 0.540975347740408, + "learning_rate": 1.840110742818583e-05, + "loss": 0.7543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4062827229499817, + "step": 3625, + "valid_targets_mean": 14381.5, + "valid_targets_min": 1381 + }, + { + "epoch": 2.869565217391304, + "grad_norm": 0.4265551564131819, + "learning_rate": 1.83460971180398e-05, + "loss": 0.7843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34960857033729553, + "step": 3630, + "valid_targets_mean": 12807.5, + "valid_targets_min": 2422 + }, + { + "epoch": 2.8735177865612647, + "grad_norm": 0.38429883252182134, + "learning_rate": 1.8291099403518103e-05, + "loss": 0.7859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3951256275177002, + "step": 3635, + "valid_targets_mean": 14858.4, + "valid_targets_min": 3905 + }, + { + "epoch": 2.8774703557312256, + "grad_norm": 0.37282585126671836, + "learning_rate": 1.8236114703466726e-05, + "loss": 0.7717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38665831089019775, + "step": 3640, + "valid_targets_mean": 12994.4, + "valid_targets_min": 2458 + }, + { + "epoch": 2.8814229249011856, + "grad_norm": 0.4923937382025989, + "learning_rate": 1.8181143436632503e-05, + "loss": 0.7832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.399013876914978, + "step": 3645, + "valid_targets_mean": 14247.9, + "valid_targets_min": 1091 + }, + { + "epoch": 2.8853754940711465, + "grad_norm": 0.39779269708531495, + "learning_rate": 1.812618602166e-05, + "loss": 0.7741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3636450171470642, + "step": 3650, + "valid_targets_mean": 13838.2, + "valid_targets_min": 572 + }, + { + "epoch": 2.8893280632411065, + "grad_norm": 0.4119813000698695, + "learning_rate": 1.807124287708826e-05, + "loss": 0.7739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4161669611930847, + "step": 3655, + "valid_targets_mean": 15265.4, + "valid_targets_min": 5958 + }, + { + "epoch": 2.8932806324110674, + "grad_norm": 0.49328942578364465, + "learning_rate": 1.801631442134767e-05, + "loss": 0.7629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37876129150390625, + "step": 3660, + "valid_targets_mean": 15374.9, + "valid_targets_min": 5666 + }, + { + "epoch": 2.897233201581028, + "grad_norm": 0.6035316499347775, + "learning_rate": 1.796140107275674e-05, + "loss": 0.7658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4009701609611511, + "step": 3665, + "valid_targets_mean": 14371.7, + "valid_targets_min": 732 + }, + { + "epoch": 2.9011857707509883, + "grad_norm": 0.38709744469076035, + "learning_rate": 1.7906503249518942e-05, + "loss": 0.775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3562006950378418, + "step": 3670, + "valid_targets_mean": 13748.6, + "valid_targets_min": 648 + }, + { + "epoch": 2.9051383399209487, + "grad_norm": 0.3972190183528178, + "learning_rate": 1.7851621369719478e-05, + "loss": 0.7695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.414151132106781, + "step": 3675, + "valid_targets_mean": 15228.0, + "valid_targets_min": 4521 + }, + { + "epoch": 2.909090909090909, + "grad_norm": 0.48252295664887024, + "learning_rate": 1.7796755851322175e-05, + "loss": 0.7813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3670024871826172, + "step": 3680, + "valid_targets_mean": 14406.7, + "valid_targets_min": 3632 + }, + { + "epoch": 2.9130434782608696, + "grad_norm": 0.6292552054569307, + "learning_rate": 1.7741907112166223e-05, + "loss": 0.7731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35857754945755005, + "step": 3685, + "valid_targets_mean": 13351.0, + "valid_targets_min": 2324 + }, + { + "epoch": 2.91699604743083, + "grad_norm": 0.49123955520088974, + "learning_rate": 1.768707556996305e-05, + "loss": 0.7852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3826162815093994, + "step": 3690, + "valid_targets_mean": 13529.9, + "valid_targets_min": 549 + }, + { + "epoch": 2.9209486166007905, + "grad_norm": 0.5731607063414569, + "learning_rate": 1.7632261642293084e-05, + "loss": 0.7792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4220391809940338, + "step": 3695, + "valid_targets_mean": 14487.2, + "valid_targets_min": 4407 + }, + { + "epoch": 2.924901185770751, + "grad_norm": 0.4926581279630939, + "learning_rate": 1.7577465746602643e-05, + "loss": 0.7678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3845979571342468, + "step": 3700, + "valid_targets_mean": 13738.8, + "valid_targets_min": 1824 + }, + { + "epoch": 2.9288537549407114, + "grad_norm": 0.45046792361270627, + "learning_rate": 1.7522688300200692e-05, + "loss": 0.7733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38029879331588745, + "step": 3705, + "valid_targets_mean": 13991.8, + "valid_targets_min": 1765 + }, + { + "epoch": 2.932806324110672, + "grad_norm": 0.4193664318172099, + "learning_rate": 1.7467929720255722e-05, + "loss": 0.7774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37046027183532715, + "step": 3710, + "valid_targets_mean": 12500.6, + "valid_targets_min": 1676 + }, + { + "epoch": 2.9367588932806323, + "grad_norm": 0.4286218499854701, + "learning_rate": 1.7413190423792495e-05, + "loss": 0.7763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4286716878414154, + "step": 3715, + "valid_targets_mean": 14305.9, + "valid_targets_min": 1501 + }, + { + "epoch": 2.940711462450593, + "grad_norm": 0.40196559341115684, + "learning_rate": 1.735847082768897e-05, + "loss": 0.7671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4034353494644165, + "step": 3720, + "valid_targets_mean": 14373.5, + "valid_targets_min": 1248 + }, + { + "epoch": 2.9446640316205532, + "grad_norm": 0.5172469158960632, + "learning_rate": 1.7303771348673028e-05, + "loss": 0.7797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38260069489479065, + "step": 3725, + "valid_targets_mean": 14008.4, + "valid_targets_min": 727 + }, + { + "epoch": 2.9486166007905137, + "grad_norm": 0.4550542840750786, + "learning_rate": 1.724909240331938e-05, + "loss": 0.7743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37679821252822876, + "step": 3730, + "valid_targets_mean": 14280.9, + "valid_targets_min": 803 + }, + { + "epoch": 2.9525691699604746, + "grad_norm": 0.4284860051322704, + "learning_rate": 1.719443440804634e-05, + "loss": 0.7609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36865293979644775, + "step": 3735, + "valid_targets_mean": 14025.7, + "valid_targets_min": 2858 + }, + { + "epoch": 2.9565217391304346, + "grad_norm": 0.4441613714588562, + "learning_rate": 1.7139797779112665e-05, + "loss": 0.7871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3846833407878876, + "step": 3740, + "valid_targets_mean": 14815.1, + "valid_targets_min": 1027 + }, + { + "epoch": 2.9604743083003955, + "grad_norm": 0.6263692755901162, + "learning_rate": 1.708518293261442e-05, + "loss": 0.787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37775030732154846, + "step": 3745, + "valid_targets_mean": 14933.4, + "valid_targets_min": 3606 + }, + { + "epoch": 2.9644268774703555, + "grad_norm": 0.510661787672573, + "learning_rate": 1.7030590284481747e-05, + "loss": 0.7776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3546392321586609, + "step": 3750, + "valid_targets_mean": 12486.1, + "valid_targets_min": 1880 + }, + { + "epoch": 2.9683794466403164, + "grad_norm": 0.43073091935181634, + "learning_rate": 1.6976020250475763e-05, + "loss": 0.7784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36328399181365967, + "step": 3755, + "valid_targets_mean": 13710.1, + "valid_targets_min": 1062 + }, + { + "epoch": 2.972332015810277, + "grad_norm": 0.4460194179794662, + "learning_rate": 1.6921473246185342e-05, + "loss": 0.7716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3796805739402771, + "step": 3760, + "valid_targets_mean": 13291.2, + "valid_targets_min": 2421 + }, + { + "epoch": 2.9762845849802373, + "grad_norm": 0.37816587399321366, + "learning_rate": 1.686694968702399e-05, + "loss": 0.783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39478665590286255, + "step": 3765, + "valid_targets_mean": 14523.1, + "valid_targets_min": 1742 + }, + { + "epoch": 2.9802371541501977, + "grad_norm": 0.4472228717878897, + "learning_rate": 1.681244998822663e-05, + "loss": 0.7716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39256033301353455, + "step": 3770, + "valid_targets_mean": 14972.8, + "valid_targets_min": 3310 + }, + { + "epoch": 2.984189723320158, + "grad_norm": 0.5095278208003844, + "learning_rate": 1.675797456484651e-05, + "loss": 0.7896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42199549078941345, + "step": 3775, + "valid_targets_mean": 15347.9, + "valid_targets_min": 1153 + }, + { + "epoch": 2.9881422924901186, + "grad_norm": 0.40040785071458435, + "learning_rate": 1.6703523831751975e-05, + "loss": 0.7594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40427595376968384, + "step": 3780, + "valid_targets_mean": 14055.6, + "valid_targets_min": 1817 + }, + { + "epoch": 2.992094861660079, + "grad_norm": 0.38194509005046623, + "learning_rate": 1.6649098203623357e-05, + "loss": 0.777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3975467085838318, + "step": 3785, + "valid_targets_mean": 15221.3, + "valid_targets_min": 5772 + }, + { + "epoch": 2.9960474308300395, + "grad_norm": 0.4247184873612966, + "learning_rate": 1.659469809494977e-05, + "loss": 0.7782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3829805254936218, + "step": 3790, + "valid_targets_mean": 13306.4, + "valid_targets_min": 930 + }, + { + "epoch": 3.0, + "grad_norm": 0.47563996986897783, + "learning_rate": 1.6540323920026016e-05, + "loss": 0.7822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36778271198272705, + "step": 3795, + "valid_targets_mean": 14882.0, + "valid_targets_min": 1536 + }, + { + "epoch": 3.0039525691699605, + "grad_norm": 0.5022982740236087, + "learning_rate": 1.6485976092949352e-05, + "loss": 0.7886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4138259291648865, + "step": 3800, + "valid_targets_mean": 14776.2, + "valid_targets_min": 3009 + }, + { + "epoch": 3.007905138339921, + "grad_norm": 0.4706299176231315, + "learning_rate": 1.6431655027616412e-05, + "loss": 0.7874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38332653045654297, + "step": 3805, + "valid_targets_mean": 14134.9, + "valid_targets_min": 873 + }, + { + "epoch": 3.0118577075098814, + "grad_norm": 0.3582539206273108, + "learning_rate": 1.6377361137720002e-05, + "loss": 0.7704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36839526891708374, + "step": 3810, + "valid_targets_mean": 14321.9, + "valid_targets_min": 1692 + }, + { + "epoch": 3.015810276679842, + "grad_norm": 0.3778974697139966, + "learning_rate": 1.632309483674598e-05, + "loss": 0.7589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42573440074920654, + "step": 3815, + "valid_targets_mean": 14963.6, + "valid_targets_min": 5126 + }, + { + "epoch": 3.0197628458498023, + "grad_norm": 0.41527263141483284, + "learning_rate": 1.6268856537970075e-05, + "loss": 0.7601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3865579068660736, + "step": 3820, + "valid_targets_mean": 14014.7, + "valid_targets_min": 627 + }, + { + "epoch": 3.0237154150197627, + "grad_norm": 0.40780312090710047, + "learning_rate": 1.6214646654454787e-05, + "loss": 0.7674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3750816583633423, + "step": 3825, + "valid_targets_mean": 13430.6, + "valid_targets_min": 1793 + }, + { + "epoch": 3.027667984189723, + "grad_norm": 0.48096537296014485, + "learning_rate": 1.616046559904619e-05, + "loss": 0.7758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4138443171977997, + "step": 3830, + "valid_targets_mean": 15688.9, + "valid_targets_min": 6889 + }, + { + "epoch": 3.0316205533596836, + "grad_norm": 0.509159715075041, + "learning_rate": 1.6106313784370836e-05, + "loss": 0.768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39609789848327637, + "step": 3835, + "valid_targets_mean": 14318.3, + "valid_targets_min": 2433 + }, + { + "epoch": 3.035573122529644, + "grad_norm": 0.3844471364011385, + "learning_rate": 1.605219162283256e-05, + "loss": 0.7647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3819582462310791, + "step": 3840, + "valid_targets_mean": 13291.2, + "valid_targets_min": 1592 + }, + { + "epoch": 3.039525691699605, + "grad_norm": 0.3957820481481498, + "learning_rate": 1.5998099526609388e-05, + "loss": 0.7672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35220807790756226, + "step": 3845, + "valid_targets_mean": 13707.6, + "valid_targets_min": 3609 + }, + { + "epoch": 3.0434782608695654, + "grad_norm": 0.38377854973720504, + "learning_rate": 1.594403790765037e-05, + "loss": 0.7884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42910298705101013, + "step": 3850, + "valid_targets_mean": 15164.4, + "valid_targets_min": 1635 + }, + { + "epoch": 3.047430830039526, + "grad_norm": 0.3487058853825699, + "learning_rate": 1.5890007177672456e-05, + "loss": 0.7748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3809701204299927, + "step": 3855, + "valid_targets_mean": 14633.8, + "valid_targets_min": 2319 + }, + { + "epoch": 3.0513833992094863, + "grad_norm": 0.37103626355817415, + "learning_rate": 1.583600774815735e-05, + "loss": 0.7895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3773690462112427, + "step": 3860, + "valid_targets_mean": 14263.7, + "valid_targets_min": 549 + }, + { + "epoch": 3.0553359683794468, + "grad_norm": 0.37849931264359615, + "learning_rate": 1.5782040030348372e-05, + "loss": 0.7651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3994598686695099, + "step": 3865, + "valid_targets_mean": 14912.2, + "valid_targets_min": 2604 + }, + { + "epoch": 3.059288537549407, + "grad_norm": 0.42853323268568494, + "learning_rate": 1.5728104435247365e-05, + "loss": 0.769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35063233971595764, + "step": 3870, + "valid_targets_mean": 13895.4, + "valid_targets_min": 1971 + }, + { + "epoch": 3.0632411067193677, + "grad_norm": 0.3918069008691398, + "learning_rate": 1.5674201373611497e-05, + "loss": 0.7989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41139569878578186, + "step": 3875, + "valid_targets_mean": 13889.9, + "valid_targets_min": 937 + }, + { + "epoch": 3.067193675889328, + "grad_norm": 0.39861988041041585, + "learning_rate": 1.56203312559502e-05, + "loss": 0.7833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.386289119720459, + "step": 3880, + "valid_targets_mean": 14063.4, + "valid_targets_min": 1244 + }, + { + "epoch": 3.0711462450592886, + "grad_norm": 0.4018077512726005, + "learning_rate": 1.5566494492521996e-05, + "loss": 0.7652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3969590663909912, + "step": 3885, + "valid_targets_mean": 15262.5, + "valid_targets_min": 2558 + }, + { + "epoch": 3.075098814229249, + "grad_norm": 0.4450781605042407, + "learning_rate": 1.5512691493331416e-05, + "loss": 0.7726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3803051710128784, + "step": 3890, + "valid_targets_mean": 14665.7, + "valid_targets_min": 1917 + }, + { + "epoch": 3.0790513833992095, + "grad_norm": 0.3626318969395194, + "learning_rate": 1.545892266812581e-05, + "loss": 0.7609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3422858715057373, + "step": 3895, + "valid_targets_mean": 13997.6, + "valid_targets_min": 2836 + }, + { + "epoch": 3.08300395256917, + "grad_norm": 0.36518432636817943, + "learning_rate": 1.540518842639232e-05, + "loss": 0.7764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4128277897834778, + "step": 3900, + "valid_targets_mean": 13319.1, + "valid_targets_min": 1928 + }, + { + "epoch": 3.0869565217391304, + "grad_norm": 0.4590253092036026, + "learning_rate": 1.5351489177354673e-05, + "loss": 0.7741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.410162091255188, + "step": 3905, + "valid_targets_mean": 14488.2, + "valid_targets_min": 2500 + }, + { + "epoch": 3.090909090909091, + "grad_norm": 0.5197378362211535, + "learning_rate": 1.5297825329970124e-05, + "loss": 0.7754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38974297046661377, + "step": 3910, + "valid_targets_mean": 14696.8, + "valid_targets_min": 1970 + }, + { + "epoch": 3.0948616600790513, + "grad_norm": 0.3930355594485013, + "learning_rate": 1.5244197292926298e-05, + "loss": 0.7686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4064897894859314, + "step": 3915, + "valid_targets_mean": 14773.0, + "valid_targets_min": 3864 + }, + { + "epoch": 3.0988142292490117, + "grad_norm": 0.5079447407012323, + "learning_rate": 1.5190605474638122e-05, + "loss": 0.7534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40180671215057373, + "step": 3920, + "valid_targets_mean": 14284.3, + "valid_targets_min": 1500 + }, + { + "epoch": 3.102766798418972, + "grad_norm": 0.403903572667212, + "learning_rate": 1.5137050283244678e-05, + "loss": 0.7724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38005661964416504, + "step": 3925, + "valid_targets_mean": 13841.1, + "valid_targets_min": 1722 + }, + { + "epoch": 3.1067193675889326, + "grad_norm": 0.4238345821061741, + "learning_rate": 1.5083532126606124e-05, + "loss": 0.7522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37404435873031616, + "step": 3930, + "valid_targets_mean": 14185.5, + "valid_targets_min": 468 + }, + { + "epoch": 3.110671936758893, + "grad_norm": 0.46164883304603, + "learning_rate": 1.5030051412300546e-05, + "loss": 0.7879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38624754548072815, + "step": 3935, + "valid_targets_mean": 14069.2, + "valid_targets_min": 2352 + }, + { + "epoch": 3.1146245059288535, + "grad_norm": 0.4120482547262339, + "learning_rate": 1.4976608547620904e-05, + "loss": 0.7714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.424770712852478, + "step": 3940, + "valid_targets_mean": 14957.3, + "valid_targets_min": 1689 + }, + { + "epoch": 3.1185770750988144, + "grad_norm": 0.40006466527294987, + "learning_rate": 1.4923203939571896e-05, + "loss": 0.777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3708992004394531, + "step": 3945, + "valid_targets_mean": 14281.0, + "valid_targets_min": 1018 + }, + { + "epoch": 3.122529644268775, + "grad_norm": 0.3941716064484287, + "learning_rate": 1.486983799486688e-05, + "loss": 0.7764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42297688126564026, + "step": 3950, + "valid_targets_mean": 14472.5, + "valid_targets_min": 2716 + }, + { + "epoch": 3.1264822134387353, + "grad_norm": 0.37602010758011634, + "learning_rate": 1.4816511119924739e-05, + "loss": 0.7688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38304489850997925, + "step": 3955, + "valid_targets_mean": 13362.4, + "valid_targets_min": 1539 + }, + { + "epoch": 3.130434782608696, + "grad_norm": 0.46049561457305077, + "learning_rate": 1.4763223720866854e-05, + "loss": 0.7822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38778162002563477, + "step": 3960, + "valid_targets_mean": 14028.6, + "valid_targets_min": 837 + }, + { + "epoch": 3.1343873517786562, + "grad_norm": 0.4813969350924027, + "learning_rate": 1.4709976203513925e-05, + "loss": 0.7601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39721745252609253, + "step": 3965, + "valid_targets_mean": 13473.9, + "valid_targets_min": 2978 + }, + { + "epoch": 3.1383399209486167, + "grad_norm": 0.3733612741977813, + "learning_rate": 1.465676897338296e-05, + "loss": 0.769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4473903775215149, + "step": 3970, + "valid_targets_mean": 15622.4, + "valid_targets_min": 4757 + }, + { + "epoch": 3.142292490118577, + "grad_norm": 0.42940091494127064, + "learning_rate": 1.460360243568413e-05, + "loss": 0.776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3870553970336914, + "step": 3975, + "valid_targets_mean": 13838.6, + "valid_targets_min": 972 + }, + { + "epoch": 3.1462450592885376, + "grad_norm": 0.4447257775337491, + "learning_rate": 1.4550476995317727e-05, + "loss": 0.7656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40475964546203613, + "step": 3980, + "valid_targets_mean": 14691.8, + "valid_targets_min": 4090 + }, + { + "epoch": 3.150197628458498, + "grad_norm": 0.37894334049832545, + "learning_rate": 1.4497393056871027e-05, + "loss": 0.7823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3915172815322876, + "step": 3985, + "valid_targets_mean": 14447.0, + "valid_targets_min": 1337 + }, + { + "epoch": 3.1541501976284585, + "grad_norm": 0.45044003947149913, + "learning_rate": 1.444435102461526e-05, + "loss": 0.7756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4218601584434509, + "step": 3990, + "valid_targets_mean": 14046.0, + "valid_targets_min": 437 + }, + { + "epoch": 3.158102766798419, + "grad_norm": 0.38312145617349347, + "learning_rate": 1.4391351302502523e-05, + "loss": 0.7554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34695369005203247, + "step": 3995, + "valid_targets_mean": 13842.3, + "valid_targets_min": 2478 + }, + { + "epoch": 3.1620553359683794, + "grad_norm": 0.3610118335119697, + "learning_rate": 1.4338394294162654e-05, + "loss": 0.7761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.420643150806427, + "step": 4000, + "valid_targets_mean": 14676.8, + "valid_targets_min": 4908 + }, + { + "epoch": 3.16600790513834, + "grad_norm": 0.4063112989333291, + "learning_rate": 1.4285480402900236e-05, + "loss": 0.7669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37793582677841187, + "step": 4005, + "valid_targets_mean": 13004.5, + "valid_targets_min": 573 + }, + { + "epoch": 3.1699604743083003, + "grad_norm": 0.404178240214005, + "learning_rate": 1.4232610031691459e-05, + "loss": 0.7698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40523672103881836, + "step": 4010, + "valid_targets_mean": 14582.1, + "valid_targets_min": 4768 + }, + { + "epoch": 3.1739130434782608, + "grad_norm": 0.4106811747009398, + "learning_rate": 1.4179783583181098e-05, + "loss": 0.7765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39425960183143616, + "step": 4015, + "valid_targets_mean": 14939.9, + "valid_targets_min": 1927 + }, + { + "epoch": 3.177865612648221, + "grad_norm": 0.3568822200638098, + "learning_rate": 1.4127001459679397e-05, + "loss": 0.7597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3916058838367462, + "step": 4020, + "valid_targets_mean": 12731.4, + "valid_targets_min": 1071 + }, + { + "epoch": 3.1818181818181817, + "grad_norm": 0.5766424895404302, + "learning_rate": 1.4074264063159064e-05, + "loss": 0.779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40912574529647827, + "step": 4025, + "valid_targets_mean": 15468.0, + "valid_targets_min": 2015 + }, + { + "epoch": 3.185770750988142, + "grad_norm": 0.45421297355285056, + "learning_rate": 1.402157179525216e-05, + "loss": 0.7779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4083622694015503, + "step": 4030, + "valid_targets_mean": 14907.0, + "valid_targets_min": 3172 + }, + { + "epoch": 3.1897233201581026, + "grad_norm": 0.39714940660711295, + "learning_rate": 1.3968925057247082e-05, + "loss": 0.7649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3755744397640228, + "step": 4035, + "valid_targets_mean": 13742.5, + "valid_targets_min": 1534 + }, + { + "epoch": 3.1936758893280635, + "grad_norm": 0.3956939855978345, + "learning_rate": 1.3916324250085451e-05, + "loss": 0.7745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41691601276397705, + "step": 4040, + "valid_targets_mean": 15391.4, + "valid_targets_min": 1136 + }, + { + "epoch": 3.197628458498024, + "grad_norm": 0.42102469417026006, + "learning_rate": 1.3863769774359126e-05, + "loss": 0.7852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3950604498386383, + "step": 4045, + "valid_targets_mean": 14013.7, + "valid_targets_min": 1690 + }, + { + "epoch": 3.2015810276679844, + "grad_norm": 0.39486021477381955, + "learning_rate": 1.3811262030307098e-05, + "loss": 0.7777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42867332696914673, + "step": 4050, + "valid_targets_mean": 14518.8, + "valid_targets_min": 2348 + }, + { + "epoch": 3.205533596837945, + "grad_norm": 0.45275200762889833, + "learning_rate": 1.375880141781248e-05, + "loss": 0.7826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40804851055145264, + "step": 4055, + "valid_targets_mean": 14392.6, + "valid_targets_min": 4952 + }, + { + "epoch": 3.2094861660079053, + "grad_norm": 0.4514211366707812, + "learning_rate": 1.3706388336399425e-05, + "loss": 0.7679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36850303411483765, + "step": 4060, + "valid_targets_mean": 13286.7, + "valid_targets_min": 679 + }, + { + "epoch": 3.2134387351778657, + "grad_norm": 0.3669106881716158, + "learning_rate": 1.3654023185230125e-05, + "loss": 0.7676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36262792348861694, + "step": 4065, + "valid_targets_mean": 14129.4, + "valid_targets_min": 2137 + }, + { + "epoch": 3.217391304347826, + "grad_norm": 0.3743956407215081, + "learning_rate": 1.3601706363101737e-05, + "loss": 0.7737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38553035259246826, + "step": 4070, + "valid_targets_mean": 14595.5, + "valid_targets_min": 2384 + }, + { + "epoch": 3.2213438735177866, + "grad_norm": 0.4147484601857183, + "learning_rate": 1.3549438268443367e-05, + "loss": 0.7872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39577335119247437, + "step": 4075, + "valid_targets_mean": 13978.8, + "valid_targets_min": 3688 + }, + { + "epoch": 3.225296442687747, + "grad_norm": 0.32349426646355195, + "learning_rate": 1.349721929931301e-05, + "loss": 0.7724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4018934667110443, + "step": 4080, + "valid_targets_mean": 14249.7, + "valid_targets_min": 1842 + }, + { + "epoch": 3.2292490118577075, + "grad_norm": 0.4429350848586711, + "learning_rate": 1.3445049853394564e-05, + "loss": 0.7568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40372931957244873, + "step": 4085, + "valid_targets_mean": 15235.9, + "valid_targets_min": 4099 + }, + { + "epoch": 3.233201581027668, + "grad_norm": 0.38876188781890125, + "learning_rate": 1.3392930327994744e-05, + "loss": 0.7689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.406657874584198, + "step": 4090, + "valid_targets_mean": 14804.1, + "valid_targets_min": 1835 + }, + { + "epoch": 3.2371541501976284, + "grad_norm": 0.42050561643982903, + "learning_rate": 1.3340861120040111e-05, + "loss": 0.7637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3562227487564087, + "step": 4095, + "valid_targets_mean": 12094.6, + "valid_targets_min": 712 + }, + { + "epoch": 3.241106719367589, + "grad_norm": 0.3651540674540906, + "learning_rate": 1.3288842626073997e-05, + "loss": 0.7738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3978230357170105, + "step": 4100, + "valid_targets_mean": 14733.5, + "valid_targets_min": 4621 + }, + { + "epoch": 3.2450592885375493, + "grad_norm": 0.45178970862250434, + "learning_rate": 1.3236875242253539e-05, + "loss": 0.782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35652732849121094, + "step": 4105, + "valid_targets_mean": 13818.9, + "valid_targets_min": 1817 + }, + { + "epoch": 3.2490118577075098, + "grad_norm": 0.3610674154390934, + "learning_rate": 1.3184959364346614e-05, + "loss": 0.7818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3611879050731659, + "step": 4110, + "valid_targets_mean": 14500.9, + "valid_targets_min": 3533 + }, + { + "epoch": 3.2529644268774702, + "grad_norm": 0.3468175320275552, + "learning_rate": 1.313309538772885e-05, + "loss": 0.7753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3277224898338318, + "step": 4115, + "valid_targets_mean": 12266.1, + "valid_targets_min": 2069 + }, + { + "epoch": 3.2569169960474307, + "grad_norm": 0.36448268080955437, + "learning_rate": 1.3081283707380626e-05, + "loss": 0.776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39693161845207214, + "step": 4120, + "valid_targets_mean": 14298.7, + "valid_targets_min": 1546 + }, + { + "epoch": 3.260869565217391, + "grad_norm": 0.40390304703660684, + "learning_rate": 1.3029524717884021e-05, + "loss": 0.7546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3688093423843384, + "step": 4125, + "valid_targets_mean": 13765.5, + "valid_targets_min": 3017 + }, + { + "epoch": 3.2648221343873516, + "grad_norm": 0.4114520115168072, + "learning_rate": 1.2977818813419862e-05, + "loss": 0.7672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42505350708961487, + "step": 4130, + "valid_targets_mean": 14900.4, + "valid_targets_min": 2306 + }, + { + "epoch": 3.2687747035573125, + "grad_norm": 0.45935456778820044, + "learning_rate": 1.2926166387764675e-05, + "loss": 0.7687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4064413905143738, + "step": 4135, + "valid_targets_mean": 15126.3, + "valid_targets_min": 2246 + }, + { + "epoch": 3.2727272727272725, + "grad_norm": 0.4138265470857956, + "learning_rate": 1.2874567834287737e-05, + "loss": 0.7611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4118307828903198, + "step": 4140, + "valid_targets_mean": 15011.4, + "valid_targets_min": 1905 + }, + { + "epoch": 3.2766798418972334, + "grad_norm": 0.4359778576018634, + "learning_rate": 1.2823023545948012e-05, + "loss": 0.7735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3481375277042389, + "step": 4145, + "valid_targets_mean": 13402.0, + "valid_targets_min": 2302 + }, + { + "epoch": 3.280632411067194, + "grad_norm": 0.42749649292322756, + "learning_rate": 1.2771533915291228e-05, + "loss": 0.7481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37316757440567017, + "step": 4150, + "valid_targets_mean": 14560.2, + "valid_targets_min": 593 + }, + { + "epoch": 3.2845849802371543, + "grad_norm": 0.38206523632969724, + "learning_rate": 1.272009933444684e-05, + "loss": 0.7792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39392226934432983, + "step": 4155, + "valid_targets_mean": 14430.2, + "valid_targets_min": 908 + }, + { + "epoch": 3.2885375494071147, + "grad_norm": 0.42364349206487334, + "learning_rate": 1.2668720195125079e-05, + "loss": 0.7744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4021870493888855, + "step": 4160, + "valid_targets_mean": 14041.3, + "valid_targets_min": 2968 + }, + { + "epoch": 3.292490118577075, + "grad_norm": 0.35295387789234955, + "learning_rate": 1.261739688861392e-05, + "loss": 0.776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4169602394104004, + "step": 4165, + "valid_targets_mean": 14469.5, + "valid_targets_min": 2384 + }, + { + "epoch": 3.2964426877470356, + "grad_norm": 0.45926002388362025, + "learning_rate": 1.2566129805776157e-05, + "loss": 0.7806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41137149930000305, + "step": 4170, + "valid_targets_mean": 14545.2, + "valid_targets_min": 3029 + }, + { + "epoch": 3.300395256916996, + "grad_norm": 0.4789348499089069, + "learning_rate": 1.2514919337046392e-05, + "loss": 0.7701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.356997549533844, + "step": 4175, + "valid_targets_mean": 12748.1, + "valid_targets_min": 1599 + }, + { + "epoch": 3.3043478260869565, + "grad_norm": 0.45356282161274836, + "learning_rate": 1.2463765872428084e-05, + "loss": 0.7734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35269030928611755, + "step": 4180, + "valid_targets_mean": 12670.3, + "valid_targets_min": 3126 + }, + { + "epoch": 3.308300395256917, + "grad_norm": 0.381957266529499, + "learning_rate": 1.2412669801490536e-05, + "loss": 0.7859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4118671715259552, + "step": 4185, + "valid_targets_mean": 15768.4, + "valid_targets_min": 5236 + }, + { + "epoch": 3.3122529644268774, + "grad_norm": 0.39085864984498114, + "learning_rate": 1.2361631513365992e-05, + "loss": 0.7596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43855077028274536, + "step": 4190, + "valid_targets_mean": 16303.3, + "valid_targets_min": 7524 + }, + { + "epoch": 3.316205533596838, + "grad_norm": 0.4065438049975334, + "learning_rate": 1.2310651396746614e-05, + "loss": 0.7729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39063560962677, + "step": 4195, + "valid_targets_mean": 14549.3, + "valid_targets_min": 1683 + }, + { + "epoch": 3.3201581027667983, + "grad_norm": 0.4388075623341284, + "learning_rate": 1.2259729839881573e-05, + "loss": 0.7686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4054349958896637, + "step": 4200, + "valid_targets_mean": 13833.2, + "valid_targets_min": 1185 + }, + { + "epoch": 3.324110671936759, + "grad_norm": 0.4767163529866186, + "learning_rate": 1.2208867230574031e-05, + "loss": 0.7765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40986451506614685, + "step": 4205, + "valid_targets_mean": 14583.1, + "valid_targets_min": 1616 + }, + { + "epoch": 3.3280632411067192, + "grad_norm": 0.3767484990238543, + "learning_rate": 1.2158063956178259e-05, + "loss": 0.7684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36209630966186523, + "step": 4210, + "valid_targets_mean": 14010.2, + "valid_targets_min": 3442 + }, + { + "epoch": 3.3320158102766797, + "grad_norm": 0.43766410387341664, + "learning_rate": 1.210732040359663e-05, + "loss": 0.7618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3751262128353119, + "step": 4215, + "valid_targets_mean": 13687.7, + "valid_targets_min": 1218 + }, + { + "epoch": 3.33596837944664, + "grad_norm": 0.3515970006068086, + "learning_rate": 1.2056636959276706e-05, + "loss": 0.7716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3743066191673279, + "step": 4220, + "valid_targets_mean": 14693.9, + "valid_targets_min": 2048 + }, + { + "epoch": 3.3399209486166006, + "grad_norm": 0.3621590752264911, + "learning_rate": 1.2006014009208264e-05, + "loss": 0.7662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3954054117202759, + "step": 4225, + "valid_targets_mean": 15145.0, + "valid_targets_min": 4363 + }, + { + "epoch": 3.3438735177865615, + "grad_norm": 0.3774388735302919, + "learning_rate": 1.19554519389204e-05, + "loss": 0.7645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37899819016456604, + "step": 4230, + "valid_targets_mean": 14152.0, + "valid_targets_min": 1092 + }, + { + "epoch": 3.3478260869565215, + "grad_norm": 0.4178753092250553, + "learning_rate": 1.1904951133478555e-05, + "loss": 0.7601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43005502223968506, + "step": 4235, + "valid_targets_mean": 15190.8, + "valid_targets_min": 4603 + }, + { + "epoch": 3.3517786561264824, + "grad_norm": 0.393041085010036, + "learning_rate": 1.1854511977481583e-05, + "loss": 0.7642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37365978956222534, + "step": 4240, + "valid_targets_mean": 14066.2, + "valid_targets_min": 1727 + }, + { + "epoch": 3.355731225296443, + "grad_norm": 0.41030945891695064, + "learning_rate": 1.1804134855058865e-05, + "loss": 0.7722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3629663288593292, + "step": 4245, + "valid_targets_mean": 13677.5, + "valid_targets_min": 1445 + }, + { + "epoch": 3.3596837944664033, + "grad_norm": 0.4252069987104022, + "learning_rate": 1.175382014986731e-05, + "loss": 0.7686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40381327271461487, + "step": 4250, + "valid_targets_mean": 13727.9, + "valid_targets_min": 739 + }, + { + "epoch": 3.3636363636363638, + "grad_norm": 0.4280315521279236, + "learning_rate": 1.1703568245088533e-05, + "loss": 0.7626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35048937797546387, + "step": 4255, + "valid_targets_mean": 12708.1, + "valid_targets_min": 2425 + }, + { + "epoch": 3.367588932806324, + "grad_norm": 0.38720080127247086, + "learning_rate": 1.1653379523425823e-05, + "loss": 0.7797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4010440707206726, + "step": 4260, + "valid_targets_mean": 13964.4, + "valid_targets_min": 2023 + }, + { + "epoch": 3.3715415019762847, + "grad_norm": 0.35443873600254316, + "learning_rate": 1.1603254367101327e-05, + "loss": 0.7656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36993423104286194, + "step": 4265, + "valid_targets_mean": 14045.8, + "valid_targets_min": 2506 + }, + { + "epoch": 3.375494071146245, + "grad_norm": 0.4125905368250715, + "learning_rate": 1.1553193157853078e-05, + "loss": 0.7831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3913726210594177, + "step": 4270, + "valid_targets_mean": 13369.4, + "valid_targets_min": 793 + }, + { + "epoch": 3.3794466403162056, + "grad_norm": 0.4254851235495922, + "learning_rate": 1.1503196276932105e-05, + "loss": 0.7786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3640078604221344, + "step": 4275, + "valid_targets_mean": 13567.0, + "valid_targets_min": 2257 + }, + { + "epoch": 3.383399209486166, + "grad_norm": 0.4151459250569315, + "learning_rate": 1.1453264105099549e-05, + "loss": 0.7595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36574089527130127, + "step": 4280, + "valid_targets_mean": 12968.6, + "valid_targets_min": 1536 + }, + { + "epoch": 3.3873517786561265, + "grad_norm": 0.42380763168430746, + "learning_rate": 1.1403397022623723e-05, + "loss": 0.7752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3807469606399536, + "step": 4285, + "valid_targets_mean": 14339.8, + "valid_targets_min": 2858 + }, + { + "epoch": 3.391304347826087, + "grad_norm": 0.48823923366883665, + "learning_rate": 1.1353595409277252e-05, + "loss": 0.7635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40276944637298584, + "step": 4290, + "valid_targets_mean": 15432.3, + "valid_targets_min": 5159 + }, + { + "epoch": 3.3952569169960474, + "grad_norm": 0.5051305305578503, + "learning_rate": 1.130385964433419e-05, + "loss": 0.7805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40996918082237244, + "step": 4295, + "valid_targets_mean": 15013.0, + "valid_targets_min": 4064 + }, + { + "epoch": 3.399209486166008, + "grad_norm": 0.35898833410425635, + "learning_rate": 1.125419010656705e-05, + "loss": 0.7928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43315672874450684, + "step": 4300, + "valid_targets_mean": 14649.0, + "valid_targets_min": 2612 + }, + { + "epoch": 3.4031620553359683, + "grad_norm": 0.5363910103832, + "learning_rate": 1.1204587174244046e-05, + "loss": 0.7764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37175947427749634, + "step": 4305, + "valid_targets_mean": 14078.8, + "valid_targets_min": 1125 + }, + { + "epoch": 3.4071146245059287, + "grad_norm": 0.42109955971573776, + "learning_rate": 1.1155051225126101e-05, + "loss": 0.7838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3860953450202942, + "step": 4310, + "valid_targets_mean": 13481.9, + "valid_targets_min": 2337 + }, + { + "epoch": 3.411067193675889, + "grad_norm": 0.45700439849578206, + "learning_rate": 1.1105582636464031e-05, + "loss": 0.7757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4193469285964966, + "step": 4315, + "valid_targets_mean": 14694.0, + "valid_targets_min": 1469 + }, + { + "epoch": 3.4150197628458496, + "grad_norm": 0.384992776548732, + "learning_rate": 1.1056181784995652e-05, + "loss": 0.7871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39825087785720825, + "step": 4320, + "valid_targets_mean": 14578.8, + "valid_targets_min": 1346 + }, + { + "epoch": 3.4189723320158105, + "grad_norm": 0.3636079637013272, + "learning_rate": 1.1006849046942913e-05, + "loss": 0.756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41668176651000977, + "step": 4325, + "valid_targets_mean": 15213.2, + "valid_targets_min": 3049 + }, + { + "epoch": 3.4229249011857705, + "grad_norm": 0.3465621375507585, + "learning_rate": 1.0957584798009026e-05, + "loss": 0.7579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3653721809387207, + "step": 4330, + "valid_targets_mean": 13889.0, + "valid_targets_min": 4582 + }, + { + "epoch": 3.4268774703557314, + "grad_norm": 0.44648354344799934, + "learning_rate": 1.0908389413375641e-05, + "loss": 0.7921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46422117948532104, + "step": 4335, + "valid_targets_mean": 15348.2, + "valid_targets_min": 1908 + }, + { + "epoch": 3.430830039525692, + "grad_norm": 0.4102500395181871, + "learning_rate": 1.0859263267699903e-05, + "loss": 0.7755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.406475305557251, + "step": 4340, + "valid_targets_mean": 14725.0, + "valid_targets_min": 1156 + }, + { + "epoch": 3.4347826086956523, + "grad_norm": 0.5012471335933418, + "learning_rate": 1.0810206735111706e-05, + "loss": 0.7723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35060471296310425, + "step": 4345, + "valid_targets_mean": 13250.6, + "valid_targets_min": 2607 + }, + { + "epoch": 3.438735177865613, + "grad_norm": 0.4321429352078648, + "learning_rate": 1.0761220189210768e-05, + "loss": 0.7953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3488368093967438, + "step": 4350, + "valid_targets_mean": 11673.2, + "valid_targets_min": 665 + }, + { + "epoch": 3.4426877470355732, + "grad_norm": 0.44351594574381936, + "learning_rate": 1.071230400306381e-05, + "loss": 0.7711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4224506616592407, + "step": 4355, + "valid_targets_mean": 14675.6, + "valid_targets_min": 3499 + }, + { + "epoch": 3.4466403162055337, + "grad_norm": 0.5651520612178095, + "learning_rate": 1.0663458549201722e-05, + "loss": 0.7706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4084576368331909, + "step": 4360, + "valid_targets_mean": 14531.3, + "valid_targets_min": 1227 + }, + { + "epoch": 3.450592885375494, + "grad_norm": 0.4243706217714517, + "learning_rate": 1.0614684199616707e-05, + "loss": 0.776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37965062260627747, + "step": 4365, + "valid_targets_mean": 13605.6, + "valid_targets_min": 2537 + }, + { + "epoch": 3.4545454545454546, + "grad_norm": 0.3346744111351316, + "learning_rate": 1.056598132575947e-05, + "loss": 0.7628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3771563172340393, + "step": 4370, + "valid_targets_mean": 14693.4, + "valid_targets_min": 3567 + }, + { + "epoch": 3.458498023715415, + "grad_norm": 0.4320189539200391, + "learning_rate": 1.0517350298536375e-05, + "loss": 0.7786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3470962345600128, + "step": 4375, + "valid_targets_mean": 13507.7, + "valid_targets_min": 2352 + }, + { + "epoch": 3.4624505928853755, + "grad_norm": 0.34772657079393643, + "learning_rate": 1.0468791488306623e-05, + "loss": 0.7564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35714271664619446, + "step": 4380, + "valid_targets_mean": 13692.7, + "valid_targets_min": 3042 + }, + { + "epoch": 3.466403162055336, + "grad_norm": 0.4312042496689236, + "learning_rate": 1.0420305264879423e-05, + "loss": 0.766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4147428274154663, + "step": 4385, + "valid_targets_mean": 15192.1, + "valid_targets_min": 4266 + }, + { + "epoch": 3.4703557312252964, + "grad_norm": 0.48563134771397737, + "learning_rate": 1.0371891997511219e-05, + "loss": 0.779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3745666742324829, + "step": 4390, + "valid_targets_mean": 12823.0, + "valid_targets_min": 2182 + }, + { + "epoch": 3.474308300395257, + "grad_norm": 0.34102714379686333, + "learning_rate": 1.0323552054902786e-05, + "loss": 0.795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4139321744441986, + "step": 4395, + "valid_targets_mean": 14038.5, + "valid_targets_min": 1494 + }, + { + "epoch": 3.4782608695652173, + "grad_norm": 0.3773972051494446, + "learning_rate": 1.027528580519654e-05, + "loss": 0.7685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37310367822647095, + "step": 4400, + "valid_targets_mean": 13982.4, + "valid_targets_min": 1755 + }, + { + "epoch": 3.4822134387351777, + "grad_norm": 0.4761350092277862, + "learning_rate": 1.0227093615973635e-05, + "loss": 0.7812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39066553115844727, + "step": 4405, + "valid_targets_mean": 15292.0, + "valid_targets_min": 7172 + }, + { + "epoch": 3.486166007905138, + "grad_norm": 0.4055900423561305, + "learning_rate": 1.0178975854251218e-05, + "loss": 0.7639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39123499393463135, + "step": 4410, + "valid_targets_mean": 13925.6, + "valid_targets_min": 1753 + }, + { + "epoch": 3.4901185770750986, + "grad_norm": 0.4275730059639518, + "learning_rate": 1.0130932886479611e-05, + "loss": 0.7697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39992716908454895, + "step": 4415, + "valid_targets_mean": 15053.3, + "valid_targets_min": 4625 + }, + { + "epoch": 3.494071146245059, + "grad_norm": 0.4090211779342511, + "learning_rate": 1.0082965078539542e-05, + "loss": 0.7797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3689619302749634, + "step": 4420, + "valid_targets_mean": 12993.7, + "valid_targets_min": 787 + }, + { + "epoch": 3.4980237154150196, + "grad_norm": 0.36673942572999524, + "learning_rate": 1.0035072795739322e-05, + "loss": 0.7579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.350469172000885, + "step": 4425, + "valid_targets_mean": 13287.0, + "valid_targets_min": 2564 + }, + { + "epoch": 3.5019762845849804, + "grad_norm": 0.4392129104003924, + "learning_rate": 9.987256402812105e-06, + "loss": 0.7796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41360145807266235, + "step": 4430, + "valid_targets_mean": 15139.7, + "valid_targets_min": 4214 + }, + { + "epoch": 3.5059288537549405, + "grad_norm": 0.3822937957998306, + "learning_rate": 9.939516263913077e-06, + "loss": 0.7845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.394186407327652, + "step": 4435, + "valid_targets_mean": 14963.1, + "valid_targets_min": 2023 + }, + { + "epoch": 3.5098814229249014, + "grad_norm": 0.33925202878718536, + "learning_rate": 9.891852742616724e-06, + "loss": 0.7571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3885740637779236, + "step": 4440, + "valid_targets_mean": 14584.2, + "valid_targets_min": 2529 + }, + { + "epoch": 3.513833992094862, + "grad_norm": 0.34940887700398326, + "learning_rate": 9.844266201913981e-06, + "loss": 0.7669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3935416340827942, + "step": 4445, + "valid_targets_mean": 14609.6, + "valid_targets_min": 2912 + }, + { + "epoch": 3.5177865612648223, + "grad_norm": 0.4503487598385426, + "learning_rate": 9.796757004209584e-06, + "loss": 0.7648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38769203424453735, + "step": 4450, + "valid_targets_mean": 14886.1, + "valid_targets_min": 590 + }, + { + "epoch": 3.5217391304347827, + "grad_norm": 0.3878490557991329, + "learning_rate": 9.74932551131921e-06, + "loss": 0.7798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39278578758239746, + "step": 4455, + "valid_targets_mean": 14341.4, + "valid_targets_min": 930 + }, + { + "epoch": 3.525691699604743, + "grad_norm": 0.3351155330919542, + "learning_rate": 9.701972084466769e-06, + "loss": 0.7562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42116373777389526, + "step": 4460, + "valid_targets_mean": 15088.2, + "valid_targets_min": 976 + }, + { + "epoch": 3.5296442687747036, + "grad_norm": 0.3780489052919321, + "learning_rate": 9.654697084281643e-06, + "loss": 0.774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37293052673339844, + "step": 4465, + "valid_targets_mean": 14039.7, + "valid_targets_min": 621 + }, + { + "epoch": 3.533596837944664, + "grad_norm": 0.39631272630138153, + "learning_rate": 9.607500870795946e-06, + "loss": 0.7813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3918147683143616, + "step": 4470, + "valid_targets_mean": 12352.5, + "valid_targets_min": 1108 + }, + { + "epoch": 3.5375494071146245, + "grad_norm": 0.3787766674845462, + "learning_rate": 9.56038380344176e-06, + "loss": 0.7759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4108538031578064, + "step": 4475, + "valid_targets_mean": 15737.8, + "valid_targets_min": 3053 + }, + { + "epoch": 3.541501976284585, + "grad_norm": 0.3558996449987609, + "learning_rate": 9.513346241048446e-06, + "loss": 0.771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37123560905456543, + "step": 4480, + "valid_targets_mean": 14194.6, + "valid_targets_min": 2484 + }, + { + "epoch": 3.5454545454545454, + "grad_norm": 0.4150870328214344, + "learning_rate": 9.466388541839841e-06, + "loss": 0.7859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39626458287239075, + "step": 4485, + "valid_targets_mean": 14481.5, + "valid_targets_min": 5334 + }, + { + "epoch": 3.549407114624506, + "grad_norm": 0.4371279707351982, + "learning_rate": 9.419511063431586e-06, + "loss": 0.7595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38481512665748596, + "step": 4490, + "valid_targets_mean": 15358.2, + "valid_targets_min": 4904 + }, + { + "epoch": 3.5533596837944663, + "grad_norm": 0.37387055116334095, + "learning_rate": 9.372714162828402e-06, + "loss": 0.7592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3551672101020813, + "step": 4495, + "valid_targets_mean": 12412.0, + "valid_targets_min": 1635 + }, + { + "epoch": 3.5573122529644268, + "grad_norm": 0.43070709023699905, + "learning_rate": 9.325998196421312e-06, + "loss": 0.773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3556666970252991, + "step": 4500, + "valid_targets_mean": 13282.1, + "valid_targets_min": 839 + }, + { + "epoch": 3.561264822134387, + "grad_norm": 0.3205749647378686, + "learning_rate": 9.279363519985012e-06, + "loss": 0.7589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37249162793159485, + "step": 4505, + "valid_targets_mean": 13697.3, + "valid_targets_min": 2834 + }, + { + "epoch": 3.5652173913043477, + "grad_norm": 0.38950641626308613, + "learning_rate": 9.232810488675094e-06, + "loss": 0.7756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35073041915893555, + "step": 4510, + "valid_targets_mean": 14037.0, + "valid_targets_min": 3489 + }, + { + "epoch": 3.5691699604743086, + "grad_norm": 0.35453217755154703, + "learning_rate": 9.18633945702537e-06, + "loss": 0.774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3753873109817505, + "step": 4515, + "valid_targets_mean": 13588.8, + "valid_targets_min": 1385 + }, + { + "epoch": 3.5731225296442686, + "grad_norm": 0.33050825954162005, + "learning_rate": 9.139950778945166e-06, + "loss": 0.7681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3722689747810364, + "step": 4520, + "valid_targets_mean": 13941.4, + "valid_targets_min": 1183 + }, + { + "epoch": 3.5770750988142295, + "grad_norm": 0.3467038798780729, + "learning_rate": 9.093644807716633e-06, + "loss": 0.7714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3990970253944397, + "step": 4525, + "valid_targets_mean": 13801.4, + "valid_targets_min": 1508 + }, + { + "epoch": 3.5810276679841895, + "grad_norm": 0.3509872164460196, + "learning_rate": 9.04742189599204e-06, + "loss": 0.7766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3964369297027588, + "step": 4530, + "valid_targets_mean": 14286.9, + "valid_targets_min": 823 + }, + { + "epoch": 3.5849802371541504, + "grad_norm": 0.3487509296144908, + "learning_rate": 9.001282395791129e-06, + "loss": 0.7732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4117966294288635, + "step": 4535, + "valid_targets_mean": 15295.0, + "valid_targets_min": 899 + }, + { + "epoch": 3.588932806324111, + "grad_norm": 0.3275437066231151, + "learning_rate": 8.955226658498358e-06, + "loss": 0.758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3797200322151184, + "step": 4540, + "valid_targets_mean": 14337.9, + "valid_targets_min": 2383 + }, + { + "epoch": 3.5928853754940713, + "grad_norm": 0.32729191766218674, + "learning_rate": 8.90925503486032e-06, + "loss": 0.7558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3329576253890991, + "step": 4545, + "valid_targets_mean": 12771.0, + "valid_targets_min": 1139 + }, + { + "epoch": 3.5968379446640317, + "grad_norm": 0.3579286319199534, + "learning_rate": 8.863367874982993e-06, + "loss": 0.7754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40079835057258606, + "step": 4550, + "valid_targets_mean": 15106.8, + "valid_targets_min": 4725 + }, + { + "epoch": 3.600790513833992, + "grad_norm": 0.3999011364649414, + "learning_rate": 8.81756552832912e-06, + "loss": 0.7726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40202635526657104, + "step": 4555, + "valid_targets_mean": 14887.5, + "valid_targets_min": 4587 + }, + { + "epoch": 3.6047430830039526, + "grad_norm": 0.35761524968022834, + "learning_rate": 8.771848343715519e-06, + "loss": 0.7705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4166146516799927, + "step": 4560, + "valid_targets_mean": 14937.2, + "valid_targets_min": 3725 + }, + { + "epoch": 3.608695652173913, + "grad_norm": 0.42091666503967806, + "learning_rate": 8.72621666931045e-06, + "loss": 0.7723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3659970164299011, + "step": 4565, + "valid_targets_mean": 14472.3, + "valid_targets_min": 2506 + }, + { + "epoch": 3.6126482213438735, + "grad_norm": 0.395069590089718, + "learning_rate": 8.680670852630943e-06, + "loss": 0.7679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.386849582195282, + "step": 4570, + "valid_targets_mean": 12436.2, + "valid_targets_min": 818 + }, + { + "epoch": 3.616600790513834, + "grad_norm": 0.3417856915294824, + "learning_rate": 8.635211240540191e-06, + "loss": 0.7698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3920336365699768, + "step": 4575, + "valid_targets_mean": 14504.0, + "valid_targets_min": 1151 + }, + { + "epoch": 3.6205533596837944, + "grad_norm": 0.3396198759142697, + "learning_rate": 8.589838179244826e-06, + "loss": 0.7676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36404210329055786, + "step": 4580, + "valid_targets_mean": 13156.5, + "valid_targets_min": 2887 + }, + { + "epoch": 3.624505928853755, + "grad_norm": 0.42756855197140786, + "learning_rate": 8.544552014292393e-06, + "loss": 0.7709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39976444840431213, + "step": 4585, + "valid_targets_mean": 13670.3, + "valid_targets_min": 754 + }, + { + "epoch": 3.6284584980237153, + "grad_norm": 0.33569586670767265, + "learning_rate": 8.49935309056861e-06, + "loss": 0.7742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3816384971141815, + "step": 4590, + "valid_targets_mean": 13751.9, + "valid_targets_min": 2066 + }, + { + "epoch": 3.632411067193676, + "grad_norm": 0.4665700359233907, + "learning_rate": 8.45424175229483e-06, + "loss": 0.775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3955335021018982, + "step": 4595, + "valid_targets_mean": 14690.9, + "valid_targets_min": 3434 + }, + { + "epoch": 3.6363636363636362, + "grad_norm": 0.36248423920009987, + "learning_rate": 8.409218343025367e-06, + "loss": 0.7681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3905838131904602, + "step": 4600, + "valid_targets_mean": 14818.8, + "valid_targets_min": 801 + }, + { + "epoch": 3.6403162055335967, + "grad_norm": 0.39487031174560877, + "learning_rate": 8.364283205644889e-06, + "loss": 0.7613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3700423240661621, + "step": 4605, + "valid_targets_mean": 14248.6, + "valid_targets_min": 1858 + }, + { + "epoch": 3.6442687747035576, + "grad_norm": 0.36184194671531206, + "learning_rate": 8.31943668236582e-06, + "loss": 0.7743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3514922857284546, + "step": 4610, + "valid_targets_mean": 13824.2, + "valid_targets_min": 1079 + }, + { + "epoch": 3.6482213438735176, + "grad_norm": 0.41003075689038604, + "learning_rate": 8.274679114725725e-06, + "loss": 0.7648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4155205488204956, + "step": 4615, + "valid_targets_mean": 15382.5, + "valid_targets_min": 3209 + }, + { + "epoch": 3.6521739130434785, + "grad_norm": 0.472582814009016, + "learning_rate": 8.230010843584706e-06, + "loss": 0.7741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3677980303764343, + "step": 4620, + "valid_targets_mean": 14565.3, + "valid_targets_min": 2070 + }, + { + "epoch": 3.6561264822134385, + "grad_norm": 0.3773477137754744, + "learning_rate": 8.185432209122813e-06, + "loss": 0.7596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36430823802948, + "step": 4625, + "valid_targets_mean": 15235.3, + "valid_targets_min": 2500 + }, + { + "epoch": 3.6600790513833994, + "grad_norm": 0.3466846135457391, + "learning_rate": 8.140943550837446e-06, + "loss": 0.7763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39034855365753174, + "step": 4630, + "valid_targets_mean": 14439.3, + "valid_targets_min": 1577 + }, + { + "epoch": 3.66403162055336, + "grad_norm": 0.4107062091672997, + "learning_rate": 8.096545207540774e-06, + "loss": 0.7675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3928315341472626, + "step": 4635, + "valid_targets_mean": 14535.7, + "valid_targets_min": 1069 + }, + { + "epoch": 3.6679841897233203, + "grad_norm": 0.35048708352913344, + "learning_rate": 8.052237517357175e-06, + "loss": 0.782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4032531976699829, + "step": 4640, + "valid_targets_mean": 13577.5, + "valid_targets_min": 930 + }, + { + "epoch": 3.6719367588932808, + "grad_norm": 0.3628283048335312, + "learning_rate": 8.008020817720596e-06, + "loss": 0.7634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3883836269378662, + "step": 4645, + "valid_targets_mean": 14524.3, + "valid_targets_min": 2680 + }, + { + "epoch": 3.675889328063241, + "grad_norm": 0.3731905279680819, + "learning_rate": 7.963895445372075e-06, + "loss": 0.7825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3952891528606415, + "step": 4650, + "valid_targets_mean": 13548.6, + "valid_targets_min": 1796 + }, + { + "epoch": 3.6798418972332017, + "grad_norm": 0.3794902801025146, + "learning_rate": 7.9198617363571e-06, + "loss": 0.7822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39074110984802246, + "step": 4655, + "valid_targets_mean": 14365.3, + "valid_targets_min": 1308 + }, + { + "epoch": 3.683794466403162, + "grad_norm": 0.3276624439830118, + "learning_rate": 7.875920026023087e-06, + "loss": 0.7792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39349621534347534, + "step": 4660, + "valid_targets_mean": 14524.4, + "valid_targets_min": 1065 + }, + { + "epoch": 3.6877470355731226, + "grad_norm": 0.3980461586724483, + "learning_rate": 7.832070649016816e-06, + "loss": 0.7704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35831165313720703, + "step": 4665, + "valid_targets_mean": 12899.2, + "valid_targets_min": 2330 + }, + { + "epoch": 3.691699604743083, + "grad_norm": 0.33138293939103397, + "learning_rate": 7.788313939281888e-06, + "loss": 0.7727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3859490752220154, + "step": 4670, + "valid_targets_mean": 14603.6, + "valid_targets_min": 2031 + }, + { + "epoch": 3.6956521739130435, + "grad_norm": 0.35556857299143674, + "learning_rate": 7.74465023005617e-06, + "loss": 0.7681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34844446182250977, + "step": 4675, + "valid_targets_mean": 12900.1, + "valid_targets_min": 2421 + }, + { + "epoch": 3.699604743083004, + "grad_norm": 0.33834846540194236, + "learning_rate": 7.701079853869288e-06, + "loss": 0.7824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4113885462284088, + "step": 4680, + "valid_targets_mean": 14466.9, + "valid_targets_min": 3130 + }, + { + "epoch": 3.7035573122529644, + "grad_norm": 0.3724245947527132, + "learning_rate": 7.657603142540033e-06, + "loss": 0.7647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3350259065628052, + "step": 4685, + "valid_targets_mean": 13231.6, + "valid_targets_min": 1101 + }, + { + "epoch": 3.707509881422925, + "grad_norm": 0.37378158959902047, + "learning_rate": 7.614220427173908e-06, + "loss": 0.7842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34899359941482544, + "step": 4690, + "valid_targets_mean": 12478.5, + "valid_targets_min": 399 + }, + { + "epoch": 3.7114624505928853, + "grad_norm": 0.35883521871546037, + "learning_rate": 7.570932038160557e-06, + "loss": 0.7765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3846393823623657, + "step": 4695, + "valid_targets_mean": 13728.7, + "valid_targets_min": 1608 + }, + { + "epoch": 3.7154150197628457, + "grad_norm": 0.327936038655851, + "learning_rate": 7.527738305171259e-06, + "loss": 0.7846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3563764691352844, + "step": 4700, + "valid_targets_mean": 12901.5, + "valid_targets_min": 480 + }, + { + "epoch": 3.719367588932806, + "grad_norm": 0.3740336280558383, + "learning_rate": 7.48463955715643e-06, + "loss": 0.7756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39217162132263184, + "step": 4705, + "valid_targets_mean": 14042.8, + "valid_targets_min": 1376 + }, + { + "epoch": 3.7233201581027666, + "grad_norm": 0.33433514659610214, + "learning_rate": 7.441636122343103e-06, + "loss": 0.7748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37061887979507446, + "step": 4710, + "valid_targets_mean": 13963.1, + "valid_targets_min": 2881 + }, + { + "epoch": 3.7272727272727275, + "grad_norm": 0.32650245807918116, + "learning_rate": 7.398728328232432e-06, + "loss": 0.7714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42431461811065674, + "step": 4715, + "valid_targets_mean": 14500.4, + "valid_targets_min": 1604 + }, + { + "epoch": 3.7312252964426875, + "grad_norm": 0.32671206106373374, + "learning_rate": 7.355916501597224e-06, + "loss": 0.7714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39087340235710144, + "step": 4720, + "valid_targets_mean": 15529.2, + "valid_targets_min": 8887 + }, + { + "epoch": 3.7351778656126484, + "grad_norm": 0.35638224959441817, + "learning_rate": 7.3132009684793815e-06, + "loss": 0.7606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35790348052978516, + "step": 4725, + "valid_targets_mean": 12764.6, + "valid_targets_min": 3293 + }, + { + "epoch": 3.7391304347826084, + "grad_norm": 0.3112443750786024, + "learning_rate": 7.2705820541875185e-06, + "loss": 0.78, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4059688448905945, + "step": 4730, + "valid_targets_mean": 15962.4, + "valid_targets_min": 2908 + }, + { + "epoch": 3.7430830039525693, + "grad_norm": 0.4309620730630617, + "learning_rate": 7.228060083294379e-06, + "loss": 0.7508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38671398162841797, + "step": 4735, + "valid_targets_mean": 14361.1, + "valid_targets_min": 1383 + }, + { + "epoch": 3.7470355731225298, + "grad_norm": 0.3088332088500845, + "learning_rate": 7.185635379634441e-06, + "loss": 0.768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.400546669960022, + "step": 4740, + "valid_targets_mean": 14837.0, + "valid_targets_min": 1220 + }, + { + "epoch": 3.7509881422924902, + "grad_norm": 0.35059692154311123, + "learning_rate": 7.143308266301439e-06, + "loss": 0.7788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40453895926475525, + "step": 4745, + "valid_targets_mean": 14297.8, + "valid_targets_min": 2616 + }, + { + "epoch": 3.7549407114624507, + "grad_norm": 0.32735087315032224, + "learning_rate": 7.1010790656458485e-06, + "loss": 0.7577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4112452566623688, + "step": 4750, + "valid_targets_mean": 14267.3, + "valid_targets_min": 1125 + }, + { + "epoch": 3.758893280632411, + "grad_norm": 0.29507865616064866, + "learning_rate": 7.058948099272514e-06, + "loss": 0.7651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36909353733062744, + "step": 4755, + "valid_targets_mean": 15219.3, + "valid_targets_min": 951 + }, + { + "epoch": 3.7628458498023716, + "grad_norm": 0.3300247401688082, + "learning_rate": 7.016915688038128e-06, + "loss": 0.7698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35644978284835815, + "step": 4760, + "valid_targets_mean": 12794.1, + "valid_targets_min": 770 + }, + { + "epoch": 3.766798418972332, + "grad_norm": 0.4182663671067831, + "learning_rate": 6.974982152048833e-06, + "loss": 0.7625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40223050117492676, + "step": 4765, + "valid_targets_mean": 14210.7, + "valid_targets_min": 3809 + }, + { + "epoch": 3.7707509881422925, + "grad_norm": 0.4480339200149031, + "learning_rate": 6.9331478106577585e-06, + "loss": 0.7756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4417881369590759, + "step": 4770, + "valid_targets_mean": 15424.8, + "valid_targets_min": 6228 + }, + { + "epoch": 3.774703557312253, + "grad_norm": 0.36118860834121014, + "learning_rate": 6.891412982462602e-06, + "loss": 0.7539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35723891854286194, + "step": 4775, + "valid_targets_mean": 13947.1, + "valid_targets_min": 974 + }, + { + "epoch": 3.7786561264822134, + "grad_norm": 0.3311234053624517, + "learning_rate": 6.84977798530319e-06, + "loss": 0.7717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36364713311195374, + "step": 4780, + "valid_targets_mean": 13686.5, + "valid_targets_min": 1014 + }, + { + "epoch": 3.782608695652174, + "grad_norm": 0.3210637857038055, + "learning_rate": 6.8082431362590964e-06, + "loss": 0.7758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3305812180042267, + "step": 4785, + "valid_targets_mean": 12230.1, + "valid_targets_min": 1053 + }, + { + "epoch": 3.7865612648221343, + "grad_norm": 0.39211357755891346, + "learning_rate": 6.7668087516471444e-06, + "loss": 0.7645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3761019706726074, + "step": 4790, + "valid_targets_mean": 14638.1, + "valid_targets_min": 1913 + }, + { + "epoch": 3.7905138339920947, + "grad_norm": 0.3659316169926598, + "learning_rate": 6.725475147019105e-06, + "loss": 0.7689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35932037234306335, + "step": 4795, + "valid_targets_mean": 13967.0, + "valid_targets_min": 1105 + }, + { + "epoch": 3.794466403162055, + "grad_norm": 0.41473777835469466, + "learning_rate": 6.6842426371592065e-06, + "loss": 0.7561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3994976282119751, + "step": 4800, + "valid_targets_mean": 13862.1, + "valid_targets_min": 306 + }, + { + "epoch": 3.7984189723320156, + "grad_norm": 0.3835466472215804, + "learning_rate": 6.643111536081776e-06, + "loss": 0.7696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3535078763961792, + "step": 4805, + "valid_targets_mean": 13374.5, + "valid_targets_min": 1557 + }, + { + "epoch": 3.8023715415019765, + "grad_norm": 0.3758233737417259, + "learning_rate": 6.602082157028851e-06, + "loss": 0.7754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3747210204601288, + "step": 4810, + "valid_targets_mean": 14516.1, + "valid_targets_min": 3486 + }, + { + "epoch": 3.8063241106719365, + "grad_norm": 0.38029981664547324, + "learning_rate": 6.561154812467771e-06, + "loss": 0.7636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39692482352256775, + "step": 4815, + "valid_targets_mean": 15880.5, + "valid_targets_min": 6156 + }, + { + "epoch": 3.8102766798418974, + "grad_norm": 0.38249588851585414, + "learning_rate": 6.520329814088817e-06, + "loss": 0.7809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3869725465774536, + "step": 4820, + "valid_targets_mean": 14065.9, + "valid_targets_min": 744 + }, + { + "epoch": 3.8142292490118574, + "grad_norm": 0.33838484656653195, + "learning_rate": 6.479607472802853e-06, + "loss": 0.7606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34898102283477783, + "step": 4825, + "valid_targets_mean": 13171.9, + "valid_targets_min": 1156 + }, + { + "epoch": 3.8181818181818183, + "grad_norm": 0.32144884545007635, + "learning_rate": 6.438988098738894e-06, + "loss": 0.7608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3750029504299164, + "step": 4830, + "valid_targets_mean": 14910.2, + "valid_targets_min": 1684 + }, + { + "epoch": 3.822134387351779, + "grad_norm": 0.35923667339381643, + "learning_rate": 6.398472001241833e-06, + "loss": 0.7749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38015663623809814, + "step": 4835, + "valid_targets_mean": 13092.2, + "valid_targets_min": 2306 + }, + { + "epoch": 3.8260869565217392, + "grad_norm": 0.3376844463886327, + "learning_rate": 6.35805948887001e-06, + "loss": 0.7563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38769906759262085, + "step": 4840, + "valid_targets_mean": 15153.9, + "valid_targets_min": 4161 + }, + { + "epoch": 3.8300395256916997, + "grad_norm": 0.3901506253721067, + "learning_rate": 6.3177508693929024e-06, + "loss": 0.7648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3707695007324219, + "step": 4845, + "valid_targets_mean": 13396.8, + "valid_targets_min": 471 + }, + { + "epoch": 3.83399209486166, + "grad_norm": 0.3141576486349326, + "learning_rate": 6.277546449788774e-06, + "loss": 0.7672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39295434951782227, + "step": 4850, + "valid_targets_mean": 15654.7, + "valid_targets_min": 5202 + }, + { + "epoch": 3.8379446640316206, + "grad_norm": 0.37242720245837035, + "learning_rate": 6.237446536242324e-06, + "loss": 0.7687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34778451919555664, + "step": 4855, + "valid_targets_mean": 13706.4, + "valid_targets_min": 1443 + }, + { + "epoch": 3.841897233201581, + "grad_norm": 0.31917295310079724, + "learning_rate": 6.197451434142376e-06, + "loss": 0.7543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37137681245803833, + "step": 4860, + "valid_targets_mean": 14635.4, + "valid_targets_min": 2152 + }, + { + "epoch": 3.8458498023715415, + "grad_norm": 0.3754393368313446, + "learning_rate": 6.1575614480795364e-06, + "loss": 0.7684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36462387442588806, + "step": 4865, + "valid_targets_mean": 13869.1, + "valid_targets_min": 3392 + }, + { + "epoch": 3.849802371541502, + "grad_norm": 0.3858292018795854, + "learning_rate": 6.117776881843876e-06, + "loss": 0.7844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4236716628074646, + "step": 4870, + "valid_targets_mean": 13605.6, + "valid_targets_min": 1339 + }, + { + "epoch": 3.8537549407114624, + "grad_norm": 0.3687402763723893, + "learning_rate": 6.078098038422624e-06, + "loss": 0.7673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41958799958229065, + "step": 4875, + "valid_targets_mean": 14788.2, + "valid_targets_min": 4211 + }, + { + "epoch": 3.857707509881423, + "grad_norm": 0.4257377746855842, + "learning_rate": 6.03852521999786e-06, + "loss": 0.7929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39090701937675476, + "step": 4880, + "valid_targets_mean": 14276.3, + "valid_targets_min": 633 + }, + { + "epoch": 3.8616600790513833, + "grad_norm": 0.33934963994350614, + "learning_rate": 5.999058727944193e-06, + "loss": 0.7554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3625437915325165, + "step": 4885, + "valid_targets_mean": 14468.4, + "valid_targets_min": 4969 + }, + { + "epoch": 3.8656126482213438, + "grad_norm": 0.33952703401094153, + "learning_rate": 5.959698862826521e-06, + "loss": 0.7598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38060325384140015, + "step": 4890, + "valid_targets_mean": 14547.2, + "valid_targets_min": 4364 + }, + { + "epoch": 3.869565217391304, + "grad_norm": 0.3548697877398626, + "learning_rate": 5.920445924397648e-06, + "loss": 0.7709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.358448326587677, + "step": 4895, + "valid_targets_mean": 13067.8, + "valid_targets_min": 2196 + }, + { + "epoch": 3.8735177865612647, + "grad_norm": 0.339263018117467, + "learning_rate": 5.881300211596106e-06, + "loss": 0.7682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4054974317550659, + "step": 4900, + "valid_targets_mean": 13840.8, + "valid_targets_min": 1676 + }, + { + "epoch": 3.8774703557312256, + "grad_norm": 0.3111784675158538, + "learning_rate": 5.842262022543803e-06, + "loss": 0.7707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38622355461120605, + "step": 4905, + "valid_targets_mean": 14714.1, + "valid_targets_min": 4275 + }, + { + "epoch": 3.8814229249011856, + "grad_norm": 0.3261039293312398, + "learning_rate": 5.803331654543785e-06, + "loss": 0.7844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.396939218044281, + "step": 4910, + "valid_targets_mean": 14502.2, + "valid_targets_min": 2924 + }, + { + "epoch": 3.8853754940711465, + "grad_norm": 0.3426804640318119, + "learning_rate": 5.7645094040779646e-06, + "loss": 0.7655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4014540910720825, + "step": 4915, + "valid_targets_mean": 15380.9, + "valid_targets_min": 5404 + }, + { + "epoch": 3.8893280632411065, + "grad_norm": 0.3456097788244671, + "learning_rate": 5.725795566804866e-06, + "loss": 0.776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3994362950325012, + "step": 4920, + "valid_targets_mean": 15225.2, + "valid_targets_min": 3784 + }, + { + "epoch": 3.8932806324110674, + "grad_norm": 0.3703525050634326, + "learning_rate": 5.6871904375573615e-06, + "loss": 0.7896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34410327672958374, + "step": 4925, + "valid_targets_mean": 12651.5, + "valid_targets_min": 2236 + }, + { + "epoch": 3.897233201581028, + "grad_norm": 0.3225630170540523, + "learning_rate": 5.6486943103404654e-06, + "loss": 0.7802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4236008822917938, + "step": 4930, + "valid_targets_mean": 15264.1, + "valid_targets_min": 4312 + }, + { + "epoch": 3.9011857707509883, + "grad_norm": 0.2950346481818456, + "learning_rate": 5.610307478329018e-06, + "loss": 0.7718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3939381241798401, + "step": 4935, + "valid_targets_mean": 15327.6, + "valid_targets_min": 5057 + }, + { + "epoch": 3.9051383399209487, + "grad_norm": 0.3208157097733114, + "learning_rate": 5.572030233865547e-06, + "loss": 0.7685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3937973380088806, + "step": 4940, + "valid_targets_mean": 14842.1, + "valid_targets_min": 4555 + }, + { + "epoch": 3.909090909090909, + "grad_norm": 0.3590280119099791, + "learning_rate": 5.533862868457966e-06, + "loss": 0.7636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38863110542297363, + "step": 4945, + "valid_targets_mean": 14640.0, + "valid_targets_min": 3033 + }, + { + "epoch": 3.9130434782608696, + "grad_norm": 0.3083351679429893, + "learning_rate": 5.495805672777394e-06, + "loss": 0.7743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40445053577423096, + "step": 4950, + "valid_targets_mean": 13687.5, + "valid_targets_min": 3446 + }, + { + "epoch": 3.91699604743083, + "grad_norm": 0.3115900518737178, + "learning_rate": 5.4578589366559245e-06, + "loss": 0.7644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35766810178756714, + "step": 4955, + "valid_targets_mean": 13455.4, + "valid_targets_min": 2630 + }, + { + "epoch": 3.9209486166007905, + "grad_norm": 0.3779699049164637, + "learning_rate": 5.420022949084431e-06, + "loss": 0.7649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3585634231567383, + "step": 4960, + "valid_targets_mean": 14447.8, + "valid_targets_min": 2803 + }, + { + "epoch": 3.924901185770751, + "grad_norm": 0.30462601617836843, + "learning_rate": 5.382297998210349e-06, + "loss": 0.7618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36766254901885986, + "step": 4965, + "valid_targets_mean": 13315.7, + "valid_targets_min": 793 + }, + { + "epoch": 3.9288537549407114, + "grad_norm": 0.2926923799111397, + "learning_rate": 5.344684371335516e-06, + "loss": 0.7815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38413774967193604, + "step": 4970, + "valid_targets_mean": 14675.7, + "valid_targets_min": 1153 + }, + { + "epoch": 3.932806324110672, + "grad_norm": 0.32813298970017074, + "learning_rate": 5.30718235491392e-06, + "loss": 0.7685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4305744171142578, + "step": 4975, + "valid_targets_mean": 15243.2, + "valid_targets_min": 1477 + }, + { + "epoch": 3.9367588932806323, + "grad_norm": 0.36163359382810567, + "learning_rate": 5.2697922345496e-06, + "loss": 0.7677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3983790874481201, + "step": 4980, + "valid_targets_mean": 14066.0, + "valid_targets_min": 1354 + }, + { + "epoch": 3.940711462450593, + "grad_norm": 0.32476951125982295, + "learning_rate": 5.232514294994409e-06, + "loss": 0.7593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3770415484905243, + "step": 4985, + "valid_targets_mean": 13686.8, + "valid_targets_min": 675 + }, + { + "epoch": 3.9446640316205532, + "grad_norm": 0.2914393759643683, + "learning_rate": 5.195348820145844e-06, + "loss": 0.7696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33173322677612305, + "step": 4990, + "valid_targets_mean": 13225.9, + "valid_targets_min": 2664 + }, + { + "epoch": 3.9486166007905137, + "grad_norm": 0.3191902677843529, + "learning_rate": 5.158296093044946e-06, + "loss": 0.7654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3761120140552521, + "step": 4995, + "valid_targets_mean": 13653.7, + "valid_targets_min": 1416 + }, + { + "epoch": 3.9525691699604746, + "grad_norm": 0.3808345509913792, + "learning_rate": 5.1213563958740754e-06, + "loss": 0.7679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.397722065448761, + "step": 5000, + "valid_targets_mean": 13904.3, + "valid_targets_min": 749 + }, + { + "epoch": 3.9565217391304346, + "grad_norm": 0.3670956320981544, + "learning_rate": 5.084530009954798e-06, + "loss": 0.7846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3988097310066223, + "step": 5005, + "valid_targets_mean": 13919.1, + "valid_targets_min": 1741 + }, + { + "epoch": 3.9604743083003955, + "grad_norm": 0.3352820794536484, + "learning_rate": 5.047817215745736e-06, + "loss": 0.7572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4051254987716675, + "step": 5010, + "valid_targets_mean": 14451.1, + "valid_targets_min": 1581 + }, + { + "epoch": 3.9644268774703555, + "grad_norm": 0.3734806169774864, + "learning_rate": 5.011218292840434e-06, + "loss": 0.7703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3974752426147461, + "step": 5015, + "valid_targets_mean": 14150.2, + "valid_targets_min": 3886 + }, + { + "epoch": 3.9683794466403164, + "grad_norm": 0.3238837732937898, + "learning_rate": 4.974733519965216e-06, + "loss": 0.7759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.421908438205719, + "step": 5020, + "valid_targets_mean": 14423.6, + "valid_targets_min": 1638 + }, + { + "epoch": 3.972332015810277, + "grad_norm": 0.3317457990528716, + "learning_rate": 4.938363174977099e-06, + "loss": 0.7677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3884807229042053, + "step": 5025, + "valid_targets_mean": 14692.5, + "valid_targets_min": 4159 + }, + { + "epoch": 3.9762845849802373, + "grad_norm": 0.3071741982690475, + "learning_rate": 4.902107534861613e-06, + "loss": 0.7629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.377503901720047, + "step": 5030, + "valid_targets_mean": 13599.8, + "valid_targets_min": 736 + }, + { + "epoch": 3.9802371541501977, + "grad_norm": 0.27986169727528226, + "learning_rate": 4.865966875730774e-06, + "loss": 0.7535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4184621572494507, + "step": 5035, + "valid_targets_mean": 15179.8, + "valid_targets_min": 3789 + }, + { + "epoch": 3.984189723320158, + "grad_norm": 0.3316543644183339, + "learning_rate": 4.829941472820894e-06, + "loss": 0.7734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3943921625614166, + "step": 5040, + "valid_targets_mean": 13824.0, + "valid_targets_min": 1995 + }, + { + "epoch": 3.9881422924901186, + "grad_norm": 0.30821732184687545, + "learning_rate": 4.79403160049057e-06, + "loss": 0.7605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38819795846939087, + "step": 5045, + "valid_targets_mean": 14316.6, + "valid_targets_min": 1380 + }, + { + "epoch": 3.992094861660079, + "grad_norm": 0.3366536235824967, + "learning_rate": 4.758237532218523e-06, + "loss": 0.7759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37276530265808105, + "step": 5050, + "valid_targets_mean": 13220.4, + "valid_targets_min": 1674 + }, + { + "epoch": 3.9960474308300395, + "grad_norm": 0.32160254289140294, + "learning_rate": 4.722559540601559e-06, + "loss": 0.7815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37925902009010315, + "step": 5055, + "valid_targets_mean": 15525.2, + "valid_targets_min": 4422 + }, + { + "epoch": 4.0, + "grad_norm": 0.33155352462515897, + "learning_rate": 4.686997897352475e-06, + "loss": 0.7804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4099022448062897, + "step": 5060, + "valid_targets_mean": 14266.3, + "valid_targets_min": 3169 + }, + { + "epoch": 4.003952569169961, + "grad_norm": 0.36967502293800747, + "learning_rate": 4.651552873297993e-06, + "loss": 0.7568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36785170435905457, + "step": 5065, + "valid_targets_mean": 13549.2, + "valid_targets_min": 839 + }, + { + "epoch": 4.007905138339921, + "grad_norm": 0.335717426903247, + "learning_rate": 4.6162247383767e-06, + "loss": 0.7601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36401963233947754, + "step": 5070, + "valid_targets_mean": 14266.1, + "valid_targets_min": 1149 + }, + { + "epoch": 4.011857707509882, + "grad_norm": 0.3316653044271957, + "learning_rate": 4.581013761637003e-06, + "loss": 0.7599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39764103293418884, + "step": 5075, + "valid_targets_mean": 13813.8, + "valid_targets_min": 2104 + }, + { + "epoch": 4.015810276679842, + "grad_norm": 0.3021454481584843, + "learning_rate": 4.545920211235038e-06, + "loss": 0.7741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4076896011829376, + "step": 5080, + "valid_targets_mean": 15480.1, + "valid_targets_min": 1603 + }, + { + "epoch": 4.019762845849803, + "grad_norm": 0.32140057243483255, + "learning_rate": 4.510944354432687e-06, + "loss": 0.764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38636717200279236, + "step": 5085, + "valid_targets_mean": 13422.5, + "valid_targets_min": 1768 + }, + { + "epoch": 4.023715415019763, + "grad_norm": 0.3179365994886472, + "learning_rate": 4.476086457595499e-06, + "loss": 0.7732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4096972346305847, + "step": 5090, + "valid_targets_mean": 14639.2, + "valid_targets_min": 838 + }, + { + "epoch": 4.027667984189724, + "grad_norm": 0.29967586768695137, + "learning_rate": 4.441346786190679e-06, + "loss": 0.7677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3935505151748657, + "step": 5095, + "valid_targets_mean": 13679.1, + "valid_targets_min": 2361 + }, + { + "epoch": 4.031620553359684, + "grad_norm": 0.31228709656223186, + "learning_rate": 4.4067256047850604e-06, + "loss": 0.7859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4319099187850952, + "step": 5100, + "valid_targets_mean": 16018.4, + "valid_targets_min": 11184 + }, + { + "epoch": 4.0355731225296445, + "grad_norm": 0.33967706809906106, + "learning_rate": 4.372223177043094e-06, + "loss": 0.7649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38802775740623474, + "step": 5105, + "valid_targets_mean": 14325.1, + "valid_targets_min": 1870 + }, + { + "epoch": 4.0395256916996045, + "grad_norm": 0.3197845903411849, + "learning_rate": 4.33783976572484e-06, + "loss": 0.7785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3931560516357422, + "step": 5110, + "valid_targets_mean": 13928.6, + "valid_targets_min": 838 + }, + { + "epoch": 4.043478260869565, + "grad_norm": 0.3778630949500078, + "learning_rate": 4.3035756326839625e-06, + "loss": 0.7854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3960169553756714, + "step": 5115, + "valid_targets_mean": 14867.3, + "valid_targets_min": 2663 + }, + { + "epoch": 4.047430830039525, + "grad_norm": 0.33578933565692415, + "learning_rate": 4.269431038865741e-06, + "loss": 0.7969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36356988549232483, + "step": 5120, + "valid_targets_mean": 14193.8, + "valid_targets_min": 2076 + }, + { + "epoch": 4.051383399209486, + "grad_norm": 0.39698664576605347, + "learning_rate": 4.235406244305076e-06, + "loss": 0.7865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43305546045303345, + "step": 5125, + "valid_targets_mean": 14296.2, + "valid_targets_min": 2121 + }, + { + "epoch": 4.055335968379446, + "grad_norm": 0.402866028994265, + "learning_rate": 4.201501508124528e-06, + "loss": 0.7572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36359381675720215, + "step": 5130, + "valid_targets_mean": 12300.1, + "valid_targets_min": 795 + }, + { + "epoch": 4.059288537549407, + "grad_norm": 0.3272907832188378, + "learning_rate": 4.167717088532302e-06, + "loss": 0.7849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37160420417785645, + "step": 5135, + "valid_targets_mean": 14325.1, + "valid_targets_min": 1660 + }, + { + "epoch": 4.063241106719367, + "grad_norm": 0.3098193755207695, + "learning_rate": 4.134053242820333e-06, + "loss": 0.771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3815852999687195, + "step": 5140, + "valid_targets_mean": 13641.7, + "valid_targets_min": 2253 + }, + { + "epoch": 4.067193675889328, + "grad_norm": 0.3194742338506433, + "learning_rate": 4.100510227362295e-06, + "loss": 0.7773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35533782839775085, + "step": 5145, + "valid_targets_mean": 13975.6, + "valid_targets_min": 3213 + }, + { + "epoch": 4.071146245059288, + "grad_norm": 0.3560614088944959, + "learning_rate": 4.067088297611647e-06, + "loss": 0.7626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4003778100013733, + "step": 5150, + "valid_targets_mean": 13644.4, + "valid_targets_min": 1027 + }, + { + "epoch": 4.075098814229249, + "grad_norm": 0.33536512836506893, + "learning_rate": 4.033787708099706e-06, + "loss": 0.7581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3681613802909851, + "step": 5155, + "valid_targets_mean": 13344.8, + "valid_targets_min": 1224 + }, + { + "epoch": 4.07905138339921, + "grad_norm": 0.34956611885571365, + "learning_rate": 4.000608712433693e-06, + "loss": 0.7829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3389948010444641, + "step": 5160, + "valid_targets_mean": 12546.9, + "valid_targets_min": 817 + }, + { + "epoch": 4.08300395256917, + "grad_norm": 0.35712944434577204, + "learning_rate": 3.9675515632948e-06, + "loss": 0.7682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3441237211227417, + "step": 5165, + "valid_targets_mean": 14020.4, + "valid_targets_min": 1318 + }, + { + "epoch": 4.086956521739131, + "grad_norm": 0.3911063794704056, + "learning_rate": 3.934616512436303e-06, + "loss": 0.7704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39325711131095886, + "step": 5170, + "valid_targets_mean": 13904.7, + "valid_targets_min": 1153 + }, + { + "epoch": 4.090909090909091, + "grad_norm": 0.3440944924188883, + "learning_rate": 3.901803810681566e-06, + "loss": 0.7765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38978511095046997, + "step": 5175, + "valid_targets_mean": 14848.3, + "valid_targets_min": 4962 + }, + { + "epoch": 4.094861660079052, + "grad_norm": 0.3275727568551669, + "learning_rate": 3.86911370792223e-06, + "loss": 0.784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.361552357673645, + "step": 5180, + "valid_targets_mean": 13707.1, + "valid_targets_min": 1863 + }, + { + "epoch": 4.098814229249012, + "grad_norm": 0.3264788392801055, + "learning_rate": 3.836546453116212e-06, + "loss": 0.7711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37624189257621765, + "step": 5185, + "valid_targets_mean": 14294.6, + "valid_targets_min": 1533 + }, + { + "epoch": 4.102766798418973, + "grad_norm": 0.3563575878791276, + "learning_rate": 3.804102294285894e-06, + "loss": 0.7506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35674768686294556, + "step": 5190, + "valid_targets_mean": 13240.8, + "valid_targets_min": 2185 + }, + { + "epoch": 4.106719367588933, + "grad_norm": 0.3017460948851496, + "learning_rate": 3.771781478516179e-06, + "loss": 0.7708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4285289943218231, + "step": 5195, + "valid_targets_mean": 15677.2, + "valid_targets_min": 7462 + }, + { + "epoch": 4.1106719367588935, + "grad_norm": 0.3384458679517088, + "learning_rate": 3.739584251952624e-06, + "loss": 0.7821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4052169919013977, + "step": 5200, + "valid_targets_mean": 13931.3, + "valid_targets_min": 2095 + }, + { + "epoch": 4.1146245059288535, + "grad_norm": 0.3182616279789024, + "learning_rate": 3.707510859799577e-06, + "loss": 0.7785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3889882564544678, + "step": 5205, + "valid_targets_mean": 13858.0, + "valid_targets_min": 1922 + }, + { + "epoch": 4.118577075098814, + "grad_norm": 0.3250414036473944, + "learning_rate": 3.675561546318296e-06, + "loss": 0.7582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4136720299720764, + "step": 5210, + "valid_targets_mean": 14765.9, + "valid_targets_min": 2915 + }, + { + "epoch": 4.122529644268774, + "grad_norm": 0.29885752660420495, + "learning_rate": 3.6437365548250857e-06, + "loss": 0.7806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3738839626312256, + "step": 5215, + "valid_targets_mean": 13853.5, + "valid_targets_min": 1927 + }, + { + "epoch": 4.126482213438735, + "grad_norm": 0.2999269108037473, + "learning_rate": 3.6120361276894757e-06, + "loss": 0.7643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3577503263950348, + "step": 5220, + "valid_targets_mean": 13480.3, + "valid_targets_min": 4180 + }, + { + "epoch": 4.130434782608695, + "grad_norm": 0.31774749244848366, + "learning_rate": 3.5804605063323172e-06, + "loss": 0.7701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3901096284389496, + "step": 5225, + "valid_targets_mean": 14607.1, + "valid_targets_min": 4851 + }, + { + "epoch": 4.134387351778656, + "grad_norm": 0.29434105357942564, + "learning_rate": 3.549009931224012e-06, + "loss": 0.7518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3962637186050415, + "step": 5230, + "valid_targets_mean": 14900.8, + "valid_targets_min": 3096 + }, + { + "epoch": 4.138339920948616, + "grad_norm": 0.31887082253391563, + "learning_rate": 3.517684641882628e-06, + "loss": 0.7796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38233599066734314, + "step": 5235, + "valid_targets_mean": 14384.0, + "valid_targets_min": 1036 + }, + { + "epoch": 4.142292490118577, + "grad_norm": 0.29637030755629673, + "learning_rate": 3.4864848768720892e-06, + "loss": 0.7703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3864697813987732, + "step": 5240, + "valid_targets_mean": 14118.6, + "valid_targets_min": 2655 + }, + { + "epoch": 4.146245059288537, + "grad_norm": 0.30235510011571093, + "learning_rate": 3.4554108738003842e-06, + "loss": 0.7778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39778488874435425, + "step": 5245, + "valid_targets_mean": 15251.1, + "valid_targets_min": 5108 + }, + { + "epoch": 4.150197628458498, + "grad_norm": 0.3464586926198461, + "learning_rate": 3.424462869317722e-06, + "loss": 0.7738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40956375002861023, + "step": 5250, + "valid_targets_mean": 14060.6, + "valid_targets_min": 3274 + }, + { + "epoch": 4.154150197628459, + "grad_norm": 0.31830602402911157, + "learning_rate": 3.3936410991147506e-06, + "loss": 0.7771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3700949549674988, + "step": 5255, + "valid_targets_mean": 14203.8, + "valid_targets_min": 1929 + }, + { + "epoch": 4.158102766798419, + "grad_norm": 0.3512198738668119, + "learning_rate": 3.3629457979207515e-06, + "loss": 0.7653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3866420388221741, + "step": 5260, + "valid_targets_mean": 15092.8, + "valid_targets_min": 1604 + }, + { + "epoch": 4.16205533596838, + "grad_norm": 0.3322986000337341, + "learning_rate": 3.3323771995018594e-06, + "loss": 0.7613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44531458616256714, + "step": 5265, + "valid_targets_mean": 15595.2, + "valid_targets_min": 2454 + }, + { + "epoch": 4.16600790513834, + "grad_norm": 0.3807404454693805, + "learning_rate": 3.3019355366592752e-06, + "loss": 0.7993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43712306022644043, + "step": 5270, + "valid_targets_mean": 13508.5, + "valid_targets_min": 1453 + }, + { + "epoch": 4.169960474308301, + "grad_norm": 0.30852569285419507, + "learning_rate": 3.2716210412275128e-06, + "loss": 0.7655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4033241271972656, + "step": 5275, + "valid_targets_mean": 14626.5, + "valid_targets_min": 3203 + }, + { + "epoch": 4.173913043478261, + "grad_norm": 0.3420249552729287, + "learning_rate": 3.2414339440725872e-06, + "loss": 0.7814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38953977823257446, + "step": 5280, + "valid_targets_mean": 14202.9, + "valid_targets_min": 1729 + }, + { + "epoch": 4.177865612648222, + "grad_norm": 0.3156100918974129, + "learning_rate": 3.2113744750903163e-06, + "loss": 0.7794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4098476469516754, + "step": 5285, + "valid_targets_mean": 14040.6, + "valid_targets_min": 2569 + }, + { + "epoch": 4.181818181818182, + "grad_norm": 0.29982674699105805, + "learning_rate": 3.1814428632045245e-06, + "loss": 0.7703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3701314330101013, + "step": 5290, + "valid_targets_mean": 13843.7, + "valid_targets_min": 2017 + }, + { + "epoch": 4.1857707509881426, + "grad_norm": 0.3230103603633649, + "learning_rate": 3.1516393363653177e-06, + "loss": 0.7793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4270232319831848, + "step": 5295, + "valid_targets_mean": 15533.2, + "valid_targets_min": 6534 + }, + { + "epoch": 4.189723320158103, + "grad_norm": 0.30959485276191484, + "learning_rate": 3.1219641215473494e-06, + "loss": 0.7624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37373608350753784, + "step": 5300, + "valid_targets_mean": 15027.7, + "valid_targets_min": 4467 + }, + { + "epoch": 4.1936758893280635, + "grad_norm": 0.34048767203290886, + "learning_rate": 3.0924174447480813e-06, + "loss": 0.765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43227332830429077, + "step": 5305, + "valid_targets_mean": 15176.0, + "valid_targets_min": 2852 + }, + { + "epoch": 4.1976284584980235, + "grad_norm": 0.31274036127527427, + "learning_rate": 3.0629995309860704e-06, + "loss": 0.773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39449185132980347, + "step": 5310, + "valid_targets_mean": 15986.2, + "valid_targets_min": 9338 + }, + { + "epoch": 4.201581027667984, + "grad_norm": 0.30092214615856006, + "learning_rate": 3.0337106042992625e-06, + "loss": 0.7496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.337937593460083, + "step": 5315, + "valid_targets_mean": 14069.4, + "valid_targets_min": 2905 + }, + { + "epoch": 4.205533596837944, + "grad_norm": 0.4474263496961077, + "learning_rate": 3.004550887743256e-06, + "loss": 0.7683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42072004079818726, + "step": 5320, + "valid_targets_mean": 14801.8, + "valid_targets_min": 2343 + }, + { + "epoch": 4.209486166007905, + "grad_norm": 0.4003067759929318, + "learning_rate": 2.975520603389648e-06, + "loss": 0.7553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4059603214263916, + "step": 5325, + "valid_targets_mean": 15443.1, + "valid_targets_min": 1315 + }, + { + "epoch": 4.213438735177865, + "grad_norm": 0.43209913489544804, + "learning_rate": 2.9466199723243004e-06, + "loss": 0.775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3703386187553406, + "step": 5330, + "valid_targets_mean": 12097.0, + "valid_targets_min": 503 + }, + { + "epoch": 4.217391304347826, + "grad_norm": 0.3589878949042606, + "learning_rate": 2.917849214645687e-06, + "loss": 0.771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.385062575340271, + "step": 5335, + "valid_targets_mean": 13717.9, + "valid_targets_min": 838 + }, + { + "epoch": 4.221343873517786, + "grad_norm": 0.35143233446420363, + "learning_rate": 2.889208549463196e-06, + "loss": 0.7705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3873834013938904, + "step": 5340, + "valid_targets_mean": 14974.5, + "valid_targets_min": 745 + }, + { + "epoch": 4.225296442687747, + "grad_norm": 0.34203763391468417, + "learning_rate": 2.8606981948954794e-06, + "loss": 0.7708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37858426570892334, + "step": 5345, + "valid_targets_mean": 14167.0, + "valid_targets_min": 1614 + }, + { + "epoch": 4.229249011857707, + "grad_norm": 0.37253202127322904, + "learning_rate": 2.8323183680687737e-06, + "loss": 0.7641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3843737244606018, + "step": 5350, + "valid_targets_mean": 13867.3, + "valid_targets_min": 4235 + }, + { + "epoch": 4.233201581027668, + "grad_norm": 0.35469477028799634, + "learning_rate": 2.804069285115265e-06, + "loss": 0.7788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37525850534439087, + "step": 5355, + "valid_targets_mean": 12975.2, + "valid_targets_min": 1383 + }, + { + "epoch": 4.237154150197629, + "grad_norm": 0.38701943104689834, + "learning_rate": 2.775951161171424e-06, + "loss": 0.774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39522191882133484, + "step": 5360, + "valid_targets_mean": 14342.5, + "valid_targets_min": 729 + }, + { + "epoch": 4.241106719367589, + "grad_norm": 0.37959337627077755, + "learning_rate": 2.747964210376386e-06, + "loss": 0.781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4166771471500397, + "step": 5365, + "valid_targets_mean": 14108.7, + "valid_targets_min": 904 + }, + { + "epoch": 4.24505928853755, + "grad_norm": 0.32219983313468903, + "learning_rate": 2.720108645870305e-06, + "loss": 0.7573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3954906761646271, + "step": 5370, + "valid_targets_mean": 14304.5, + "valid_targets_min": 2089 + }, + { + "epoch": 4.24901185770751, + "grad_norm": 0.36172227095593623, + "learning_rate": 2.6923846797927346e-06, + "loss": 0.7572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3699180483818054, + "step": 5375, + "valid_targets_mean": 14483.7, + "valid_targets_min": 758 + }, + { + "epoch": 4.252964426877471, + "grad_norm": 0.3238917518344928, + "learning_rate": 2.6647925232810324e-06, + "loss": 0.7751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3601040542125702, + "step": 5380, + "valid_targets_mean": 12780.1, + "valid_targets_min": 979 + }, + { + "epoch": 4.256916996047431, + "grad_norm": 0.36206467528989483, + "learning_rate": 2.637332386468705e-06, + "loss": 0.777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3671260178089142, + "step": 5385, + "valid_targets_mean": 14643.2, + "valid_targets_min": 1073 + }, + { + "epoch": 4.260869565217392, + "grad_norm": 0.36269537410359515, + "learning_rate": 2.610004478483863e-06, + "loss": 0.7585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4122769832611084, + "step": 5390, + "valid_targets_mean": 14764.1, + "valid_targets_min": 2878 + }, + { + "epoch": 4.264822134387352, + "grad_norm": 0.394398114373753, + "learning_rate": 2.582809007447591e-06, + "loss": 0.7851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3488360643386841, + "step": 5395, + "valid_targets_mean": 13826.8, + "valid_targets_min": 2505 + }, + { + "epoch": 4.2687747035573125, + "grad_norm": 0.37564176373948704, + "learning_rate": 2.5557461804723714e-06, + "loss": 0.777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4079475402832031, + "step": 5400, + "valid_targets_mean": 13731.6, + "valid_targets_min": 667 + }, + { + "epoch": 4.2727272727272725, + "grad_norm": 0.29435764323469965, + "learning_rate": 2.5288162036605136e-06, + "loss": 0.7551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3790532946586609, + "step": 5405, + "valid_targets_mean": 15240.4, + "valid_targets_min": 3569 + }, + { + "epoch": 4.276679841897233, + "grad_norm": 0.3262902642760719, + "learning_rate": 2.502019282102579e-06, + "loss": 0.773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37409332394599915, + "step": 5410, + "valid_targets_mean": 13709.2, + "valid_targets_min": 1108 + }, + { + "epoch": 4.280632411067193, + "grad_norm": 0.3031288703392213, + "learning_rate": 2.475355619875819e-06, + "loss": 0.7579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34520554542541504, + "step": 5415, + "valid_targets_mean": 13777.2, + "valid_targets_min": 2421 + }, + { + "epoch": 4.284584980237154, + "grad_norm": 0.3318778283454716, + "learning_rate": 2.4488254200426376e-06, + "loss": 0.7522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3736872375011444, + "step": 5420, + "valid_targets_mean": 14531.1, + "valid_targets_min": 2407 + }, + { + "epoch": 4.288537549407114, + "grad_norm": 0.39340656598298224, + "learning_rate": 2.422428884648995e-06, + "loss": 0.7678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4096152186393738, + "step": 5425, + "valid_targets_mean": 13687.5, + "valid_targets_min": 1722 + }, + { + "epoch": 4.292490118577075, + "grad_norm": 0.30505566888451763, + "learning_rate": 2.3961662147229393e-06, + "loss": 0.7632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35961610078811646, + "step": 5430, + "valid_targets_mean": 12917.2, + "valid_targets_min": 1766 + }, + { + "epoch": 4.296442687747035, + "grad_norm": 0.2805563273415158, + "learning_rate": 2.3700376102730216e-06, + "loss": 0.763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3543228507041931, + "step": 5435, + "valid_targets_mean": 13931.5, + "valid_targets_min": 5540 + }, + { + "epoch": 4.300395256916996, + "grad_norm": 0.4053589776777002, + "learning_rate": 2.344043270286793e-06, + "loss": 0.7592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3861280083656311, + "step": 5440, + "valid_targets_mean": 14016.5, + "valid_targets_min": 2536 + }, + { + "epoch": 4.304347826086957, + "grad_norm": 0.3238054202769742, + "learning_rate": 2.3181833927292873e-06, + "loss": 0.7672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3663375973701477, + "step": 5445, + "valid_targets_mean": 13313.0, + "valid_targets_min": 2492 + }, + { + "epoch": 4.308300395256917, + "grad_norm": 0.302606884899499, + "learning_rate": 2.2924581745415185e-06, + "loss": 0.7559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36494961380958557, + "step": 5450, + "valid_targets_mean": 13609.3, + "valid_targets_min": 3384 + }, + { + "epoch": 4.312252964426877, + "grad_norm": 0.32766025791969383, + "learning_rate": 2.266867811638962e-06, + "loss": 0.7764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4031813144683838, + "step": 5455, + "valid_targets_mean": 15082.1, + "valid_targets_min": 2233 + }, + { + "epoch": 4.316205533596838, + "grad_norm": 0.29650041173775005, + "learning_rate": 2.2414124989101005e-06, + "loss": 0.7727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3738417625427246, + "step": 5460, + "valid_targets_mean": 13745.1, + "valid_targets_min": 1833 + }, + { + "epoch": 4.320158102766799, + "grad_norm": 0.3517847278273192, + "learning_rate": 2.2160924302148822e-06, + "loss": 0.7701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3781439960002899, + "step": 5465, + "valid_targets_mean": 14579.3, + "valid_targets_min": 3789 + }, + { + "epoch": 4.324110671936759, + "grad_norm": 0.2938351535868343, + "learning_rate": 2.1909077983833105e-06, + "loss": 0.7614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3230235278606415, + "step": 5470, + "valid_targets_mean": 12224.1, + "valid_targets_min": 951 + }, + { + "epoch": 4.32806324110672, + "grad_norm": 0.29702401989704896, + "learning_rate": 2.16585879521392e-06, + "loss": 0.7601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3947426378726959, + "step": 5475, + "valid_targets_mean": 15775.3, + "valid_targets_min": 9667 + }, + { + "epoch": 4.33201581027668, + "grad_norm": 0.29878280312727895, + "learning_rate": 2.140945611472347e-06, + "loss": 0.7797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4131738543510437, + "step": 5480, + "valid_targets_mean": 14404.3, + "valid_targets_min": 1407 + }, + { + "epoch": 4.335968379446641, + "grad_norm": 0.31481095475568144, + "learning_rate": 2.1161684368898717e-06, + "loss": 0.7842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4058864712715149, + "step": 5485, + "valid_targets_mean": 14679.8, + "valid_targets_min": 965 + }, + { + "epoch": 4.339920948616601, + "grad_norm": 0.34573513422796953, + "learning_rate": 2.091527460161953e-06, + "loss": 0.7846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3872443437576294, + "step": 5490, + "valid_targets_mean": 14085.8, + "valid_targets_min": 2100 + }, + { + "epoch": 4.3438735177865615, + "grad_norm": 0.31631859105814786, + "learning_rate": 2.067022868946833e-06, + "loss": 0.7867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4647630453109741, + "step": 5495, + "valid_targets_mean": 15259.7, + "valid_targets_min": 2689 + }, + { + "epoch": 4.3478260869565215, + "grad_norm": 0.3267630640865053, + "learning_rate": 2.0426548498640655e-06, + "loss": 0.7643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3823457956314087, + "step": 5500, + "valid_targets_mean": 13830.4, + "valid_targets_min": 672 + }, + { + "epoch": 4.351778656126482, + "grad_norm": 0.37600680447355694, + "learning_rate": 2.0184235884931168e-06, + "loss": 0.7701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3485482335090637, + "step": 5505, + "valid_targets_mean": 12869.9, + "valid_targets_min": 1317 + }, + { + "epoch": 4.355731225296442, + "grad_norm": 0.2841853439070913, + "learning_rate": 1.9943292693719486e-06, + "loss": 0.778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35242539644241333, + "step": 5510, + "valid_targets_mean": 14102.9, + "valid_targets_min": 3550 + }, + { + "epoch": 4.359683794466403, + "grad_norm": 0.27447934051530143, + "learning_rate": 1.9703720759956104e-06, + "loss": 0.7772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.383189857006073, + "step": 5515, + "valid_targets_mean": 13682.2, + "valid_targets_min": 820 + }, + { + "epoch": 4.363636363636363, + "grad_norm": 0.328278828789419, + "learning_rate": 1.9465521908148386e-06, + "loss": 0.783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3948730528354645, + "step": 5520, + "valid_targets_mean": 14015.4, + "valid_targets_min": 752 + }, + { + "epoch": 4.367588932806324, + "grad_norm": 0.28727044598456186, + "learning_rate": 1.9228697952346943e-06, + "loss": 0.773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38110482692718506, + "step": 5525, + "valid_targets_mean": 14823.7, + "valid_targets_min": 4635 + }, + { + "epoch": 4.371541501976284, + "grad_norm": 0.3232318054055361, + "learning_rate": 1.8993250696131205e-06, + "loss": 0.7662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3837108314037323, + "step": 5530, + "valid_targets_mean": 12841.5, + "valid_targets_min": 1567 + }, + { + "epoch": 4.375494071146245, + "grad_norm": 0.3455172444031321, + "learning_rate": 1.875918193259647e-06, + "loss": 0.7845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3713795840740204, + "step": 5535, + "valid_targets_mean": 12491.8, + "valid_targets_min": 1060 + }, + { + "epoch": 4.379446640316205, + "grad_norm": 0.3679815545583544, + "learning_rate": 1.8526493444339611e-06, + "loss": 0.7828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3865644037723541, + "step": 5540, + "valid_targets_mean": 13841.2, + "valid_targets_min": 1061 + }, + { + "epoch": 4.383399209486166, + "grad_norm": 0.28557857559705263, + "learning_rate": 1.8295187003445814e-06, + "loss": 0.7687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39387574791908264, + "step": 5545, + "valid_targets_mean": 14227.2, + "valid_targets_min": 1220 + }, + { + "epoch": 4.387351778656127, + "grad_norm": 0.30292941839173393, + "learning_rate": 1.8065264371475e-06, + "loss": 0.7673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3817141056060791, + "step": 5550, + "valid_targets_mean": 14289.6, + "valid_targets_min": 961 + }, + { + "epoch": 4.391304347826087, + "grad_norm": 0.3101586395313176, + "learning_rate": 1.783672729944843e-06, + "loss": 0.772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3952176570892334, + "step": 5555, + "valid_targets_mean": 13763.5, + "valid_targets_min": 2146 + }, + { + "epoch": 4.395256916996048, + "grad_norm": 0.2808104424245787, + "learning_rate": 1.760957752783532e-06, + "loss": 0.7668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39179685711860657, + "step": 5560, + "valid_targets_mean": 14249.2, + "valid_targets_min": 5363 + }, + { + "epoch": 4.399209486166008, + "grad_norm": 0.3430767749810672, + "learning_rate": 1.7383816786539732e-06, + "loss": 0.7524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3839855194091797, + "step": 5565, + "valid_targets_mean": 13590.6, + "valid_targets_min": 2703 + }, + { + "epoch": 4.403162055335969, + "grad_norm": 0.3188303677921059, + "learning_rate": 1.7159446794887146e-06, + "loss": 0.7801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39078521728515625, + "step": 5570, + "valid_targets_mean": 16148.6, + "valid_targets_min": 5190 + }, + { + "epoch": 4.407114624505929, + "grad_norm": 0.3014474795646115, + "learning_rate": 1.6936469261611632e-06, + "loss": 0.7784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36791369318962097, + "step": 5575, + "valid_targets_mean": 12968.7, + "valid_targets_min": 892 + }, + { + "epoch": 4.41106719367589, + "grad_norm": 0.28906138320053093, + "learning_rate": 1.6714885884842647e-06, + "loss": 0.7751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45545321702957153, + "step": 5580, + "valid_targets_mean": 14895.2, + "valid_targets_min": 4853 + }, + { + "epoch": 4.41501976284585, + "grad_norm": 0.32145471592078934, + "learning_rate": 1.649469835209223e-06, + "loss": 0.7693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41598501801490784, + "step": 5585, + "valid_targets_mean": 14152.9, + "valid_targets_min": 942 + }, + { + "epoch": 4.4189723320158105, + "grad_norm": 0.2872860694647396, + "learning_rate": 1.627590834024204e-06, + "loss": 0.7649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3907788395881653, + "step": 5590, + "valid_targets_mean": 14514.9, + "valid_targets_min": 1858 + }, + { + "epoch": 4.4229249011857705, + "grad_norm": 0.27741336355206364, + "learning_rate": 1.6058517515530693e-06, + "loss": 0.7689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3879639506340027, + "step": 5595, + "valid_targets_mean": 13350.2, + "valid_targets_min": 1516 + }, + { + "epoch": 4.426877470355731, + "grad_norm": 0.2980253186671584, + "learning_rate": 1.5842527533540897e-06, + "loss": 0.7744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3884013295173645, + "step": 5600, + "valid_targets_mean": 14632.5, + "valid_targets_min": 687 + }, + { + "epoch": 4.430830039525691, + "grad_norm": 0.28046499987538087, + "learning_rate": 1.5627940039187216e-06, + "loss": 0.7737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3401361405849457, + "step": 5605, + "valid_targets_mean": 13314.4, + "valid_targets_min": 608 + }, + { + "epoch": 4.434782608695652, + "grad_norm": 0.2977982489491566, + "learning_rate": 1.5414756666702978e-06, + "loss": 0.7715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3786538243293762, + "step": 5610, + "valid_targets_mean": 13902.2, + "valid_targets_min": 1467 + }, + { + "epoch": 4.438735177865612, + "grad_norm": 0.2928912728505084, + "learning_rate": 1.5202979039628353e-06, + "loss": 0.7725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4067348837852478, + "step": 5615, + "valid_targets_mean": 13721.0, + "valid_targets_min": 2271 + }, + { + "epoch": 4.442687747035573, + "grad_norm": 0.32397384437479093, + "learning_rate": 1.4992608770797802e-06, + "loss": 0.7826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40762627124786377, + "step": 5620, + "valid_targets_mean": 13311.3, + "valid_targets_min": 966 + }, + { + "epoch": 4.446640316205533, + "grad_norm": 0.32856680751100203, + "learning_rate": 1.4783647462327544e-06, + "loss": 0.7635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3827669620513916, + "step": 5625, + "valid_targets_mean": 13487.7, + "valid_targets_min": 819 + }, + { + "epoch": 4.450592885375494, + "grad_norm": 0.35995820563113134, + "learning_rate": 1.4576096705603849e-06, + "loss": 0.7677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42208731174468994, + "step": 5630, + "valid_targets_mean": 15448.1, + "valid_targets_min": 4996 + }, + { + "epoch": 4.454545454545454, + "grad_norm": 0.3165701616928046, + "learning_rate": 1.4369958081270507e-06, + "loss": 0.7721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37499144673347473, + "step": 5635, + "valid_targets_mean": 13565.8, + "valid_targets_min": 1087 + }, + { + "epoch": 4.458498023715415, + "grad_norm": 0.28691454197675165, + "learning_rate": 1.416523315921694e-06, + "loss": 0.7504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3488866984844208, + "step": 5640, + "valid_targets_mean": 14791.9, + "valid_targets_min": 4437 + }, + { + "epoch": 4.462450592885375, + "grad_norm": 0.301258684554573, + "learning_rate": 1.3961923498566287e-06, + "loss": 0.7701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32578521966934204, + "step": 5645, + "valid_targets_mean": 11688.1, + "valid_targets_min": 1232 + }, + { + "epoch": 4.466403162055336, + "grad_norm": 0.3081048692613199, + "learning_rate": 1.3760030647663424e-06, + "loss": 0.7652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42566442489624023, + "step": 5650, + "valid_targets_mean": 13556.0, + "valid_targets_min": 1765 + }, + { + "epoch": 4.470355731225297, + "grad_norm": 0.29174807007344006, + "learning_rate": 1.3559556144063301e-06, + "loss": 0.7632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37059447169303894, + "step": 5655, + "valid_targets_mean": 12974.3, + "valid_targets_min": 681 + }, + { + "epoch": 4.474308300395257, + "grad_norm": 0.28665674372767685, + "learning_rate": 1.3360501514519132e-06, + "loss": 0.7667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37344032526016235, + "step": 5660, + "valid_targets_mean": 13467.8, + "valid_targets_min": 1090 + }, + { + "epoch": 4.478260869565218, + "grad_norm": 0.28870234261644107, + "learning_rate": 1.316286827497073e-06, + "loss": 0.7801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3834821581840515, + "step": 5665, + "valid_targets_mean": 13673.9, + "valid_targets_min": 2027 + }, + { + "epoch": 4.482213438735178, + "grad_norm": 0.27904447945110933, + "learning_rate": 1.2966657930533244e-06, + "loss": 0.7644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32825326919555664, + "step": 5670, + "valid_targets_mean": 12776.4, + "valid_targets_min": 2356 + }, + { + "epoch": 4.486166007905139, + "grad_norm": 0.3063257272525797, + "learning_rate": 1.277187197548515e-06, + "loss": 0.7663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36131933331489563, + "step": 5675, + "valid_targets_mean": 14200.1, + "valid_targets_min": 1409 + }, + { + "epoch": 4.490118577075099, + "grad_norm": 0.27503416413888293, + "learning_rate": 1.257851189325754e-06, + "loss": 0.76, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36917024850845337, + "step": 5680, + "valid_targets_mean": 15006.8, + "valid_targets_min": 4107 + }, + { + "epoch": 4.4940711462450595, + "grad_norm": 0.2881598326713359, + "learning_rate": 1.2386579156422295e-06, + "loss": 0.7486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3434067964553833, + "step": 5685, + "valid_targets_mean": 13043.0, + "valid_targets_min": 2349 + }, + { + "epoch": 4.4980237154150196, + "grad_norm": 0.28722933342093143, + "learning_rate": 1.2196075226681137e-06, + "loss": 0.7577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3770991563796997, + "step": 5690, + "valid_targets_mean": 15388.2, + "valid_targets_min": 2655 + }, + { + "epoch": 4.5019762845849804, + "grad_norm": 0.3101850818133744, + "learning_rate": 1.2007001554854348e-06, + "loss": 0.7708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37362509965896606, + "step": 5695, + "valid_targets_mean": 14855.8, + "valid_targets_min": 1095 + }, + { + "epoch": 4.5059288537549405, + "grad_norm": 0.2830033479181037, + "learning_rate": 1.1819359580869906e-06, + "loss": 0.793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39906877279281616, + "step": 5700, + "valid_targets_mean": 14888.6, + "valid_targets_min": 4279 + }, + { + "epoch": 4.509881422924901, + "grad_norm": 0.31373609810719905, + "learning_rate": 1.1633150733752285e-06, + "loss": 0.7742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4191277027130127, + "step": 5705, + "valid_targets_mean": 14265.5, + "valid_targets_min": 1332 + }, + { + "epoch": 4.513833992094861, + "grad_norm": 0.31938160846204794, + "learning_rate": 1.1448376431611896e-06, + "loss": 0.7732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4150545001029968, + "step": 5710, + "valid_targets_mean": 14552.1, + "valid_targets_min": 1599 + }, + { + "epoch": 4.517786561264822, + "grad_norm": 0.2920549818264026, + "learning_rate": 1.1265038081633816e-06, + "loss": 0.7634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3818984627723694, + "step": 5715, + "valid_targets_mean": 15365.3, + "valid_targets_min": 3506 + }, + { + "epoch": 4.521739130434782, + "grad_norm": 0.3136576412464423, + "learning_rate": 1.1083137080067608e-06, + "loss": 0.7752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40557563304901123, + "step": 5720, + "valid_targets_mean": 14488.1, + "valid_targets_min": 1897 + }, + { + "epoch": 4.525691699604743, + "grad_norm": 0.29772472677682704, + "learning_rate": 1.0902674812216252e-06, + "loss": 0.7606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39123693108558655, + "step": 5725, + "valid_targets_mean": 15821.5, + "valid_targets_min": 4426 + }, + { + "epoch": 4.529644268774703, + "grad_norm": 0.28495244754175636, + "learning_rate": 1.0723652652425854e-06, + "loss": 0.7704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36636775732040405, + "step": 5730, + "valid_targets_mean": 13728.4, + "valid_targets_min": 2842 + }, + { + "epoch": 4.533596837944664, + "grad_norm": 0.31355808565276977, + "learning_rate": 1.0546071964075021e-06, + "loss": 0.7746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35670042037963867, + "step": 5735, + "valid_targets_mean": 13704.5, + "valid_targets_min": 2915 + }, + { + "epoch": 4.537549407114625, + "grad_norm": 0.31485565711900704, + "learning_rate": 1.0369934099564617e-06, + "loss": 0.7616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3748449981212616, + "step": 5740, + "valid_targets_mean": 13103.2, + "valid_targets_min": 832 + }, + { + "epoch": 4.541501976284585, + "grad_norm": 0.2960145911799916, + "learning_rate": 1.019524040030735e-06, + "loss": 0.7585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37683141231536865, + "step": 5745, + "valid_targets_mean": 13778.9, + "valid_targets_min": 3342 + }, + { + "epoch": 4.545454545454545, + "grad_norm": 0.26197743861598927, + "learning_rate": 1.0021992196717644e-06, + "loss": 0.7735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4121040403842926, + "step": 5750, + "valid_targets_mean": 15344.3, + "valid_targets_min": 3576 + }, + { + "epoch": 4.549407114624506, + "grad_norm": 0.28917176138522027, + "learning_rate": 9.85019080820142e-07, + "loss": 0.789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41319918632507324, + "step": 5755, + "valid_targets_mean": 14420.1, + "valid_targets_min": 2674 + }, + { + "epoch": 4.553359683794467, + "grad_norm": 0.2882051950614287, + "learning_rate": 9.679837543146099e-07, + "loss": 0.779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35977184772491455, + "step": 5760, + "valid_targets_mean": 14449.3, + "valid_targets_min": 3818 + }, + { + "epoch": 4.557312252964427, + "grad_norm": 0.2574014892157651, + "learning_rate": 9.510933698910741e-07, + "loss": 0.7459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34497183561325073, + "step": 5765, + "valid_targets_mean": 14051.2, + "valid_targets_min": 4960 + }, + { + "epoch": 4.561264822134388, + "grad_norm": 0.29837145370094476, + "learning_rate": 9.343480561815821e-07, + "loss": 0.763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3646391034126282, + "step": 5770, + "valid_targets_mean": 13052.5, + "valid_targets_min": 916 + }, + { + "epoch": 4.565217391304348, + "grad_norm": 0.32618950958707305, + "learning_rate": 9.177479407133893e-07, + "loss": 0.7752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3891456723213196, + "step": 5775, + "valid_targets_mean": 15308.8, + "valid_targets_min": 3764 + }, + { + "epoch": 4.569169960474309, + "grad_norm": 0.28094605725447974, + "learning_rate": 9.012931499079558e-07, + "loss": 0.7814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38870444893836975, + "step": 5780, + "valid_targets_mean": 14022.1, + "valid_targets_min": 3141 + }, + { + "epoch": 4.573122529644269, + "grad_norm": 0.3100899048868606, + "learning_rate": 8.849838090799934e-07, + "loss": 0.7727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41265344619750977, + "step": 5785, + "valid_targets_mean": 14854.4, + "valid_targets_min": 1651 + }, + { + "epoch": 4.5770750988142295, + "grad_norm": 0.2881042489045768, + "learning_rate": 8.688200424365112e-07, + "loss": 0.7782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38353782892227173, + "step": 5790, + "valid_targets_mean": 14243.8, + "valid_targets_min": 2094 + }, + { + "epoch": 4.5810276679841895, + "grad_norm": 0.31725848578226656, + "learning_rate": 8.528019730758674e-07, + "loss": 0.7629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3844844400882721, + "step": 5795, + "valid_targets_mean": 14188.2, + "valid_targets_min": 5037 + }, + { + "epoch": 4.58498023715415, + "grad_norm": 0.2770376972448289, + "learning_rate": 8.369297229868367e-07, + "loss": 0.7607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40883392095565796, + "step": 5800, + "valid_targets_mean": 14574.5, + "valid_targets_min": 3433 + }, + { + "epoch": 4.58893280632411, + "grad_norm": 0.289565820115361, + "learning_rate": 8.21203413047682e-07, + "loss": 0.7809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3883562684059143, + "step": 5805, + "valid_targets_mean": 14110.5, + "valid_targets_min": 2243 + }, + { + "epoch": 4.592885375494071, + "grad_norm": 0.30737011165224876, + "learning_rate": 8.056231630252153e-07, + "loss": 0.7823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4024915099143982, + "step": 5810, + "valid_targets_mean": 14199.9, + "valid_targets_min": 693 + }, + { + "epoch": 4.596837944664031, + "grad_norm": 0.2926750402747766, + "learning_rate": 7.901890915739207e-07, + "loss": 0.7812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3534872531890869, + "step": 5815, + "valid_targets_mean": 14131.1, + "valid_targets_min": 799 + }, + { + "epoch": 4.600790513833992, + "grad_norm": 0.31114066353839553, + "learning_rate": 7.749013162350128e-07, + "loss": 0.7696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38537201285362244, + "step": 5820, + "valid_targets_mean": 13748.5, + "valid_targets_min": 994 + }, + { + "epoch": 4.604743083003952, + "grad_norm": 0.29467331581186634, + "learning_rate": 7.597599534355726e-07, + "loss": 0.7512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3855005204677582, + "step": 5825, + "valid_targets_mean": 14108.6, + "valid_targets_min": 3905 + }, + { + "epoch": 4.608695652173913, + "grad_norm": 0.28762573614413683, + "learning_rate": 7.447651184876381e-07, + "loss": 0.7729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4084177017211914, + "step": 5830, + "valid_targets_mean": 14738.4, + "valid_targets_min": 6426 + }, + { + "epoch": 4.612648221343873, + "grad_norm": 0.2764815296253861, + "learning_rate": 7.299169255873417e-07, + "loss": 0.7681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3976055979728699, + "step": 5835, + "valid_targets_mean": 14690.3, + "valid_targets_min": 3794 + }, + { + "epoch": 4.616600790513834, + "grad_norm": 0.3116872447889535, + "learning_rate": 7.152154878140294e-07, + "loss": 0.7533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37292367219924927, + "step": 5840, + "valid_targets_mean": 13443.2, + "valid_targets_min": 1380 + }, + { + "epoch": 4.620553359683795, + "grad_norm": 0.28949945192035353, + "learning_rate": 7.006609171294076e-07, + "loss": 0.7843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4100838303565979, + "step": 5845, + "valid_targets_mean": 14395.1, + "valid_targets_min": 1787 + }, + { + "epoch": 4.624505928853755, + "grad_norm": 0.3229626311386786, + "learning_rate": 6.862533243766822e-07, + "loss": 0.7768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4303993582725525, + "step": 5850, + "valid_targets_mean": 14549.8, + "valid_targets_min": 1984 + }, + { + "epoch": 4.628458498023716, + "grad_norm": 0.27244260072499943, + "learning_rate": 6.719928192797343e-07, + "loss": 0.7689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3588414788246155, + "step": 5855, + "valid_targets_mean": 13827.6, + "valid_targets_min": 949 + }, + { + "epoch": 4.632411067193676, + "grad_norm": 0.31206635427071655, + "learning_rate": 6.578795104422498e-07, + "loss": 0.7606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3596968948841095, + "step": 5860, + "valid_targets_mean": 13054.5, + "valid_targets_min": 2206 + }, + { + "epoch": 4.636363636363637, + "grad_norm": 0.3012961365167657, + "learning_rate": 6.439135053469247e-07, + "loss": 0.7739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4152459502220154, + "step": 5865, + "valid_targets_mean": 14945.1, + "valid_targets_min": 4185 + }, + { + "epoch": 4.640316205533597, + "grad_norm": 0.2830939965834131, + "learning_rate": 6.300949103546372e-07, + "loss": 0.7758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39887765049934387, + "step": 5870, + "valid_targets_mean": 14446.0, + "valid_targets_min": 855 + }, + { + "epoch": 4.644268774703558, + "grad_norm": 0.27422383772712944, + "learning_rate": 6.164238307036186e-07, + "loss": 0.7701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3696143627166748, + "step": 5875, + "valid_targets_mean": 14811.5, + "valid_targets_min": 1958 + }, + { + "epoch": 4.648221343873518, + "grad_norm": 0.3043850043458249, + "learning_rate": 6.029003705086878e-07, + "loss": 0.7656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38738781213760376, + "step": 5880, + "valid_targets_mean": 14080.7, + "valid_targets_min": 1640 + }, + { + "epoch": 4.6521739130434785, + "grad_norm": 0.28059775634509143, + "learning_rate": 5.895246327604253e-07, + "loss": 0.7704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38710951805114746, + "step": 5885, + "valid_targets_mean": 14945.4, + "valid_targets_min": 6851 + }, + { + "epoch": 4.6561264822134385, + "grad_norm": 0.30656725412809, + "learning_rate": 5.762967193244051e-07, + "loss": 0.7577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3756695091724396, + "step": 5890, + "valid_targets_mean": 13045.9, + "valid_targets_min": 516 + }, + { + "epoch": 4.660079051383399, + "grad_norm": 0.28343217874064125, + "learning_rate": 5.63216730940419e-07, + "loss": 0.7637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3705994486808777, + "step": 5895, + "valid_targets_mean": 15393.6, + "valid_targets_min": 4487 + }, + { + "epoch": 4.664031620553359, + "grad_norm": 0.27814969952331187, + "learning_rate": 5.502847672217049e-07, + "loss": 0.781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3869550824165344, + "step": 5900, + "valid_targets_mean": 15015.3, + "valid_targets_min": 1690 + }, + { + "epoch": 4.66798418972332, + "grad_norm": 0.31553773276308456, + "learning_rate": 5.375009266541842e-07, + "loss": 0.7666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4166383147239685, + "step": 5905, + "valid_targets_mean": 13962.3, + "valid_targets_min": 1027 + }, + { + "epoch": 4.67193675889328, + "grad_norm": 0.2708130030445305, + "learning_rate": 5.248653065957277e-07, + "loss": 0.7628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35292550921440125, + "step": 5910, + "valid_targets_mean": 13726.2, + "valid_targets_min": 3222 + }, + { + "epoch": 4.675889328063241, + "grad_norm": 0.34663893421502096, + "learning_rate": 5.123780032753934e-07, + "loss": 0.7838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41928404569625854, + "step": 5915, + "valid_targets_mean": 15009.5, + "valid_targets_min": 4356 + }, + { + "epoch": 4.679841897233201, + "grad_norm": 0.32179274671785774, + "learning_rate": 5.000391117927117e-07, + "loss": 0.7753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40504464507102966, + "step": 5920, + "valid_targets_mean": 14132.0, + "valid_targets_min": 1272 + }, + { + "epoch": 4.683794466403162, + "grad_norm": 0.30307654651633653, + "learning_rate": 4.878487261169484e-07, + "loss": 0.7755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3449387550354004, + "step": 5925, + "valid_targets_mean": 13404.6, + "valid_targets_min": 1461 + }, + { + "epoch": 4.687747035573123, + "grad_norm": 0.2726406816303175, + "learning_rate": 4.7580693908639396e-07, + "loss": 0.7702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3858049511909485, + "step": 5930, + "valid_targets_mean": 15218.1, + "valid_targets_min": 1806 + }, + { + "epoch": 4.691699604743083, + "grad_norm": 0.3179592787123711, + "learning_rate": 4.639138424076617e-07, + "loss": 0.7788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4038875997066498, + "step": 5935, + "valid_targets_mean": 15061.6, + "valid_targets_min": 1797 + }, + { + "epoch": 4.695652173913043, + "grad_norm": 0.307742510569001, + "learning_rate": 4.5216952665497525e-07, + "loss": 0.7756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33432677388191223, + "step": 5940, + "valid_targets_mean": 12507.2, + "valid_targets_min": 754 + }, + { + "epoch": 4.699604743083004, + "grad_norm": 0.28783007484119116, + "learning_rate": 4.405740812694959e-07, + "loss": 0.7799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4173075258731842, + "step": 5945, + "valid_targets_mean": 14824.6, + "valid_targets_min": 485 + }, + { + "epoch": 4.703557312252965, + "grad_norm": 0.3164554981406943, + "learning_rate": 4.291275945586315e-07, + "loss": 0.7784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3744875192642212, + "step": 5950, + "valid_targets_mean": 13025.6, + "valid_targets_min": 896 + }, + { + "epoch": 4.707509881422925, + "grad_norm": 0.2894234617319459, + "learning_rate": 4.1783015369535995e-07, + "loss": 0.7857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3940580487251282, + "step": 5955, + "valid_targets_mean": 14797.8, + "valid_targets_min": 1669 + }, + { + "epoch": 4.711462450592886, + "grad_norm": 0.28368801622738843, + "learning_rate": 4.0668184471758244e-07, + "loss": 0.7815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3849085569381714, + "step": 5960, + "valid_targets_mean": 15112.2, + "valid_targets_min": 4024 + }, + { + "epoch": 4.715415019762846, + "grad_norm": 0.2668275579029859, + "learning_rate": 3.9568275252743983e-07, + "loss": 0.7746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4218544661998749, + "step": 5965, + "valid_targets_mean": 15145.6, + "valid_targets_min": 1960 + }, + { + "epoch": 4.719367588932807, + "grad_norm": 0.2960260806656674, + "learning_rate": 3.8483296089070197e-07, + "loss": 0.7659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39344167709350586, + "step": 5970, + "valid_targets_mean": 15320.4, + "valid_targets_min": 5429 + }, + { + "epoch": 4.723320158102767, + "grad_norm": 0.2800020074050273, + "learning_rate": 3.741325524360995e-07, + "loss": 0.7828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.394444078207016, + "step": 5975, + "valid_targets_mean": 14757.3, + "valid_targets_min": 1664 + }, + { + "epoch": 4.7272727272727275, + "grad_norm": 0.2861465517815945, + "learning_rate": 3.6358160865470616e-07, + "loss": 0.7891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.439083069562912, + "step": 5980, + "valid_targets_mean": 15475.8, + "valid_targets_min": 5242 + }, + { + "epoch": 4.7312252964426875, + "grad_norm": 0.2796875005383662, + "learning_rate": 3.53180209899322e-07, + "loss": 0.7611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39843645691871643, + "step": 5985, + "valid_targets_mean": 14023.4, + "valid_targets_min": 839 + }, + { + "epoch": 4.735177865612648, + "grad_norm": 0.2784535552995938, + "learning_rate": 3.429284353838536e-07, + "loss": 0.7834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3769073486328125, + "step": 5990, + "valid_targets_mean": 13414.2, + "valid_targets_min": 2607 + }, + { + "epoch": 4.739130434782608, + "grad_norm": 0.3404397004110326, + "learning_rate": 3.3282636318271886e-07, + "loss": 0.7713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38556233048439026, + "step": 5995, + "valid_targets_mean": 14151.3, + "valid_targets_min": 2183 + }, + { + "epoch": 4.743083003952569, + "grad_norm": 0.2880923633292252, + "learning_rate": 3.2287407023024354e-07, + "loss": 0.7744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3758050799369812, + "step": 6000, + "valid_targets_mean": 12844.8, + "valid_targets_min": 959 + }, + { + "epoch": 4.747035573122529, + "grad_norm": 0.28732581024809845, + "learning_rate": 3.1307163232008107e-07, + "loss": 0.7753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3699765205383301, + "step": 6005, + "valid_targets_mean": 13422.8, + "valid_targets_min": 1085 + }, + { + "epoch": 4.75098814229249, + "grad_norm": 0.25345575374285656, + "learning_rate": 3.0341912410463805e-07, + "loss": 0.7843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3235652446746826, + "step": 6010, + "valid_targets_mean": 12961.6, + "valid_targets_min": 1478 + }, + { + "epoch": 4.75494071146245, + "grad_norm": 0.2962470776940837, + "learning_rate": 2.9391661909449864e-07, + "loss": 0.7713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4008612632751465, + "step": 6015, + "valid_targets_mean": 14085.0, + "valid_targets_min": 2564 + }, + { + "epoch": 4.758893280632411, + "grad_norm": 0.28644266438183863, + "learning_rate": 2.8456418965786323e-07, + "loss": 0.7607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4085724949836731, + "step": 6020, + "valid_targets_mean": 15656.6, + "valid_targets_min": 5453 + }, + { + "epoch": 4.762845849802371, + "grad_norm": 0.2978641603020676, + "learning_rate": 2.753619070200131e-07, + "loss": 0.7741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3782803416252136, + "step": 6025, + "valid_targets_mean": 13691.2, + "valid_targets_min": 3619 + }, + { + "epoch": 4.766798418972332, + "grad_norm": 0.3022351283182132, + "learning_rate": 2.6630984126275074e-07, + "loss": 0.7791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3666799068450928, + "step": 6030, + "valid_targets_mean": 13313.7, + "valid_targets_min": 754 + }, + { + "epoch": 4.770750988142293, + "grad_norm": 0.28036644881283473, + "learning_rate": 2.574080613238672e-07, + "loss": 0.7664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.400973379611969, + "step": 6035, + "valid_targets_mean": 13746.6, + "valid_targets_min": 1277 + }, + { + "epoch": 4.774703557312253, + "grad_norm": 0.3175627276992497, + "learning_rate": 2.486566349966335e-07, + "loss": 0.7746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.410087525844574, + "step": 6040, + "valid_targets_mean": 14437.5, + "valid_targets_min": 1083 + }, + { + "epoch": 4.778656126482213, + "grad_norm": 0.27712885744217874, + "learning_rate": 2.40055628929261e-07, + "loss": 0.7714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4089733362197876, + "step": 6045, + "valid_targets_mean": 15534.0, + "valid_targets_min": 4370 + }, + { + "epoch": 4.782608695652174, + "grad_norm": 0.28905657301575827, + "learning_rate": 2.316051086244131e-07, + "loss": 0.7804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4004994034767151, + "step": 6050, + "valid_targets_mean": 14180.3, + "valid_targets_min": 1177 + }, + { + "epoch": 4.786561264822135, + "grad_norm": 0.29461553037435856, + "learning_rate": 2.2330513843870306e-07, + "loss": 0.7589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4067733883857727, + "step": 6055, + "valid_targets_mean": 14638.3, + "valid_targets_min": 1590 + }, + { + "epoch": 4.790513833992095, + "grad_norm": 0.2751888112756087, + "learning_rate": 2.1515578158218586e-07, + "loss": 0.7896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4106820523738861, + "step": 6060, + "valid_targets_mean": 15729.3, + "valid_targets_min": 1913 + }, + { + "epoch": 4.794466403162056, + "grad_norm": 0.28158511152187704, + "learning_rate": 2.0715710011790736e-07, + "loss": 0.7715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35702842473983765, + "step": 6065, + "valid_targets_mean": 13613.6, + "valid_targets_min": 951 + }, + { + "epoch": 4.798418972332016, + "grad_norm": 0.3094231944619869, + "learning_rate": 1.9930915496140236e-07, + "loss": 0.7693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3941049873828888, + "step": 6070, + "valid_targets_mean": 14791.4, + "valid_targets_min": 1701 + }, + { + "epoch": 4.8023715415019765, + "grad_norm": 0.26177780315554816, + "learning_rate": 1.9161200588025287e-07, + "loss": 0.7748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38375943899154663, + "step": 6075, + "valid_targets_mean": 15080.9, + "valid_targets_min": 5881 + }, + { + "epoch": 4.8063241106719365, + "grad_norm": 0.2705264747189133, + "learning_rate": 1.8406571149361508e-07, + "loss": 0.7619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35680443048477173, + "step": 6080, + "valid_targets_mean": 13278.0, + "valid_targets_min": 1727 + }, + { + "epoch": 4.810276679841897, + "grad_norm": 0.27775493665312423, + "learning_rate": 1.766703292717864e-07, + "loss": 0.7774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38175061345100403, + "step": 6085, + "valid_targets_mean": 13947.9, + "valid_targets_min": 2033 + }, + { + "epoch": 4.8142292490118574, + "grad_norm": 0.33869486286706546, + "learning_rate": 1.694259155357636e-07, + "loss": 0.772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41387200355529785, + "step": 6090, + "valid_targets_mean": 14768.6, + "valid_targets_min": 3906 + }, + { + "epoch": 4.818181818181818, + "grad_norm": 0.27771683107741413, + "learning_rate": 1.623325254568142e-07, + "loss": 0.7662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3828127980232239, + "step": 6095, + "valid_targets_mean": 13902.9, + "valid_targets_min": 2503 + }, + { + "epoch": 4.822134387351778, + "grad_norm": 0.273757654515107, + "learning_rate": 1.5539021305604584e-07, + "loss": 0.7559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39293235540390015, + "step": 6100, + "valid_targets_mean": 14677.0, + "valid_targets_min": 1617 + }, + { + "epoch": 4.826086956521739, + "grad_norm": 0.27172181032031156, + "learning_rate": 1.485990312040153e-07, + "loss": 0.7616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35599181056022644, + "step": 6105, + "valid_targets_mean": 13666.0, + "valid_targets_min": 616 + }, + { + "epoch": 4.830039525691699, + "grad_norm": 0.3015053558619269, + "learning_rate": 1.4195903162031345e-07, + "loss": 0.7689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37596964836120605, + "step": 6110, + "valid_targets_mean": 14244.3, + "valid_targets_min": 2719 + }, + { + "epoch": 4.83399209486166, + "grad_norm": 0.30373871241606576, + "learning_rate": 1.3547026487316096e-07, + "loss": 0.7627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3635774850845337, + "step": 6115, + "valid_targets_mean": 13301.0, + "valid_targets_min": 2046 + }, + { + "epoch": 4.837944664031621, + "grad_norm": 0.28107236280039244, + "learning_rate": 1.291327803790532e-07, + "loss": 0.7681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38801345229148865, + "step": 6120, + "valid_targets_mean": 13681.9, + "valid_targets_min": 996 + }, + { + "epoch": 4.841897233201581, + "grad_norm": 0.27668919226990496, + "learning_rate": 1.2294662640234712e-07, + "loss": 0.776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43432164192199707, + "step": 6125, + "valid_targets_mean": 14870.4, + "valid_targets_min": 1337 + }, + { + "epoch": 4.845849802371541, + "grad_norm": 0.2882674620292199, + "learning_rate": 1.1691185005492823e-07, + "loss": 0.7765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3733573257923126, + "step": 6130, + "valid_targets_mean": 13812.7, + "valid_targets_min": 1145 + }, + { + "epoch": 4.849802371541502, + "grad_norm": 0.2795185543439014, + "learning_rate": 1.1102849729582643e-07, + "loss": 0.7895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3636476993560791, + "step": 6135, + "valid_targets_mean": 13184.1, + "valid_targets_min": 717 + }, + { + "epoch": 4.853754940711463, + "grad_norm": 0.2521422251379488, + "learning_rate": 1.0529661293087856e-07, + "loss": 0.7711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3829251527786255, + "step": 6140, + "valid_targets_mean": 15141.0, + "valid_targets_min": 3633 + }, + { + "epoch": 4.857707509881423, + "grad_norm": 0.26552844479990256, + "learning_rate": 9.971624061237972e-08, + "loss": 0.7698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4105076193809509, + "step": 6145, + "valid_targets_mean": 14071.0, + "valid_targets_min": 3382 + }, + { + "epoch": 4.861660079051384, + "grad_norm": 0.28943934595645787, + "learning_rate": 9.428742283876357e-08, + "loss": 0.757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3635061979293823, + "step": 6150, + "valid_targets_mean": 13083.8, + "valid_targets_min": 764 + }, + { + "epoch": 4.865612648221344, + "grad_norm": 0.2723669884958789, + "learning_rate": 8.901020095425816e-08, + "loss": 0.7587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3793807029724121, + "step": 6155, + "valid_targets_mean": 13805.8, + "valid_targets_min": 906 + }, + { + "epoch": 4.869565217391305, + "grad_norm": 0.3053419207029483, + "learning_rate": 8.388461514859947e-08, + "loss": 0.7622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37018194794654846, + "step": 6160, + "valid_targets_mean": 13926.0, + "valid_targets_min": 2369 + }, + { + "epoch": 4.873517786561265, + "grad_norm": 0.27551004240728283, + "learning_rate": 7.891070445668947e-08, + "loss": 0.7759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40423721075057983, + "step": 6165, + "valid_targets_mean": 14367.2, + "valid_targets_min": 1815 + }, + { + "epoch": 4.877470355731226, + "grad_norm": 0.27384022192220214, + "learning_rate": 7.408850675833412e-08, + "loss": 0.7667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3768211007118225, + "step": 6170, + "valid_targets_mean": 13414.3, + "valid_targets_min": 1339 + }, + { + "epoch": 4.881422924901186, + "grad_norm": 0.2883749476046292, + "learning_rate": 6.941805877793473e-08, + "loss": 0.7606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3366874158382416, + "step": 6175, + "valid_targets_mean": 13272.3, + "valid_targets_min": 3069 + }, + { + "epoch": 4.8853754940711465, + "grad_norm": 0.30058954969748813, + "learning_rate": 6.489939608420593e-08, + "loss": 0.7694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42551374435424805, + "step": 6180, + "valid_targets_mean": 14410.6, + "valid_targets_min": 679 + }, + { + "epoch": 4.8893280632411065, + "grad_norm": 0.26612018080012745, + "learning_rate": 6.053255308992034e-08, + "loss": 0.7896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4114375710487366, + "step": 6185, + "valid_targets_mean": 15325.5, + "valid_targets_min": 9817 + }, + { + "epoch": 4.893280632411067, + "grad_norm": 0.30284809769954124, + "learning_rate": 5.631756305163105e-08, + "loss": 0.7818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41287821531295776, + "step": 6190, + "valid_targets_mean": 14768.0, + "valid_targets_min": 1939 + }, + { + "epoch": 4.897233201581027, + "grad_norm": 0.31242719589919826, + "learning_rate": 5.2254458069429524e-08, + "loss": 0.7665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3654315769672394, + "step": 6195, + "valid_targets_mean": 12915.7, + "valid_targets_min": 1299 + }, + { + "epoch": 4.901185770750988, + "grad_norm": 0.2717384603829168, + "learning_rate": 4.834326908669918e-08, + "loss": 0.7704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3583259582519531, + "step": 6200, + "valid_targets_mean": 13863.1, + "valid_targets_min": 1102 + }, + { + "epoch": 4.905138339920948, + "grad_norm": 0.29200194806321694, + "learning_rate": 4.4584025889864434e-08, + "loss": 0.7671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3965635895729065, + "step": 6205, + "valid_targets_mean": 15238.8, + "valid_targets_min": 2834 + }, + { + "epoch": 4.909090909090909, + "grad_norm": 0.29406148810340005, + "learning_rate": 4.0976757108193155e-08, + "loss": 0.7827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4047239124774933, + "step": 6210, + "valid_targets_mean": 13909.9, + "valid_targets_min": 1140 + }, + { + "epoch": 4.913043478260869, + "grad_norm": 0.31698172372006767, + "learning_rate": 3.75214902135479e-08, + "loss": 0.7595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37907809019088745, + "step": 6215, + "valid_targets_mean": 14637.8, + "valid_targets_min": 2049 + }, + { + "epoch": 4.91699604743083, + "grad_norm": 0.30189635301110185, + "learning_rate": 3.421825152019942e-08, + "loss": 0.786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41157132387161255, + "step": 6220, + "valid_targets_mean": 14661.4, + "valid_targets_min": 913 + }, + { + "epoch": 4.920948616600791, + "grad_norm": 0.28535648244621475, + "learning_rate": 3.1067066184615745e-08, + "loss": 0.7738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33094322681427, + "step": 6225, + "valid_targets_mean": 12835.3, + "valid_targets_min": 2318 + }, + { + "epoch": 4.924901185770751, + "grad_norm": 0.29548920006493695, + "learning_rate": 2.8067958205268975e-08, + "loss": 0.7652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37816956639289856, + "step": 6230, + "valid_targets_mean": 14988.8, + "valid_targets_min": 4374 + }, + { + "epoch": 4.928853754940711, + "grad_norm": 0.2819176696744424, + "learning_rate": 2.5220950422459868e-08, + "loss": 0.777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3896205723285675, + "step": 6235, + "valid_targets_mean": 15041.8, + "valid_targets_min": 1968 + }, + { + "epoch": 4.932806324110672, + "grad_norm": 0.28386568900892156, + "learning_rate": 2.252606451814021e-08, + "loss": 0.7913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.382510781288147, + "step": 6240, + "valid_targets_mean": 13762.1, + "valid_targets_min": 1226 + }, + { + "epoch": 4.936758893280633, + "grad_norm": 0.2599175145667644, + "learning_rate": 1.9983321015746294e-08, + "loss": 0.7666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3635385036468506, + "step": 6245, + "valid_targets_mean": 14636.7, + "valid_targets_min": 1834 + }, + { + "epoch": 4.940711462450593, + "grad_norm": 0.3239280698903887, + "learning_rate": 1.759273928004346e-08, + "loss": 0.7692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3868054449558258, + "step": 6250, + "valid_targets_mean": 13567.0, + "valid_targets_min": 1825 + }, + { + "epoch": 4.944664031620554, + "grad_norm": 0.2562802692901127, + "learning_rate": 1.5354337516979566e-08, + "loss": 0.7703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3685070872306824, + "step": 6255, + "valid_targets_mean": 15245.0, + "valid_targets_min": 4770 + }, + { + "epoch": 4.948616600790514, + "grad_norm": 0.3042986944394145, + "learning_rate": 1.3268132773542886e-08, + "loss": 0.7751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3644460439682007, + "step": 6260, + "valid_targets_mean": 12787.9, + "valid_targets_min": 2262 + }, + { + "epoch": 4.952569169960475, + "grad_norm": 0.2792030664588753, + "learning_rate": 1.133414093764218e-08, + "loss": 0.7709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33821552991867065, + "step": 6265, + "valid_targets_mean": 12820.7, + "valid_targets_min": 1419 + }, + { + "epoch": 4.956521739130435, + "grad_norm": 0.28000685948975235, + "learning_rate": 9.55237673797349e-09, + "loss": 0.7608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.386191189289093, + "step": 6270, + "valid_targets_mean": 14453.5, + "valid_targets_min": 1013 + }, + { + "epoch": 4.9604743083003955, + "grad_norm": 0.3037250841127876, + "learning_rate": 7.92285374391355e-09, + "loss": 0.7618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39335548877716064, + "step": 6275, + "valid_targets_mean": 14672.8, + "valid_targets_min": 640 + }, + { + "epoch": 4.9644268774703555, + "grad_norm": 0.2900462663958642, + "learning_rate": 6.445584365415425e-09, + "loss": 0.7631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39090967178344727, + "step": 6280, + "valid_targets_mean": 14623.8, + "valid_targets_min": 4334 + }, + { + "epoch": 4.968379446640316, + "grad_norm": 0.2991851204681601, + "learning_rate": 5.120579852919694e-09, + "loss": 0.7568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3814154863357544, + "step": 6285, + "valid_targets_mean": 12700.5, + "valid_targets_min": 603 + }, + { + "epoch": 4.972332015810276, + "grad_norm": 0.30163627161086537, + "learning_rate": 3.94785029725897e-09, + "loss": 0.7672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3743637502193451, + "step": 6290, + "valid_targets_mean": 14033.9, + "valid_targets_min": 2416 + }, + { + "epoch": 4.976284584980237, + "grad_norm": 0.27435585149013453, + "learning_rate": 2.9274046295846294e-09, + "loss": 0.7652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39311206340789795, + "step": 6295, + "valid_targets_mean": 14656.3, + "valid_targets_min": 924 + }, + { + "epoch": 4.980237154150197, + "grad_norm": 0.27260281650654355, + "learning_rate": 2.059250621302411e-09, + "loss": 0.7728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3766372799873352, + "step": 6300, + "valid_targets_mean": 14180.1, + "valid_targets_min": 2125 + }, + { + "epoch": 4.984189723320158, + "grad_norm": 0.2796617852422442, + "learning_rate": 1.3433948840146926e-09, + "loss": 0.7591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38235118985176086, + "step": 6305, + "valid_targets_mean": 13109.5, + "valid_targets_min": 2255 + }, + { + "epoch": 4.988142292490118, + "grad_norm": 0.2874910563016602, + "learning_rate": 7.798428694605342e-10, + "loss": 0.7713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4013074040412903, + "step": 6310, + "valid_targets_mean": 14462.2, + "valid_targets_min": 3290 + }, + { + "epoch": 4.992094861660079, + "grad_norm": 0.26923376388205733, + "learning_rate": 3.6859886948015233e-10, + "loss": 0.7642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3615990877151489, + "step": 6315, + "valid_targets_mean": 13492.6, + "valid_targets_min": 1982 + }, + { + "epoch": 4.996047430830039, + "grad_norm": 0.29811275437694523, + "learning_rate": 1.0966601598383364e-10, + "loss": 0.7648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3580400347709656, + "step": 6320, + "valid_targets_mean": 13570.4, + "valid_targets_min": 675 + }, + { + "epoch": 5.0, + "grad_norm": 0.2684132515596671, + "learning_rate": 3.046280927510026e-12, + "loss": 0.7565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4211060404777527, + "step": 6325, + "valid_targets_mean": 15457.9, + "valid_targets_min": 697 + }, + { + "epoch": 5.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4211060404777527, + "step": 6325, + "total_flos": 1.1298365271930044e+19, + "train_loss": 0.22232198994150273, + "train_runtime": 31881.5091, + "train_samples_per_second": 12.696, + "train_steps_per_second": 0.198, + "valid_targets_mean": 15457.9, + "valid_targets_min": 697 + } + ], + "logging_steps": 5, + "max_steps": 6325, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.1298365271930044e+19, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}