{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999066044029353, "eval_steps": 500, "global_step": 234, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04269513008672448, "grad_norm": 2.6684887191049826, "learning_rate": 9.999516923531906e-06, "loss": 1.0656, "step": 10 }, { "epoch": 0.08539026017344896, "grad_norm": 1.946006163799421, "learning_rate": 9.94166060990718e-06, "loss": 0.9639, "step": 20 }, { "epoch": 0.12808539026017346, "grad_norm": 2.135334096726232, "learning_rate": 9.788468426444968e-06, "loss": 0.9299, "step": 30 }, { "epoch": 0.17078052034689792, "grad_norm": 2.1888584544569722, "learning_rate": 9.542895798702702e-06, "loss": 0.9412, "step": 40 }, { "epoch": 0.2134756504336224, "grad_norm": 1.9868402856495835, "learning_rate": 9.209680381068698e-06, "loss": 0.9209, "step": 50 }, { "epoch": 0.2561707805203469, "grad_norm": 2.102655697602942, "learning_rate": 8.795250656636207e-06, "loss": 0.9003, "step": 60 }, { "epoch": 0.2988659106070714, "grad_norm": 1.9826269424949003, "learning_rate": 8.30760191714706e-06, "loss": 0.9108, "step": 70 }, { "epoch": 0.34156104069379584, "grad_norm": 1.9933280995797364, "learning_rate": 7.75614201563372e-06, "loss": 0.8927, "step": 80 }, { "epoch": 0.3842561707805203, "grad_norm": 1.9585442421548411, "learning_rate": 7.1515098675421125e-06, "loss": 0.8832, "step": 90 }, { "epoch": 0.4269513008672448, "grad_norm": 1.799203130126713, "learning_rate": 6.50537020186121e-06, "loss": 0.9115, "step": 100 }, { "epoch": 0.4696464309539693, "grad_norm": 1.9010281711566903, "learning_rate": 5.830188521976794e-06, "loss": 0.8754, "step": 110 }, { "epoch": 0.5123415610406938, "grad_norm": 1.8562212696724616, "learning_rate": 5.1389906177662705e-06, "loss": 0.8855, "step": 120 }, { "epoch": 0.5550366911274183, "grad_norm": 1.706613487203547, "learning_rate": 4.4451112684930424e-06, "loss": 0.8842, "step": 130 }, { "epoch": 0.5977318212141428, "grad_norm": 1.88229629741251, "learning_rate": 3.7619369845930195e-06, "loss": 0.8982, "step": 140 }, { "epoch": 0.6404269513008672, "grad_norm": 1.6721435452363316, "learning_rate": 3.102647751449174e-06, "loss": 0.8964, "step": 150 }, { "epoch": 0.6831220813875917, "grad_norm": 1.8653475170240106, "learning_rate": 2.4799627575040014e-06, "loss": 0.8797, "step": 160 }, { "epoch": 0.7258172114743162, "grad_norm": 1.722742554654972, "learning_rate": 1.90589501219273e-06, "loss": 0.8681, "step": 170 }, { "epoch": 0.7685123415610406, "grad_norm": 1.791072390996306, "learning_rate": 1.3915195876753495e-06, "loss": 0.8865, "step": 180 }, { "epoch": 0.8112074716477652, "grad_norm": 1.5310513104275707, "learning_rate": 9.467599555114137e-07, "loss": 0.8728, "step": 190 }, { "epoch": 0.8539026017344896, "grad_norm": 1.6358652689401452, "learning_rate": 5.801965403290221e-07, "loss": 0.8748, "step": 200 }, { "epoch": 0.8965977318212142, "grad_norm": 1.5118489110770934, "learning_rate": 2.9890118392300493e-07, "loss": 0.8737, "step": 210 }, { "epoch": 0.9392928619079386, "grad_norm": 1.7168852460490818, "learning_rate": 1.0830071334628655e-07, "loss": 0.8662, "step": 220 }, { "epoch": 0.9819879919946631, "grad_norm": 1.6143069955493654, "learning_rate": 1.2072245076156786e-08, "loss": 0.8602, "step": 230 } ], "logging_steps": 10, "max_steps": 234, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 25657518981120.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }