{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9920424403183024,
  "eval_steps": 500,
  "global_step": 282,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10610079575596817,
      "grad_norm": 0.6782001256942749,
      "learning_rate": 3.4482758620689657e-05,
      "loss": 0.8975,
      "step": 10
    },
    {
      "epoch": 0.21220159151193635,
      "grad_norm": 0.858634889125824,
      "learning_rate": 6.896551724137931e-05,
      "loss": 1.0343,
      "step": 20
    },
    {
      "epoch": 0.3183023872679045,
      "grad_norm": 0.5417032241821289,
      "learning_rate": 9.99961452773888e-05,
      "loss": 0.5668,
      "step": 30
    },
    {
      "epoch": 0.4244031830238727,
      "grad_norm": 0.8525161743164062,
      "learning_rate": 9.953429730181653e-05,
      "loss": 0.6499,
      "step": 40
    },
    {
      "epoch": 0.5305039787798409,
      "grad_norm": 0.9144126772880554,
      "learning_rate": 9.830965649597455e-05,
      "loss": 0.6146,
      "step": 50
    },
    {
      "epoch": 0.636604774535809,
      "grad_norm": 0.5597995519638062,
      "learning_rate": 9.634108145435665e-05,
      "loss": 0.346,
      "step": 60
    },
    {
      "epoch": 0.7427055702917772,
      "grad_norm": 0.7313889861106873,
      "learning_rate": 9.36588868278086e-05,
      "loss": 0.4951,
      "step": 70
    },
    {
      "epoch": 0.8488063660477454,
      "grad_norm": 1.2549511194229126,
      "learning_rate": 9.03043764995379e-05,
      "loss": 0.4583,
      "step": 80
    },
    {
      "epoch": 0.9549071618037135,
      "grad_norm": 0.37749794125556946,
      "learning_rate": 8.63292075347872e-05,
      "loss": 0.4206,
      "step": 90
    },
    {
      "epoch": 1.0610079575596818,
      "grad_norm": 0.6580765843391418,
      "learning_rate": 8.179459469889268e-05,
      "loss": 0.4612,
      "step": 100
    },
    {
      "epoch": 1.16710875331565,
      "grad_norm": 0.5585550665855408,
      "learning_rate": 7.67703677935813e-05,
      "loss": 0.2816,
      "step": 110
    },
    {
      "epoch": 1.273209549071618,
      "grad_norm": 1.0563403367996216,
      "learning_rate": 7.133389632785543e-05,
      "loss": 0.435,
      "step": 120
    },
    {
      "epoch": 1.3793103448275863,
      "grad_norm": 0.35568875074386597,
      "learning_rate": 6.556889808276594e-05,
      "loss": 0.2524,
      "step": 130
    },
    {
      "epoch": 1.4854111405835544,
      "grad_norm": 1.0185539722442627,
      "learning_rate": 5.956414991732584e-05,
      "loss": 0.3828,
      "step": 140
    },
    {
      "epoch": 1.5915119363395225,
      "grad_norm": 0.653967022895813,
      "learning_rate": 5.341212066823355e-05,
      "loss": 0.3319,
      "step": 150
    },
    {
      "epoch": 1.6976127320954908,
      "grad_norm": 0.9374193549156189,
      "learning_rate": 4.7207547195774475e-05,
      "loss": 0.3626,
      "step": 160
    },
    {
      "epoch": 1.8037135278514589,
      "grad_norm": 0.7635377645492554,
      "learning_rate": 4.1045975503777756e-05,
      "loss": 0.3388,
      "step": 170
    },
    {
      "epoch": 1.909814323607427,
      "grad_norm": 0.9419568777084351,
      "learning_rate": 3.502228939933993e-05,
      "loss": 0.3257,
      "step": 180
    },
    {
      "epoch": 2.0159151193633953,
      "grad_norm": 0.3300299346446991,
      "learning_rate": 2.9229249349905684e-05,
      "loss": 0.3826,
      "step": 190
    },
    {
      "epoch": 2.1220159151193636,
      "grad_norm": 0.9070532321929932,
      "learning_rate": 2.375606403826403e-05,
      "loss": 0.267,
      "step": 200
    },
    {
      "epoch": 2.2281167108753315,
      "grad_norm": 0.7301483154296875,
      "learning_rate": 1.8687016612493542e-05,
      "loss": 0.2643,
      "step": 210
    },
    {
      "epoch": 2.3342175066313,
      "grad_norm": 1.0478216409683228,
      "learning_rate": 1.41001667856273e-05,
      "loss": 0.3146,
      "step": 220
    },
    {
      "epoch": 2.4403183023872677,
      "grad_norm": 0.6027346849441528,
      "learning_rate": 1.0066148771776379e-05,
      "loss": 0.3457,
      "step": 230
    },
    {
      "epoch": 2.546419098143236,
      "grad_norm": 0.7205595970153809,
      "learning_rate": 6.647083569637797e-06,
      "loss": 0.2173,
      "step": 240
    },
    {
      "epoch": 2.6525198938992043,
      "grad_norm": 0.44989392161369324,
      "learning_rate": 3.895622343444794e-06,
      "loss": 0.2089,
      "step": 250
    },
    {
      "epoch": 2.7586206896551726,
      "grad_norm": 0.7996519804000854,
      "learning_rate": 1.8541356326100433e-06,
      "loss": 0.2485,
      "step": 260
    },
    {
      "epoch": 2.8647214854111405,
      "grad_norm": 1.2713292837142944,
      "learning_rate": 5.540608756547106e-07,
      "loss": 0.2668,
      "step": 270
    },
    {
      "epoch": 2.970822281167109,
      "grad_norm": 0.9034178853034973,
      "learning_rate": 1.5418296089358963e-08,
      "loss": 0.2985,
      "step": 280
    }
  ],
  "logging_steps": 10,
  "max_steps": 282,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4056799126732800.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}