| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.972972972972973, | |
| "eval_steps": 55, | |
| "global_step": 110, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_loss": 6.015130996704102, | |
| "eval_runtime": 175.115, | |
| "eval_samples_per_second": 0.571, | |
| "eval_steps_per_second": 0.074, | |
| "memory/device_reserved (GiB)": 57.3, | |
| "memory/max_active (GiB)": 43.52, | |
| "memory/max_allocated (GiB)": 43.52, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.018018018018018018, | |
| "grad_norm": 0.601690411567688, | |
| "learning_rate": 0.0, | |
| "loss": 5.8902, | |
| "memory/device_reserved (GiB)": 57.78, | |
| "memory/max_active (GiB)": 52.56, | |
| "memory/max_allocated (GiB)": 52.56, | |
| "step": 1, | |
| "tokens_per_second_per_gpu": 177.27 | |
| }, | |
| { | |
| "epoch": 0.036036036036036036, | |
| "grad_norm": 0.5987619161605835, | |
| "learning_rate": 1.818181818181818e-06, | |
| "loss": 5.9238, | |
| "memory/device_reserved (GiB)": 57.83, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 2, | |
| "tokens_per_second_per_gpu": 32.34 | |
| }, | |
| { | |
| "epoch": 0.05405405405405406, | |
| "grad_norm": 0.6140171885490417, | |
| "learning_rate": 3.636363636363636e-06, | |
| "loss": 6.108, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 3, | |
| "tokens_per_second_per_gpu": 32.29 | |
| }, | |
| { | |
| "epoch": 0.07207207207207207, | |
| "grad_norm": 0.6393939256668091, | |
| "learning_rate": 5.4545454545454545e-06, | |
| "loss": 6.0509, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 4, | |
| "tokens_per_second_per_gpu": 34.19 | |
| }, | |
| { | |
| "epoch": 0.09009009009009009, | |
| "grad_norm": 0.6186049580574036, | |
| "learning_rate": 7.272727272727272e-06, | |
| "loss": 6.0799, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 5, | |
| "tokens_per_second_per_gpu": 48.39 | |
| }, | |
| { | |
| "epoch": 0.10810810810810811, | |
| "grad_norm": 0.6133891344070435, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 6.0418, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 6, | |
| "tokens_per_second_per_gpu": 35.74 | |
| }, | |
| { | |
| "epoch": 0.12612612612612611, | |
| "grad_norm": 0.6060707569122314, | |
| "learning_rate": 1.0909090909090909e-05, | |
| "loss": 5.9976, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 7, | |
| "tokens_per_second_per_gpu": 42.46 | |
| }, | |
| { | |
| "epoch": 0.14414414414414414, | |
| "grad_norm": 0.6184359192848206, | |
| "learning_rate": 1.2727272727272727e-05, | |
| "loss": 6.0328, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 8, | |
| "tokens_per_second_per_gpu": 38.38 | |
| }, | |
| { | |
| "epoch": 0.16216216216216217, | |
| "grad_norm": 0.6444172859191895, | |
| "learning_rate": 1.4545454545454545e-05, | |
| "loss": 5.9618, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 9, | |
| "tokens_per_second_per_gpu": 26.99 | |
| }, | |
| { | |
| "epoch": 0.18018018018018017, | |
| "grad_norm": 0.6325266361236572, | |
| "learning_rate": 1.6363636363636366e-05, | |
| "loss": 6.1674, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 10, | |
| "tokens_per_second_per_gpu": 29.93 | |
| }, | |
| { | |
| "epoch": 0.1981981981981982, | |
| "grad_norm": 0.6881551146507263, | |
| "learning_rate": 1.8181818181818182e-05, | |
| "loss": 5.9809, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 11, | |
| "tokens_per_second_per_gpu": 33.12 | |
| }, | |
| { | |
| "epoch": 0.21621621621621623, | |
| "grad_norm": 0.6200075745582581, | |
| "learning_rate": 2e-05, | |
| "loss": 5.9737, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 12, | |
| "tokens_per_second_per_gpu": 33.23 | |
| }, | |
| { | |
| "epoch": 0.23423423423423423, | |
| "grad_norm": 0.6758139133453369, | |
| "learning_rate": 2.1818181818181818e-05, | |
| "loss": 5.9377, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 13, | |
| "tokens_per_second_per_gpu": 31.95 | |
| }, | |
| { | |
| "epoch": 0.25225225225225223, | |
| "grad_norm": 0.6687948107719421, | |
| "learning_rate": 2.3636363636363637e-05, | |
| "loss": 6.0492, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 14, | |
| "tokens_per_second_per_gpu": 31.88 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "grad_norm": 0.6872105002403259, | |
| "learning_rate": 2.5454545454545454e-05, | |
| "loss": 6.1111, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 15, | |
| "tokens_per_second_per_gpu": 40.32 | |
| }, | |
| { | |
| "epoch": 0.2882882882882883, | |
| "grad_norm": 0.7107413411140442, | |
| "learning_rate": 2.7272727272727273e-05, | |
| "loss": 5.9434, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 16, | |
| "tokens_per_second_per_gpu": 31.11 | |
| }, | |
| { | |
| "epoch": 0.3063063063063063, | |
| "grad_norm": 0.7199774384498596, | |
| "learning_rate": 2.909090909090909e-05, | |
| "loss": 6.0871, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 17, | |
| "tokens_per_second_per_gpu": 40.84 | |
| }, | |
| { | |
| "epoch": 0.32432432432432434, | |
| "grad_norm": 0.7394067049026489, | |
| "learning_rate": 3.090909090909091e-05, | |
| "loss": 5.8008, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 18, | |
| "tokens_per_second_per_gpu": 52.0 | |
| }, | |
| { | |
| "epoch": 0.34234234234234234, | |
| "grad_norm": 0.7493309378623962, | |
| "learning_rate": 3.272727272727273e-05, | |
| "loss": 5.8617, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 19, | |
| "tokens_per_second_per_gpu": 35.37 | |
| }, | |
| { | |
| "epoch": 0.36036036036036034, | |
| "grad_norm": 0.7457339763641357, | |
| "learning_rate": 3.454545454545455e-05, | |
| "loss": 5.7479, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 20, | |
| "tokens_per_second_per_gpu": 35.34 | |
| }, | |
| { | |
| "epoch": 0.3783783783783784, | |
| "grad_norm": 0.7670865058898926, | |
| "learning_rate": 3.6363636363636364e-05, | |
| "loss": 5.7939, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 21, | |
| "tokens_per_second_per_gpu": 37.74 | |
| }, | |
| { | |
| "epoch": 0.3963963963963964, | |
| "grad_norm": 0.7689312100410461, | |
| "learning_rate": 3.818181818181819e-05, | |
| "loss": 5.7546, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 22, | |
| "tokens_per_second_per_gpu": 34.11 | |
| }, | |
| { | |
| "epoch": 0.4144144144144144, | |
| "grad_norm": 0.7929359674453735, | |
| "learning_rate": 4e-05, | |
| "loss": 5.8247, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 23, | |
| "tokens_per_second_per_gpu": 22.92 | |
| }, | |
| { | |
| "epoch": 0.43243243243243246, | |
| "grad_norm": 0.7598868012428284, | |
| "learning_rate": 4.181818181818182e-05, | |
| "loss": 5.6557, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 24, | |
| "tokens_per_second_per_gpu": 29.09 | |
| }, | |
| { | |
| "epoch": 0.45045045045045046, | |
| "grad_norm": 0.7897383570671082, | |
| "learning_rate": 4.3636363636363636e-05, | |
| "loss": 5.6503, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 25, | |
| "tokens_per_second_per_gpu": 43.23 | |
| }, | |
| { | |
| "epoch": 0.46846846846846846, | |
| "grad_norm": 0.8077855706214905, | |
| "learning_rate": 4.545454545454546e-05, | |
| "loss": 5.7059, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 26, | |
| "tokens_per_second_per_gpu": 38.97 | |
| }, | |
| { | |
| "epoch": 0.4864864864864865, | |
| "grad_norm": 0.7965295910835266, | |
| "learning_rate": 4.7272727272727275e-05, | |
| "loss": 5.5299, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 27, | |
| "tokens_per_second_per_gpu": 29.02 | |
| }, | |
| { | |
| "epoch": 0.5045045045045045, | |
| "grad_norm": 0.7723223567008972, | |
| "learning_rate": 4.909090909090909e-05, | |
| "loss": 5.442, | |
| "memory/device_reserved (GiB)": 57.85, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 28, | |
| "tokens_per_second_per_gpu": 25.82 | |
| }, | |
| { | |
| "epoch": 0.5225225225225225, | |
| "grad_norm": 0.7679380178451538, | |
| "learning_rate": 5.090909090909091e-05, | |
| "loss": 5.3683, | |
| "memory/device_reserved (GiB)": 57.86, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 29, | |
| "tokens_per_second_per_gpu": 36.19 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 0.7431294322013855, | |
| "learning_rate": 5.272727272727272e-05, | |
| "loss": 5.2314, | |
| "memory/device_reserved (GiB)": 57.86, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 30, | |
| "tokens_per_second_per_gpu": 48.12 | |
| }, | |
| { | |
| "epoch": 0.5585585585585585, | |
| "grad_norm": 0.7318829298019409, | |
| "learning_rate": 5.4545454545454546e-05, | |
| "loss": 5.2971, | |
| "memory/device_reserved (GiB)": 57.86, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 31, | |
| "tokens_per_second_per_gpu": 30.47 | |
| }, | |
| { | |
| "epoch": 0.5765765765765766, | |
| "grad_norm": 0.6973780393600464, | |
| "learning_rate": 5.636363636363636e-05, | |
| "loss": 5.0413, | |
| "memory/device_reserved (GiB)": 57.86, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 32, | |
| "tokens_per_second_per_gpu": 39.86 | |
| }, | |
| { | |
| "epoch": 0.5945945945945946, | |
| "grad_norm": 0.7133749127388, | |
| "learning_rate": 5.818181818181818e-05, | |
| "loss": 5.1071, | |
| "memory/device_reserved (GiB)": 57.86, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 33, | |
| "tokens_per_second_per_gpu": 23.77 | |
| }, | |
| { | |
| "epoch": 0.6126126126126126, | |
| "grad_norm": 0.6688926219940186, | |
| "learning_rate": 6e-05, | |
| "loss": 4.906, | |
| "memory/device_reserved (GiB)": 57.86, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 34, | |
| "tokens_per_second_per_gpu": 24.91 | |
| }, | |
| { | |
| "epoch": 0.6306306306306306, | |
| "grad_norm": 0.6534309983253479, | |
| "learning_rate": 6.181818181818182e-05, | |
| "loss": 4.9764, | |
| "memory/device_reserved (GiB)": 57.88, | |
| "memory/max_active (GiB)": 52.64, | |
| "memory/max_allocated (GiB)": 52.64, | |
| "step": 35, | |
| "tokens_per_second_per_gpu": 33.75 | |
| }, | |
| { | |
| "epoch": 0.6486486486486487, | |
| "grad_norm": 0.6284618377685547, | |
| "learning_rate": 6.363636363636364e-05, | |
| "loss": 4.8235, | |
| "memory/device_reserved (GiB)": 57.91, | |
| "memory/max_active (GiB)": 52.64, | |
| "memory/max_allocated (GiB)": 52.64, | |
| "step": 36, | |
| "tokens_per_second_per_gpu": 32.66 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.5952987670898438, | |
| "learning_rate": 6.545454545454546e-05, | |
| "loss": 4.7779, | |
| "memory/device_reserved (GiB)": 57.91, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 37, | |
| "tokens_per_second_per_gpu": 28.28 | |
| }, | |
| { | |
| "epoch": 0.6846846846846847, | |
| "grad_norm": 0.6216407418251038, | |
| "learning_rate": 6.727272727272727e-05, | |
| "loss": 4.7969, | |
| "memory/device_reserved (GiB)": 57.94, | |
| "memory/max_active (GiB)": 52.64, | |
| "memory/max_allocated (GiB)": 52.64, | |
| "step": 38, | |
| "tokens_per_second_per_gpu": 30.94 | |
| }, | |
| { | |
| "epoch": 0.7027027027027027, | |
| "grad_norm": 0.5679822564125061, | |
| "learning_rate": 6.90909090909091e-05, | |
| "loss": 4.7705, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 39, | |
| "tokens_per_second_per_gpu": 39.6 | |
| }, | |
| { | |
| "epoch": 0.7207207207207207, | |
| "grad_norm": 0.5590559244155884, | |
| "learning_rate": 7.090909090909092e-05, | |
| "loss": 4.7452, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 40, | |
| "tokens_per_second_per_gpu": 37.24 | |
| }, | |
| { | |
| "epoch": 0.7387387387387387, | |
| "grad_norm": 0.5368968844413757, | |
| "learning_rate": 7.272727272727273e-05, | |
| "loss": 4.7783, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 41, | |
| "tokens_per_second_per_gpu": 28.42 | |
| }, | |
| { | |
| "epoch": 0.7567567567567568, | |
| "grad_norm": 0.5522942543029785, | |
| "learning_rate": 7.454545454545455e-05, | |
| "loss": 4.7181, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.64, | |
| "memory/max_allocated (GiB)": 52.64, | |
| "step": 42, | |
| "tokens_per_second_per_gpu": 32.13 | |
| }, | |
| { | |
| "epoch": 0.7747747747747747, | |
| "grad_norm": 0.4933941066265106, | |
| "learning_rate": 7.636363636363637e-05, | |
| "loss": 4.8171, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 43, | |
| "tokens_per_second_per_gpu": 24.91 | |
| }, | |
| { | |
| "epoch": 0.7927927927927928, | |
| "grad_norm": 0.487724244594574, | |
| "learning_rate": 7.818181818181818e-05, | |
| "loss": 4.5267, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.64, | |
| "memory/max_allocated (GiB)": 52.64, | |
| "step": 44, | |
| "tokens_per_second_per_gpu": 38.94 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "grad_norm": 0.484387069940567, | |
| "learning_rate": 8e-05, | |
| "loss": 4.4651, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 45, | |
| "tokens_per_second_per_gpu": 28.12 | |
| }, | |
| { | |
| "epoch": 0.8288288288288288, | |
| "grad_norm": 0.47269508242607117, | |
| "learning_rate": 8.181818181818183e-05, | |
| "loss": 4.4782, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 46, | |
| "tokens_per_second_per_gpu": 37.47 | |
| }, | |
| { | |
| "epoch": 0.8468468468468469, | |
| "grad_norm": 0.4501938819885254, | |
| "learning_rate": 8.363636363636364e-05, | |
| "loss": 4.2217, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.64, | |
| "memory/max_allocated (GiB)": 52.64, | |
| "step": 47, | |
| "tokens_per_second_per_gpu": 36.52 | |
| }, | |
| { | |
| "epoch": 0.8648648648648649, | |
| "grad_norm": 0.4240598678588867, | |
| "learning_rate": 8.545454545454545e-05, | |
| "loss": 4.2584, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 48, | |
| "tokens_per_second_per_gpu": 25.0 | |
| }, | |
| { | |
| "epoch": 0.8828828828828829, | |
| "grad_norm": 0.4062960743904114, | |
| "learning_rate": 8.727272727272727e-05, | |
| "loss": 4.3664, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.66, | |
| "memory/max_allocated (GiB)": 52.66, | |
| "step": 49, | |
| "tokens_per_second_per_gpu": 34.05 | |
| }, | |
| { | |
| "epoch": 0.9009009009009009, | |
| "grad_norm": 0.4040940999984741, | |
| "learning_rate": 8.90909090909091e-05, | |
| "loss": 4.31, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 50, | |
| "tokens_per_second_per_gpu": 31.81 | |
| }, | |
| { | |
| "epoch": 0.918918918918919, | |
| "grad_norm": 0.38634198904037476, | |
| "learning_rate": 9.090909090909092e-05, | |
| "loss": 4.1829, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.64, | |
| "memory/max_allocated (GiB)": 52.64, | |
| "step": 51, | |
| "tokens_per_second_per_gpu": 31.96 | |
| }, | |
| { | |
| "epoch": 0.9369369369369369, | |
| "grad_norm": 0.4119090139865875, | |
| "learning_rate": 9.272727272727273e-05, | |
| "loss": 4.211, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.64, | |
| "memory/max_allocated (GiB)": 52.64, | |
| "step": 52, | |
| "tokens_per_second_per_gpu": 26.32 | |
| }, | |
| { | |
| "epoch": 0.954954954954955, | |
| "grad_norm": 0.39360716938972473, | |
| "learning_rate": 9.454545454545455e-05, | |
| "loss": 4.1027, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 53, | |
| "tokens_per_second_per_gpu": 36.47 | |
| }, | |
| { | |
| "epoch": 0.972972972972973, | |
| "grad_norm": 0.358804851770401, | |
| "learning_rate": 9.636363636363637e-05, | |
| "loss": 4.1262, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 54, | |
| "tokens_per_second_per_gpu": 35.0 | |
| }, | |
| { | |
| "epoch": 0.990990990990991, | |
| "grad_norm": 0.3619638681411743, | |
| "learning_rate": 9.818181818181818e-05, | |
| "loss": 3.9671, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 52.64, | |
| "memory/max_allocated (GiB)": 52.64, | |
| "step": 55, | |
| "tokens_per_second_per_gpu": 21.34 | |
| }, | |
| { | |
| "epoch": 0.990990990990991, | |
| "eval_loss": 4.040650367736816, | |
| "eval_runtime": 179.0976, | |
| "eval_samples_per_second": 0.558, | |
| "eval_steps_per_second": 0.073, | |
| "memory/device_reserved (GiB)": 57.95, | |
| "memory/max_active (GiB)": 43.5, | |
| "memory/max_allocated (GiB)": 43.5, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.3906014859676361, | |
| "learning_rate": 0.0001, | |
| "loss": 4.1367, | |
| "memory/device_reserved (GiB)": 57.88, | |
| "memory/max_active (GiB)": 52.55, | |
| "memory/max_allocated (GiB)": 52.55, | |
| "step": 56, | |
| "tokens_per_second_per_gpu": 28.38 | |
| }, | |
| { | |
| "epoch": 1.018018018018018, | |
| "grad_norm": 0.34430640935897827, | |
| "learning_rate": 0.00010181818181818181, | |
| "loss": 3.9357, | |
| "memory/device_reserved (GiB)": 57.91, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 57, | |
| "tokens_per_second_per_gpu": 26.74 | |
| }, | |
| { | |
| "epoch": 1.0360360360360361, | |
| "grad_norm": 0.348283588886261, | |
| "learning_rate": 0.00010363636363636364, | |
| "loss": 3.9594, | |
| "memory/device_reserved (GiB)": 57.91, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 58, | |
| "tokens_per_second_per_gpu": 25.8 | |
| }, | |
| { | |
| "epoch": 1.054054054054054, | |
| "grad_norm": 0.3484898507595062, | |
| "learning_rate": 0.00010545454545454545, | |
| "loss": 4.0163, | |
| "memory/device_reserved (GiB)": 57.91, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 59, | |
| "tokens_per_second_per_gpu": 26.23 | |
| }, | |
| { | |
| "epoch": 1.072072072072072, | |
| "grad_norm": 0.3627394735813141, | |
| "learning_rate": 0.00010727272727272728, | |
| "loss": 3.9347, | |
| "memory/device_reserved (GiB)": 57.91, | |
| "memory/max_active (GiB)": 52.64, | |
| "memory/max_allocated (GiB)": 52.64, | |
| "step": 60, | |
| "tokens_per_second_per_gpu": 32.7 | |
| }, | |
| { | |
| "epoch": 1.09009009009009, | |
| "grad_norm": 0.3439123034477234, | |
| "learning_rate": 0.00010909090909090909, | |
| "loss": 3.9091, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.64, | |
| "memory/max_allocated (GiB)": 52.64, | |
| "step": 61, | |
| "tokens_per_second_per_gpu": 46.09 | |
| }, | |
| { | |
| "epoch": 1.1081081081081081, | |
| "grad_norm": 0.34011831879615784, | |
| "learning_rate": 0.00011090909090909092, | |
| "loss": 3.8579, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 62, | |
| "tokens_per_second_per_gpu": 34.82 | |
| }, | |
| { | |
| "epoch": 1.1261261261261262, | |
| "grad_norm": 0.3363277018070221, | |
| "learning_rate": 0.00011272727272727272, | |
| "loss": 3.8762, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 63, | |
| "tokens_per_second_per_gpu": 41.68 | |
| }, | |
| { | |
| "epoch": 1.1441441441441442, | |
| "grad_norm": 0.30976247787475586, | |
| "learning_rate": 0.00011454545454545456, | |
| "loss": 3.8585, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.63, | |
| "memory/max_allocated (GiB)": 52.63, | |
| "step": 64, | |
| "tokens_per_second_per_gpu": 37.27 | |
| }, | |
| { | |
| "epoch": 1.1621621621621623, | |
| "grad_norm": 0.3248283565044403, | |
| "learning_rate": 0.00011636363636363636, | |
| "loss": 3.7179, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 65, | |
| "tokens_per_second_per_gpu": 26.17 | |
| }, | |
| { | |
| "epoch": 1.1801801801801801, | |
| "grad_norm": 0.3173442482948303, | |
| "learning_rate": 0.0001181818181818182, | |
| "loss": 3.8197, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 66, | |
| "tokens_per_second_per_gpu": 28.41 | |
| }, | |
| { | |
| "epoch": 1.1981981981981982, | |
| "grad_norm": 0.33076199889183044, | |
| "learning_rate": 0.00012, | |
| "loss": 3.6631, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 67, | |
| "tokens_per_second_per_gpu": 32.3 | |
| }, | |
| { | |
| "epoch": 1.2162162162162162, | |
| "grad_norm": 0.32531851530075073, | |
| "learning_rate": 0.00012181818181818183, | |
| "loss": 3.6563, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 68, | |
| "tokens_per_second_per_gpu": 32.11 | |
| }, | |
| { | |
| "epoch": 1.2342342342342343, | |
| "grad_norm": 0.295604944229126, | |
| "learning_rate": 0.00012363636363636364, | |
| "loss": 3.6487, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 69, | |
| "tokens_per_second_per_gpu": 31.08 | |
| }, | |
| { | |
| "epoch": 1.2522522522522523, | |
| "grad_norm": 0.3253607749938965, | |
| "learning_rate": 0.00012545454545454546, | |
| "loss": 3.741, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 70, | |
| "tokens_per_second_per_gpu": 29.52 | |
| }, | |
| { | |
| "epoch": 1.2702702702702702, | |
| "grad_norm": 0.28945258259773254, | |
| "learning_rate": 0.00012727272727272728, | |
| "loss": 3.6727, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 71, | |
| "tokens_per_second_per_gpu": 38.43 | |
| }, | |
| { | |
| "epoch": 1.2882882882882882, | |
| "grad_norm": 0.287298321723938, | |
| "learning_rate": 0.0001290909090909091, | |
| "loss": 3.5821, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 72, | |
| "tokens_per_second_per_gpu": 29.55 | |
| }, | |
| { | |
| "epoch": 1.3063063063063063, | |
| "grad_norm": 0.26835423707962036, | |
| "learning_rate": 0.00013090909090909093, | |
| "loss": 3.648, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 73, | |
| "tokens_per_second_per_gpu": 37.82 | |
| }, | |
| { | |
| "epoch": 1.3243243243243243, | |
| "grad_norm": 0.27674639225006104, | |
| "learning_rate": 0.00013272727272727275, | |
| "loss": 3.4623, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 74, | |
| "tokens_per_second_per_gpu": 48.99 | |
| }, | |
| { | |
| "epoch": 1.3423423423423424, | |
| "grad_norm": 0.28284698724746704, | |
| "learning_rate": 0.00013454545454545455, | |
| "loss": 3.4366, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 75, | |
| "tokens_per_second_per_gpu": 32.8 | |
| }, | |
| { | |
| "epoch": 1.3603603603603602, | |
| "grad_norm": 0.2780005931854248, | |
| "learning_rate": 0.00013636363636363637, | |
| "loss": 3.4308, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 76, | |
| "tokens_per_second_per_gpu": 33.52 | |
| }, | |
| { | |
| "epoch": 1.3783783783783785, | |
| "grad_norm": 0.2978385388851166, | |
| "learning_rate": 0.0001381818181818182, | |
| "loss": 3.4822, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 77, | |
| "tokens_per_second_per_gpu": 35.74 | |
| }, | |
| { | |
| "epoch": 1.3963963963963963, | |
| "grad_norm": 0.28048908710479736, | |
| "learning_rate": 0.00014, | |
| "loss": 3.4922, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.62, | |
| "memory/max_allocated (GiB)": 52.62, | |
| "step": 78, | |
| "tokens_per_second_per_gpu": 32.76 | |
| }, | |
| { | |
| "epoch": 1.4144144144144144, | |
| "grad_norm": 0.2921410799026489, | |
| "learning_rate": 0.00014181818181818184, | |
| "loss": 3.5634, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 79, | |
| "tokens_per_second_per_gpu": 22.05 | |
| }, | |
| { | |
| "epoch": 1.4324324324324325, | |
| "grad_norm": 0.28046825528144836, | |
| "learning_rate": 0.00014363636363636363, | |
| "loss": 3.4562, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 80, | |
| "tokens_per_second_per_gpu": 27.93 | |
| }, | |
| { | |
| "epoch": 1.4504504504504505, | |
| "grad_norm": 0.28950053453445435, | |
| "learning_rate": 0.00014545454545454546, | |
| "loss": 3.4771, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 81, | |
| "tokens_per_second_per_gpu": 43.19 | |
| }, | |
| { | |
| "epoch": 1.4684684684684686, | |
| "grad_norm": 0.2990242838859558, | |
| "learning_rate": 0.00014727272727272728, | |
| "loss": 3.4552, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 82, | |
| "tokens_per_second_per_gpu": 39.02 | |
| }, | |
| { | |
| "epoch": 1.4864864864864864, | |
| "grad_norm": 0.3110749125480652, | |
| "learning_rate": 0.0001490909090909091, | |
| "loss": 3.3635, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 83, | |
| "tokens_per_second_per_gpu": 28.78 | |
| }, | |
| { | |
| "epoch": 1.5045045045045045, | |
| "grad_norm": 0.2659832537174225, | |
| "learning_rate": 0.0001509090909090909, | |
| "loss": 3.309, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 84, | |
| "tokens_per_second_per_gpu": 26.52 | |
| }, | |
| { | |
| "epoch": 1.5225225225225225, | |
| "grad_norm": 0.2891514003276825, | |
| "learning_rate": 0.00015272727272727275, | |
| "loss": 3.2953, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 85, | |
| "tokens_per_second_per_gpu": 37.61 | |
| }, | |
| { | |
| "epoch": 1.5405405405405406, | |
| "grad_norm": 0.2862309217453003, | |
| "learning_rate": 0.00015454545454545454, | |
| "loss": 3.3016, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 86, | |
| "tokens_per_second_per_gpu": 49.89 | |
| }, | |
| { | |
| "epoch": 1.5585585585585586, | |
| "grad_norm": 0.3269289433956146, | |
| "learning_rate": 0.00015636363636363637, | |
| "loss": 3.4022, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 87, | |
| "tokens_per_second_per_gpu": 30.66 | |
| }, | |
| { | |
| "epoch": 1.5765765765765765, | |
| "grad_norm": 0.2758469581604004, | |
| "learning_rate": 0.0001581818181818182, | |
| "loss": 3.1596, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 88, | |
| "tokens_per_second_per_gpu": 39.43 | |
| }, | |
| { | |
| "epoch": 1.5945945945945947, | |
| "grad_norm": 0.2842893600463867, | |
| "learning_rate": 0.00016, | |
| "loss": 3.2368, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 89, | |
| "tokens_per_second_per_gpu": 23.64 | |
| }, | |
| { | |
| "epoch": 1.6126126126126126, | |
| "grad_norm": 0.27873268723487854, | |
| "learning_rate": 0.00016181818181818184, | |
| "loss": 3.1778, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 90, | |
| "tokens_per_second_per_gpu": 24.52 | |
| }, | |
| { | |
| "epoch": 1.6306306306306306, | |
| "grad_norm": 0.25983887910842896, | |
| "learning_rate": 0.00016363636363636366, | |
| "loss": 3.2287, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 91, | |
| "tokens_per_second_per_gpu": 34.4 | |
| }, | |
| { | |
| "epoch": 1.6486486486486487, | |
| "grad_norm": 0.2840956151485443, | |
| "learning_rate": 0.00016545454545454545, | |
| "loss": 3.1411, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 92, | |
| "tokens_per_second_per_gpu": 33.89 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.2628091275691986, | |
| "learning_rate": 0.00016727272727272728, | |
| "loss": 3.1159, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 93, | |
| "tokens_per_second_per_gpu": 29.27 | |
| }, | |
| { | |
| "epoch": 1.6846846846846848, | |
| "grad_norm": 0.2681942582130432, | |
| "learning_rate": 0.0001690909090909091, | |
| "loss": 3.1647, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 94, | |
| "tokens_per_second_per_gpu": 30.7 | |
| }, | |
| { | |
| "epoch": 1.7027027027027026, | |
| "grad_norm": 0.2515859603881836, | |
| "learning_rate": 0.0001709090909090909, | |
| "loss": 3.1587, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 95, | |
| "tokens_per_second_per_gpu": 40.07 | |
| }, | |
| { | |
| "epoch": 1.7207207207207207, | |
| "grad_norm": 0.2735103666782379, | |
| "learning_rate": 0.00017272727272727275, | |
| "loss": 3.1537, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 96, | |
| "tokens_per_second_per_gpu": 37.91 | |
| }, | |
| { | |
| "epoch": 1.7387387387387387, | |
| "grad_norm": 0.24973994493484497, | |
| "learning_rate": 0.00017454545454545454, | |
| "loss": 3.2266, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 97, | |
| "tokens_per_second_per_gpu": 28.61 | |
| }, | |
| { | |
| "epoch": 1.7567567567567568, | |
| "grad_norm": 0.26508864760398865, | |
| "learning_rate": 0.00017636363636363637, | |
| "loss": 3.135, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 98, | |
| "tokens_per_second_per_gpu": 34.76 | |
| }, | |
| { | |
| "epoch": 1.7747747747747749, | |
| "grad_norm": 0.2922559678554535, | |
| "learning_rate": 0.0001781818181818182, | |
| "loss": 3.359, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 99, | |
| "tokens_per_second_per_gpu": 26.34 | |
| }, | |
| { | |
| "epoch": 1.7927927927927927, | |
| "grad_norm": 0.2632916271686554, | |
| "learning_rate": 0.00018, | |
| "loss": 3.1131, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 100, | |
| "tokens_per_second_per_gpu": 42.17 | |
| }, | |
| { | |
| "epoch": 1.810810810810811, | |
| "grad_norm": 0.2974204123020172, | |
| "learning_rate": 0.00018181818181818183, | |
| "loss": 3.1127, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 101, | |
| "tokens_per_second_per_gpu": 30.71 | |
| }, | |
| { | |
| "epoch": 1.8288288288288288, | |
| "grad_norm": 0.28947019577026367, | |
| "learning_rate": 0.00018363636363636366, | |
| "loss": 3.1019, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 102, | |
| "tokens_per_second_per_gpu": 40.45 | |
| }, | |
| { | |
| "epoch": 1.8468468468468469, | |
| "grad_norm": 0.29779183864593506, | |
| "learning_rate": 0.00018545454545454545, | |
| "loss": 2.8855, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 103, | |
| "tokens_per_second_per_gpu": 38.82 | |
| }, | |
| { | |
| "epoch": 1.864864864864865, | |
| "grad_norm": 0.27393272519111633, | |
| "learning_rate": 0.00018727272727272728, | |
| "loss": 2.9937, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 104, | |
| "tokens_per_second_per_gpu": 26.96 | |
| }, | |
| { | |
| "epoch": 1.8828828828828827, | |
| "grad_norm": 0.28197985887527466, | |
| "learning_rate": 0.0001890909090909091, | |
| "loss": 3.1189, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 105, | |
| "tokens_per_second_per_gpu": 37.87 | |
| }, | |
| { | |
| "epoch": 1.900900900900901, | |
| "grad_norm": 0.27397748827934265, | |
| "learning_rate": 0.00019090909090909092, | |
| "loss": 3.0858, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.59, | |
| "memory/max_allocated (GiB)": 52.59, | |
| "step": 106, | |
| "tokens_per_second_per_gpu": 35.13 | |
| }, | |
| { | |
| "epoch": 1.9189189189189189, | |
| "grad_norm": 0.274027943611145, | |
| "learning_rate": 0.00019272727272727274, | |
| "loss": 2.9537, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.59, | |
| "memory/max_allocated (GiB)": 52.59, | |
| "step": 107, | |
| "tokens_per_second_per_gpu": 33.48 | |
| }, | |
| { | |
| "epoch": 1.936936936936937, | |
| "grad_norm": 0.2898459732532501, | |
| "learning_rate": 0.00019454545454545457, | |
| "loss": 2.9996, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.6, | |
| "memory/max_allocated (GiB)": 52.6, | |
| "step": 108, | |
| "tokens_per_second_per_gpu": 27.65 | |
| }, | |
| { | |
| "epoch": 1.954954954954955, | |
| "grad_norm": 0.2991600036621094, | |
| "learning_rate": 0.00019636363636363636, | |
| "loss": 2.9123, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 109, | |
| "tokens_per_second_per_gpu": 39.01 | |
| }, | |
| { | |
| "epoch": 1.972972972972973, | |
| "grad_norm": 0.27946925163269043, | |
| "learning_rate": 0.00019818181818181821, | |
| "loss": 3.0439, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 52.61, | |
| "memory/max_allocated (GiB)": 52.61, | |
| "step": 110, | |
| "tokens_per_second_per_gpu": 35.44 | |
| }, | |
| { | |
| "epoch": 1.972972972972973, | |
| "eval_loss": 2.936415672302246, | |
| "eval_runtime": 158.8267, | |
| "eval_samples_per_second": 0.63, | |
| "eval_steps_per_second": 0.082, | |
| "memory/device_reserved (GiB)": 57.92, | |
| "memory/max_active (GiB)": 43.5, | |
| "memory/max_allocated (GiB)": 43.5, | |
| "step": 110 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 55, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0902735727406088e+19, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |