{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9920424403183024,
"eval_steps": 500,
"global_step": 282,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10610079575596817,
"grad_norm": 0.6782001256942749,
"learning_rate": 3.4482758620689657e-05,
"loss": 0.8975,
"step": 10
},
{
"epoch": 0.21220159151193635,
"grad_norm": 0.858634889125824,
"learning_rate": 6.896551724137931e-05,
"loss": 1.0343,
"step": 20
},
{
"epoch": 0.3183023872679045,
"grad_norm": 0.5417032241821289,
"learning_rate": 9.99961452773888e-05,
"loss": 0.5668,
"step": 30
},
{
"epoch": 0.4244031830238727,
"grad_norm": 0.8525161743164062,
"learning_rate": 9.953429730181653e-05,
"loss": 0.6499,
"step": 40
},
{
"epoch": 0.5305039787798409,
"grad_norm": 0.9144126772880554,
"learning_rate": 9.830965649597455e-05,
"loss": 0.6146,
"step": 50
},
{
"epoch": 0.636604774535809,
"grad_norm": 0.5597995519638062,
"learning_rate": 9.634108145435665e-05,
"loss": 0.346,
"step": 60
},
{
"epoch": 0.7427055702917772,
"grad_norm": 0.7313889861106873,
"learning_rate": 9.36588868278086e-05,
"loss": 0.4951,
"step": 70
},
{
"epoch": 0.8488063660477454,
"grad_norm": 1.2549511194229126,
"learning_rate": 9.03043764995379e-05,
"loss": 0.4583,
"step": 80
},
{
"epoch": 0.9549071618037135,
"grad_norm": 0.37749794125556946,
"learning_rate": 8.63292075347872e-05,
"loss": 0.4206,
"step": 90
},
{
"epoch": 1.0610079575596818,
"grad_norm": 0.6580765843391418,
"learning_rate": 8.179459469889268e-05,
"loss": 0.4612,
"step": 100
},
{
"epoch": 1.16710875331565,
"grad_norm": 0.5585550665855408,
"learning_rate": 7.67703677935813e-05,
"loss": 0.2816,
"step": 110
},
{
"epoch": 1.273209549071618,
"grad_norm": 1.0563403367996216,
"learning_rate": 7.133389632785543e-05,
"loss": 0.435,
"step": 120
},
{
"epoch": 1.3793103448275863,
"grad_norm": 0.35568875074386597,
"learning_rate": 6.556889808276594e-05,
"loss": 0.2524,
"step": 130
},
{
"epoch": 1.4854111405835544,
"grad_norm": 1.0185539722442627,
"learning_rate": 5.956414991732584e-05,
"loss": 0.3828,
"step": 140
},
{
"epoch": 1.5915119363395225,
"grad_norm": 0.653967022895813,
"learning_rate": 5.341212066823355e-05,
"loss": 0.3319,
"step": 150
},
{
"epoch": 1.6976127320954908,
"grad_norm": 0.9374193549156189,
"learning_rate": 4.7207547195774475e-05,
"loss": 0.3626,
"step": 160
},
{
"epoch": 1.8037135278514589,
"grad_norm": 0.7635377645492554,
"learning_rate": 4.1045975503777756e-05,
"loss": 0.3388,
"step": 170
},
{
"epoch": 1.909814323607427,
"grad_norm": 0.9419568777084351,
"learning_rate": 3.502228939933993e-05,
"loss": 0.3257,
"step": 180
},
{
"epoch": 2.0159151193633953,
"grad_norm": 0.3300299346446991,
"learning_rate": 2.9229249349905684e-05,
"loss": 0.3826,
"step": 190
},
{
"epoch": 2.1220159151193636,
"grad_norm": 0.9070532321929932,
"learning_rate": 2.375606403826403e-05,
"loss": 0.267,
"step": 200
},
{
"epoch": 2.2281167108753315,
"grad_norm": 0.7301483154296875,
"learning_rate": 1.8687016612493542e-05,
"loss": 0.2643,
"step": 210
},
{
"epoch": 2.3342175066313,
"grad_norm": 1.0478216409683228,
"learning_rate": 1.41001667856273e-05,
"loss": 0.3146,
"step": 220
},
{
"epoch": 2.4403183023872677,
"grad_norm": 0.6027346849441528,
"learning_rate": 1.0066148771776379e-05,
"loss": 0.3457,
"step": 230
},
{
"epoch": 2.546419098143236,
"grad_norm": 0.7205595970153809,
"learning_rate": 6.647083569637797e-06,
"loss": 0.2173,
"step": 240
},
{
"epoch": 2.6525198938992043,
"grad_norm": 0.44989392161369324,
"learning_rate": 3.895622343444794e-06,
"loss": 0.2089,
"step": 250
},
{
"epoch": 2.7586206896551726,
"grad_norm": 0.7996519804000854,
"learning_rate": 1.8541356326100433e-06,
"loss": 0.2485,
"step": 260
},
{
"epoch": 2.8647214854111405,
"grad_norm": 1.2713292837142944,
"learning_rate": 5.540608756547106e-07,
"loss": 0.2668,
"step": 270
},
{
"epoch": 2.970822281167109,
"grad_norm": 0.9034178853034973,
"learning_rate": 1.5418296089358963e-08,
"loss": 0.2985,
"step": 280
}
],
"logging_steps": 10,
"max_steps": 282,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4056799126732800.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}