{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9920424403183024,
  "eval_steps": 500,
  "global_step": 282,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10610079575596817,
      "grad_norm": 0.6782001256942749,
      "learning_rate": 3.4482758620689657e-05,
      "loss": 0.8975,
      "step": 10
    },
    {
      "epoch": 0.21220159151193635,
      "grad_norm": 0.858634889125824,
      "learning_rate": 6.896551724137931e-05,
      "loss": 1.0343,
      "step": 20
    },
    {
      "epoch": 0.3183023872679045,
      "grad_norm": 0.5417032241821289,
      "learning_rate": 9.99961452773888e-05,
      "loss": 0.5668,
      "step": 30
    },
    {
      "epoch": 0.4244031830238727,
      "grad_norm": 0.8525161743164062,
      "learning_rate": 9.953429730181653e-05,
      "loss": 0.6499,
      "step": 40
    },
    {
      "epoch": 0.5305039787798409,
      "grad_norm": 0.9144126772880554,
      "learning_rate": 9.830965649597455e-05,
      "loss": 0.6146,
      "step": 50
    },
    {
      "epoch": 0.636604774535809,
      "grad_norm": 0.5597995519638062,
      "learning_rate": 9.634108145435665e-05,
      "loss": 0.346,
      "step": 60
    },
    {
      "epoch": 0.7427055702917772,
      "grad_norm": 0.7313889861106873,
      "learning_rate": 9.36588868278086e-05,
      "loss": 0.4951,
      "step": 70
    },
    {
      "epoch": 0.8488063660477454,
      "grad_norm": 1.2549511194229126,
      "learning_rate": 9.03043764995379e-05,
      "loss": 0.4583,
      "step": 80
    },
    {
      "epoch": 0.9549071618037135,
      "grad_norm": 0.37749794125556946,
      "learning_rate": 8.63292075347872e-05,
      "loss": 0.4206,
      "step": 90
    },
    {
      "epoch": 1.0610079575596818,
      "grad_norm": 0.6580765843391418,
      "learning_rate": 8.179459469889268e-05,
      "loss": 0.4612,
      "step": 100
    },
    {
      "epoch": 1.16710875331565,
      "grad_norm": 0.5585550665855408,
      "learning_rate": 7.67703677935813e-05,
      "loss": 0.2816,
      "step": 110
    },
    {
      "epoch": 1.273209549071618,
      "grad_norm": 1.0563403367996216,
      "learning_rate": 7.133389632785543e-05,
      "loss": 0.435,
      "step": 120
    },
    {
      "epoch": 1.3793103448275863,
      "grad_norm": 0.35568875074386597,
      "learning_rate": 6.556889808276594e-05,
      "loss": 0.2524,
      "step": 130
    },
    {
      "epoch": 1.4854111405835544,
      "grad_norm": 1.0185539722442627,
      "learning_rate": 5.956414991732584e-05,
      "loss": 0.3828,
      "step": 140
    },
    {
      "epoch": 1.5915119363395225,
      "grad_norm": 0.653967022895813,
      "learning_rate": 5.341212066823355e-05,
      "loss": 0.3319,
      "step": 150
    },
    {
      "epoch": 1.6976127320954908,
      "grad_norm": 0.9374193549156189,
      "learning_rate": 4.7207547195774475e-05,
      "loss": 0.3626,
      "step": 160
    },
    {
      "epoch": 1.8037135278514589,
      "grad_norm": 0.7635377645492554,
      "learning_rate": 4.1045975503777756e-05,
      "loss": 0.3388,
      "step": 170
    },
    {
      "epoch": 1.909814323607427,
      "grad_norm": 0.9419568777084351,
      "learning_rate": 3.502228939933993e-05,
      "loss": 0.3257,
      "step": 180
    },
    {
      "epoch": 2.0159151193633953,
      "grad_norm": 0.3300299346446991,
      "learning_rate": 2.9229249349905684e-05,
      "loss": 0.3826,
      "step": 190
    },
    {
      "epoch": 2.1220159151193636,
      "grad_norm": 0.9070532321929932,
      "learning_rate": 2.375606403826403e-05,
      "loss": 0.267,
      "step": 200
    },
    {
      "epoch": 2.2281167108753315,
      "grad_norm": 0.7301483154296875,
      "learning_rate": 1.8687016612493542e-05,
      "loss": 0.2643,
      "step": 210
    },
    {
      "epoch": 2.3342175066313,
      "grad_norm": 1.0478216409683228,
      "learning_rate": 1.41001667856273e-05,
      "loss": 0.3146,
      "step": 220
    },
    {
      "epoch": 2.4403183023872677,
      "grad_norm": 0.6027346849441528,
      "learning_rate": 1.0066148771776379e-05,
      "loss": 0.3457,
      "step": 230
    },
    {
      "epoch": 2.546419098143236,
      "grad_norm": 0.7205595970153809,
      "learning_rate": 6.647083569637797e-06,
      "loss": 0.2173,
      "step": 240
    },
    {
      "epoch": 2.6525198938992043,
      "grad_norm": 0.44989392161369324,
      "learning_rate": 3.895622343444794e-06,
      "loss": 0.2089,
      "step": 250
    },
    {
      "epoch": 2.7586206896551726,
      "grad_norm": 0.7996519804000854,
      "learning_rate": 1.8541356326100433e-06,
      "loss": 0.2485,
      "step": 260
    },
    {
      "epoch": 2.8647214854111405,
      "grad_norm": 1.2713292837142944,
      "learning_rate": 5.540608756547106e-07,
      "loss": 0.2668,
      "step": 270
    },
    {
      "epoch": 2.970822281167109,
      "grad_norm": 0.9034178853034973,
      "learning_rate": 1.5418296089358963e-08,
      "loss": 0.2985,
      "step": 280
    }
  ],
  "logging_steps": 10,
  "max_steps": 282,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4056799126732800.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}