dyang39
/

SIM-RAG-Llama3-2B

Safetensors

Model card Files and versions

xet

Community

dyang39 committed on May 22

Commit

0e1cc45

verified ·

1 Parent(s): a3caedc

Initial commit

Browse files

Files changed (1) hide show

trainer_state.json +708 -0

trainer_state.json ADDED Viewed

	@@ -0,0 +1,708 @@

+{
+  "best_global_step": 46266,
+  "best_metric": 0.838254282517001,
+  "best_model_checkpoint": "dm_training/finetuned_checkpoints/SIM-RAG-full/checkpoint-46266",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 46266,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02161414429602732,
+      "grad_norm": 0.7032617926597595,
+      "learning_rate": 2.994e-05,
+      "loss": 0.3292,
+      "step": 500
+    },
+    {
+      "epoch": 0.04322828859205464,
+      "grad_norm": 8.765708923339844,
+      "learning_rate": 2.9782725438685614e-05,
+      "loss": 0.3537,
+      "step": 1000
+    },
+    {
+      "epoch": 0.06484243288808196,
+      "grad_norm": 3.9439210891723633,
+      "learning_rate": 2.956501545740867e-05,
+      "loss": 0.3206,
+      "step": 1500
+    },
+    {
+      "epoch": 0.08645657718410928,
+      "grad_norm": 12.651052474975586,
+      "learning_rate": 2.934730547613173e-05,
+      "loss": 0.3505,
+      "step": 2000
+    },
+    {
+      "epoch": 0.1080707214801366,
+      "grad_norm": 0.14961493015289307,
+      "learning_rate": 2.9129595494854787e-05,
+      "loss": 0.335,
+      "step": 2500
+    },
+    {
+      "epoch": 0.12968486577616392,
+      "grad_norm": 2.6468780040740967,
+      "learning_rate": 2.8911885513577847e-05,
+      "loss": 0.3601,
+      "step": 3000
+    },
+    {
+      "epoch": 0.15129901007219124,
+      "grad_norm": 27.052431106567383,
+      "learning_rate": 2.8694175532300907e-05,
+      "loss": 0.3292,
+      "step": 3500
+    },
+    {
+      "epoch": 0.17291315436821855,
+      "grad_norm": 0.17826782166957855,
+      "learning_rate": 2.8476465551023964e-05,
+      "loss": 0.3218,
+      "step": 4000
+    },
+    {
+      "epoch": 0.1945272986642459,
+      "grad_norm": 0.4498291015625,
+      "learning_rate": 2.825875556974702e-05,
+      "loss": 0.2925,
+      "step": 4500
+    },
+    {
+      "epoch": 0.2161414429602732,
+      "grad_norm": 0.27510523796081543,
+      "learning_rate": 2.804104558847008e-05,
+      "loss": 0.3428,
+      "step": 5000
+    },
+    {
+      "epoch": 0.23775558725630053,
+      "grad_norm": 38.518646240234375,
+      "learning_rate": 2.782333560719314e-05,
+      "loss": 0.2883,
+      "step": 5500
+    },
+    {
+      "epoch": 0.25936973155232784,
+      "grad_norm": 0.18676885962486267,
+      "learning_rate": 2.7605625625916196e-05,
+      "loss": 0.3015,
+      "step": 6000
+    },
+    {
+      "epoch": 0.28098387584835516,
+      "grad_norm": 0.5126625895500183,
+      "learning_rate": 2.7387915644639256e-05,
+      "loss": 0.2988,
+      "step": 6500
+    },
+    {
+      "epoch": 0.3025980201443825,
+      "grad_norm": 36.30085754394531,
+      "learning_rate": 2.7170205663362313e-05,
+      "loss": 0.2914,
+      "step": 7000
+    },
+    {
+      "epoch": 0.3242121644404098,
+      "grad_norm": 5.256961345672607,
+      "learning_rate": 2.695249568208537e-05,
+      "loss": 0.3128,
+      "step": 7500
+    },
+    {
+      "epoch": 0.3458263087364371,
+      "grad_norm": 0.18740588426589966,
+      "learning_rate": 2.6734785700808432e-05,
+      "loss": 0.2849,
+      "step": 8000
+    },
+    {
+      "epoch": 0.3674404530324644,
+      "grad_norm": 33.268550872802734,
+      "learning_rate": 2.651707571953149e-05,
+      "loss": 0.3023,
+      "step": 8500
+    },
+    {
+      "epoch": 0.3890545973284918,
+      "grad_norm": 0.0934978798031807,
+      "learning_rate": 2.629936573825455e-05,
+      "loss": 0.2882,
+      "step": 9000
+    },
+    {
+      "epoch": 0.4106687416245191,
+      "grad_norm": 17.90414047241211,
+      "learning_rate": 2.6081655756977605e-05,
+      "loss": 0.2699,
+      "step": 9500
+    },
+    {
+      "epoch": 0.4322828859205464,
+      "grad_norm": 0.9339249134063721,
+      "learning_rate": 2.5863945775700662e-05,
+      "loss": 0.3039,
+      "step": 10000
+    },
+    {
+      "epoch": 0.45389703021657374,
+      "grad_norm": 0.04919858276844025,
+      "learning_rate": 2.5646235794423725e-05,
+      "loss": 0.2661,
+      "step": 10500
+    },
+    {
+      "epoch": 0.47551117451260105,
+      "grad_norm": 0.06951851397752762,
+      "learning_rate": 2.542852581314678e-05,
+      "loss": 0.2478,
+      "step": 11000
+    },
+    {
+      "epoch": 0.49712531880862837,
+      "grad_norm": 0.058041177690029144,
+      "learning_rate": 2.5210815831869838e-05,
+      "loss": 0.2489,
+      "step": 11500
+    },
+    {
+      "epoch": 0.5187394631046557,
+      "grad_norm": 22.77214813232422,
+      "learning_rate": 2.4993105850592898e-05,
+      "loss": 0.2744,
+      "step": 12000
+    },
+    {
+      "epoch": 0.5403536074006831,
+      "grad_norm": 26.296802520751953,
+      "learning_rate": 2.4775395869315954e-05,
+      "loss": 0.2627,
+      "step": 12500
+    },
+    {
+      "epoch": 0.5619677516967103,
+      "grad_norm": 18.95213508605957,
+      "learning_rate": 2.4557685888039014e-05,
+      "loss": 0.2405,
+      "step": 13000
+    },
+    {
+      "epoch": 0.5835818959927377,
+      "grad_norm": 10.959939002990723,
+      "learning_rate": 2.4339975906762074e-05,
+      "loss": 0.2472,
+      "step": 13500
+    },
+    {
+      "epoch": 0.605196040288765,
+      "grad_norm": 0.1388530284166336,
+      "learning_rate": 2.412226592548513e-05,
+      "loss": 0.2532,
+      "step": 14000
+    },
+    {
+      "epoch": 0.6268101845847923,
+      "grad_norm": 0.28232917189598083,
+      "learning_rate": 2.3904555944208187e-05,
+      "loss": 0.2727,
+      "step": 14500
+    },
+    {
+      "epoch": 0.6484243288808196,
+      "grad_norm": 0.17236585915088654,
+      "learning_rate": 2.3686845962931247e-05,
+      "loss": 0.2593,
+      "step": 15000
+    },
+    {
+      "epoch": 0.670038473176847,
+      "grad_norm": 0.013697458431124687,
+      "learning_rate": 2.3469135981654307e-05,
+      "loss": 0.2163,
+      "step": 15500
+    },
+    {
+      "epoch": 0.6916526174728742,
+      "grad_norm": 0.04355601221323013,
+      "learning_rate": 2.3251426000377363e-05,
+      "loss": 0.2418,
+      "step": 16000
+    },
+    {
+      "epoch": 0.7132667617689016,
+      "grad_norm": 0.15288621187210083,
+      "learning_rate": 2.3033716019100423e-05,
+      "loss": 0.2458,
+      "step": 16500
+    },
+    {
+      "epoch": 0.7348809060649288,
+      "grad_norm": 0.11191035062074661,
+      "learning_rate": 2.281600603782348e-05,
+      "loss": 0.2299,
+      "step": 17000
+    },
+    {
+      "epoch": 0.7564950503609562,
+      "grad_norm": 0.06535373628139496,
+      "learning_rate": 2.259829605654654e-05,
+      "loss": 0.2299,
+      "step": 17500
+    },
+    {
+      "epoch": 0.7781091946569836,
+      "grad_norm": 0.10419075191020966,
+      "learning_rate": 2.23805860752696e-05,
+      "loss": 0.2606,
+      "step": 18000
+    },
+    {
+      "epoch": 0.7997233389530108,
+      "grad_norm": 25.499109268188477,
+      "learning_rate": 2.2162876093992656e-05,
+      "loss": 0.2452,
+      "step": 18500
+    },
+    {
+      "epoch": 0.8213374832490382,
+      "grad_norm": 7.74832010269165,
+      "learning_rate": 2.1945166112715716e-05,
+      "loss": 0.2125,
+      "step": 19000
+    },
+    {
+      "epoch": 0.8429516275450655,
+      "grad_norm": 0.14268887042999268,
+      "learning_rate": 2.1727456131438773e-05,
+      "loss": 0.2155,
+      "step": 19500
+    },
+    {
+      "epoch": 0.8645657718410928,
+      "grad_norm": 0.7247521877288818,
+      "learning_rate": 2.1509746150161832e-05,
+      "loss": 0.2234,
+      "step": 20000
+    },
+    {
+      "epoch": 0.8861799161371201,
+      "grad_norm": 14.978250503540039,
+      "learning_rate": 2.1292036168884892e-05,
+      "loss": 0.2129,
+      "step": 20500
+    },
+    {
+      "epoch": 0.9077940604331475,
+      "grad_norm": 0.235914945602417,
+      "learning_rate": 2.107432618760795e-05,
+      "loss": 0.2189,
+      "step": 21000
+    },
+    {
+      "epoch": 0.9294082047291747,
+      "grad_norm": 20.6143798828125,
+      "learning_rate": 2.0856616206331005e-05,
+      "loss": 0.2051,
+      "step": 21500
+    },
+    {
+      "epoch": 0.9510223490252021,
+      "grad_norm": 67.3742904663086,
+      "learning_rate": 2.0638906225054065e-05,
+      "loss": 0.2045,
+      "step": 22000
+    },
+    {
+      "epoch": 0.9726364933212294,
+      "grad_norm": 0.51312255859375,
+      "learning_rate": 2.0421196243777125e-05,
+      "loss": 0.2013,
+      "step": 22500
+    },
+    {
+      "epoch": 0.9942506376172567,
+      "grad_norm": 0.40760791301727295,
+      "learning_rate": 2.020348626250018e-05,
+      "loss": 0.2329,
+      "step": 23000
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.8315399845054662,
+      "eval_f1_0": 0.8379296066252587,
+      "eval_f1_1": 0.8246258625324849,
+      "eval_loss": 0.27029550075531006,
+      "eval_precision_0": 0.8346807457515262,
+      "eval_precision_1": 0.828113750899928,
+      "eval_recall_0": 0.8412038576654473,
+      "eval_recall_1": 0.8211672318400857,
+      "eval_runtime": 6851.1436,
+      "eval_samples_per_second": 1.696,
+      "eval_steps_per_second": 0.848,
+      "step": 23133
+    },
+    {
+      "epoch": 1.015864781913284,
+      "grad_norm": 0.1842740774154663,
+      "learning_rate": 1.998577628122324e-05,
+      "loss": 0.1819,
+      "step": 23500
+    },
+    {
+      "epoch": 1.0374789262093114,
+      "grad_norm": 18.2067928314209,
+      "learning_rate": 1.9768066299946298e-05,
+      "loss": 0.1862,
+      "step": 24000
+    },
+    {
+      "epoch": 1.0590930705053387,
+      "grad_norm": 56.502193450927734,
+      "learning_rate": 1.9550356318669354e-05,
+      "loss": 0.1461,
+      "step": 24500
+    },
+    {
+      "epoch": 1.0807072148013661,
+      "grad_norm": 0.013557116501033306,
+      "learning_rate": 1.9332646337392418e-05,
+      "loss": 0.1521,
+      "step": 25000
+    },
+    {
+      "epoch": 1.1023213590973933,
+      "grad_norm": 0.05783897638320923,
+      "learning_rate": 1.9114936356115474e-05,
+      "loss": 0.1428,
+      "step": 25500
+    },
+    {
+      "epoch": 1.1239355033934206,
+      "grad_norm": 0.026215313002467155,
+      "learning_rate": 1.889722637483853e-05,
+      "loss": 0.1491,
+      "step": 26000
+    },
+    {
+      "epoch": 1.145549647689448,
+      "grad_norm": 0.0602092407643795,
+      "learning_rate": 1.867951639356159e-05,
+      "loss": 0.1442,
+      "step": 26500
+    },
+    {
+      "epoch": 1.1671637919854754,
+      "grad_norm": 0.03851708024740219,
+      "learning_rate": 1.8461806412284647e-05,
+      "loss": 0.1511,
+      "step": 27000
+    },
+    {
+      "epoch": 1.1887779362815025,
+      "grad_norm": 0.029633022844791412,
+      "learning_rate": 1.8244096431007707e-05,
+      "loss": 0.1438,
+      "step": 27500
+    },
+    {
+      "epoch": 1.21039208057753,
+      "grad_norm": 4.901973247528076,
+      "learning_rate": 1.8026386449730767e-05,
+      "loss": 0.1541,
+      "step": 28000
+    },
+    {
+      "epoch": 1.2320062248735573,
+      "grad_norm": 0.05654510483145714,
+      "learning_rate": 1.7808676468453823e-05,
+      "loss": 0.1537,
+      "step": 28500
+    },
+    {
+      "epoch": 1.2536203691695846,
+      "grad_norm": 0.0678759291768074,
+      "learning_rate": 1.7590966487176883e-05,
+      "loss": 0.1362,
+      "step": 29000
+    },
+    {
+      "epoch": 1.275234513465612,
+      "grad_norm": 14.40847110748291,
+      "learning_rate": 1.737325650589994e-05,
+      "loss": 0.1611,
+      "step": 29500
+    },
+    {
+      "epoch": 1.2968486577616392,
+      "grad_norm": 0.13242945075035095,
+      "learning_rate": 1.7155546524623e-05,
+      "loss": 0.1743,
+      "step": 30000
+    },
+    {
+      "epoch": 1.3184628020576665,
+      "grad_norm": 0.05444726720452309,
+      "learning_rate": 1.693783654334606e-05,
+      "loss": 0.1427,
+      "step": 30500
+    },
+    {
+      "epoch": 1.340076946353694,
+      "grad_norm": 28.47621726989746,
+      "learning_rate": 1.6720126562069116e-05,
+      "loss": 0.1485,
+      "step": 31000
+    },
+    {
+      "epoch": 1.361691090649721,
+      "grad_norm": 0.04358465224504471,
+      "learning_rate": 1.6502416580792172e-05,
+      "loss": 0.1422,
+      "step": 31500
+    },
+    {
+      "epoch": 1.3833052349457484,
+      "grad_norm": 0.024943144991993904,
+      "learning_rate": 1.6284706599515236e-05,
+      "loss": 0.1386,
+      "step": 32000
+    },
+    {
+      "epoch": 1.4049193792417758,
+      "grad_norm": 0.1587284654378891,
+      "learning_rate": 1.6066996618238292e-05,
+      "loss": 0.1252,
+      "step": 32500
+    },
+    {
+      "epoch": 1.4265335235378032,
+      "grad_norm": 0.054883528500795364,
+      "learning_rate": 1.584928663696135e-05,
+      "loss": 0.1429,
+      "step": 33000
+    },
+    {
+      "epoch": 1.4481476678338305,
+      "grad_norm": 0.21283945441246033,
+      "learning_rate": 1.563157665568441e-05,
+      "loss": 0.1465,
+      "step": 33500
+    },
+    {
+      "epoch": 1.4697618121298577,
+      "grad_norm": 63.26069259643555,
+      "learning_rate": 1.5413866674407465e-05,
+      "loss": 0.1157,
+      "step": 34000
+    },
+    {
+      "epoch": 1.491375956425885,
+      "grad_norm": 20.822044372558594,
+      "learning_rate": 1.5196156693130527e-05,
+      "loss": 0.1505,
+      "step": 34500
+    },
+    {
+      "epoch": 1.5129901007219124,
+      "grad_norm": 0.2792131006717682,
+      "learning_rate": 1.4978446711853583e-05,
+      "loss": 0.1205,
+      "step": 35000
+    },
+    {
+      "epoch": 1.5346042450179398,
+      "grad_norm": 0.04913631081581116,
+      "learning_rate": 1.4760736730576641e-05,
+      "loss": 0.124,
+      "step": 35500
+    },
+    {
+      "epoch": 1.5562183893139672,
+      "grad_norm": 0.007014845497906208,
+      "learning_rate": 1.45430267492997e-05,
+      "loss": 0.1404,
+      "step": 36000
+    },
+    {
+      "epoch": 1.5778325336099943,
+      "grad_norm": 0.03032066859304905,
+      "learning_rate": 1.432531676802276e-05,
+      "loss": 0.1345,
+      "step": 36500
+    },
+    {
+      "epoch": 1.5994466779060217,
+      "grad_norm": 0.02347446419298649,
+      "learning_rate": 1.4107606786745816e-05,
+      "loss": 0.141,
+      "step": 37000
+    },
+    {
+      "epoch": 1.621060822202049,
+      "grad_norm": 0.03960123285651207,
+      "learning_rate": 1.3889896805468876e-05,
+      "loss": 0.1399,
+      "step": 37500
+    },
+    {
+      "epoch": 1.6426749664980762,
+      "grad_norm": 0.0023522686678916216,
+      "learning_rate": 1.3672186824191934e-05,
+      "loss": 0.1301,
+      "step": 38000
+    },
+    {
+      "epoch": 1.6642891107941038,
+      "grad_norm": 0.10293618589639664,
+      "learning_rate": 1.345447684291499e-05,
+      "loss": 0.1504,
+      "step": 38500
+    },
+    {
+      "epoch": 1.685903255090131,
+      "grad_norm": 0.6177674531936646,
+      "learning_rate": 1.323676686163805e-05,
+      "loss": 0.1244,
+      "step": 39000
+    },
+    {
+      "epoch": 1.7075173993861583,
+      "grad_norm": 0.2255789041519165,
+      "learning_rate": 1.3019056880361109e-05,
+      "loss": 0.1229,
+      "step": 39500
+    },
+    {
+      "epoch": 1.7291315436821857,
+      "grad_norm": 0.01264307089149952,
+      "learning_rate": 1.2801346899084167e-05,
+      "loss": 0.1199,
+      "step": 40000
+    },
+    {
+      "epoch": 1.7507456879782128,
+      "grad_norm": 21.45499610900879,
+      "learning_rate": 1.2583636917807225e-05,
+      "loss": 0.1234,
+      "step": 40500
+    },
+    {
+      "epoch": 1.7723598322742402,
+      "grad_norm": 0.02090781182050705,
+      "learning_rate": 1.2365926936530283e-05,
+      "loss": 0.121,
+      "step": 41000
+    },
+    {
+      "epoch": 1.7939739765702676,
+      "grad_norm": 0.054082971066236496,
+      "learning_rate": 1.2148216955253343e-05,
+      "loss": 0.1158,
+      "step": 41500
+    },
+    {
+      "epoch": 1.815588120866295,
+      "grad_norm": 0.04447195306420326,
+      "learning_rate": 1.19305069739764e-05,
+      "loss": 0.1183,
+      "step": 42000
+    },
+    {
+      "epoch": 1.8372022651623223,
+      "grad_norm": 38.29100799560547,
+      "learning_rate": 1.171279699269946e-05,
+      "loss": 0.1302,
+      "step": 42500
+    },
+    {
+      "epoch": 1.8588164094583495,
+      "grad_norm": 0.014447253197431564,
+      "learning_rate": 1.1495087011422518e-05,
+      "loss": 0.1011,
+      "step": 43000
+    },
+    {
+      "epoch": 1.8804305537543768,
+      "grad_norm": 0.0001848287502070889,
+      "learning_rate": 1.1277377030145576e-05,
+      "loss": 0.1161,
+      "step": 43500
+    },
+    {
+      "epoch": 1.9020446980504042,
+      "grad_norm": 0.003150364151224494,
+      "learning_rate": 1.1059667048868634e-05,
+      "loss": 0.1332,
+      "step": 44000
+    },
+    {
+      "epoch": 1.9236588423464314,
+      "grad_norm": 0.2078738808631897,
+      "learning_rate": 1.0841957067591692e-05,
+      "loss": 0.1409,
+      "step": 44500
+    },
+    {
+      "epoch": 1.945272986642459,
+      "grad_norm": 0.059768468141555786,
+      "learning_rate": 1.062424708631475e-05,
+      "loss": 0.1326,
+      "step": 45000
+    },
+    {
+      "epoch": 1.966887130938486,
+      "grad_norm": 0.038765549659729004,
+      "learning_rate": 1.0406537105037809e-05,
+      "loss": 0.0918,
+      "step": 45500
+    },
+    {
+      "epoch": 1.9885012752345135,
+      "grad_norm": 18.927526473999023,
+      "learning_rate": 1.0188827123760869e-05,
+      "loss": 0.1091,
+      "step": 46000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.838254282517001,
+      "eval_f1_0": 0.8413944458512703,
+      "eval_f1_1": 0.8349872661807324,
+      "eval_loss": 0.3542537987232208,
+      "eval_precision_0": 0.8544488256471798,
+      "eval_precision_1": 0.8219225449515906,
+      "eval_recall_0": 0.828732956434985,
+      "eval_recall_1": 0.8484740317686953,
+      "eval_runtime": 6835.7043,
+      "eval_samples_per_second": 1.699,
+      "eval_steps_per_second": 0.85,
+      "step": 46266
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 69399,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.5826915766462054e+18,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}