Training in progress, step 50
Browse files- config.json +1 -1
- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- trainer_log.jsonl +66 -63
- training_args.bin +1 -1
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/workspace/titulm/saves/Llama-3.2-3B/sft-train-
|
| 3 |
"architectures": [
|
| 4 |
"LlamaForCausalLM"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/workspace/titulm/saves/Llama-3.2-3B/sft-train-3/checkpoint-50",
|
| 3 |
"architectures": [
|
| 4 |
"LlamaForCausalLM"
|
| 5 |
],
|
model-00001-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4973656136
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18c19d7cd296b10a0b8d7ce9eed0cc41c8b3ea3005f0a3800fcb0277548b2bb9
|
| 3 |
size 4973656136
|
model-00002-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1711401640
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96e592f43c5294cb21d3dadc1936e652474f930bb5e53897ab6fd6dc3bc5f9de
|
| 3 |
size 1711401640
|
trainer_log.jsonl
CHANGED
|
@@ -1,63 +1,66 @@
|
|
| 1 |
-
{"current_steps": 1, "total_steps":
|
| 2 |
-
{"current_steps": 2, "total_steps":
|
| 3 |
-
{"current_steps": 3, "total_steps":
|
| 4 |
-
{"current_steps": 4, "total_steps":
|
| 5 |
-
{"current_steps": 5, "total_steps":
|
| 6 |
-
{"current_steps": 6, "total_steps":
|
| 7 |
-
{"current_steps": 7, "total_steps":
|
| 8 |
-
{"current_steps": 8, "total_steps":
|
| 9 |
-
{"current_steps": 9, "total_steps":
|
| 10 |
-
{"current_steps": 10, "total_steps":
|
| 11 |
-
{"current_steps": 11, "total_steps":
|
| 12 |
-
{"current_steps": 12, "total_steps":
|
| 13 |
-
{"current_steps": 13, "total_steps":
|
| 14 |
-
{"current_steps": 14, "total_steps":
|
| 15 |
-
{"current_steps": 15, "total_steps":
|
| 16 |
-
{"current_steps": 16, "total_steps":
|
| 17 |
-
{"current_steps": 17, "total_steps":
|
| 18 |
-
{"current_steps": 18, "total_steps":
|
| 19 |
-
{"current_steps": 19, "total_steps":
|
| 20 |
-
{"current_steps": 20, "total_steps":
|
| 21 |
-
{"current_steps": 21, "total_steps":
|
| 22 |
-
{"current_steps": 22, "total_steps":
|
| 23 |
-
{"current_steps": 23, "total_steps":
|
| 24 |
-
{"current_steps": 24, "total_steps":
|
| 25 |
-
{"current_steps": 25, "total_steps":
|
| 26 |
-
{"current_steps": 26, "total_steps":
|
| 27 |
-
{"current_steps": 27, "total_steps":
|
| 28 |
-
{"current_steps": 28, "total_steps":
|
| 29 |
-
{"current_steps": 29, "total_steps":
|
| 30 |
-
{"current_steps": 30, "total_steps":
|
| 31 |
-
{"current_steps": 31, "total_steps":
|
| 32 |
-
{"current_steps": 32, "total_steps":
|
| 33 |
-
{"current_steps": 33, "total_steps":
|
| 34 |
-
{"current_steps": 34, "total_steps":
|
| 35 |
-
{"current_steps": 35, "total_steps":
|
| 36 |
-
{"current_steps": 36, "total_steps":
|
| 37 |
-
{"current_steps": 37, "total_steps":
|
| 38 |
-
{"current_steps": 38, "total_steps":
|
| 39 |
-
{"current_steps": 39, "total_steps":
|
| 40 |
-
{"current_steps": 40, "total_steps":
|
| 41 |
-
{"current_steps": 41, "total_steps":
|
| 42 |
-
{"current_steps": 42, "total_steps":
|
| 43 |
-
{"current_steps": 43, "total_steps":
|
| 44 |
-
{"current_steps": 44, "total_steps":
|
| 45 |
-
{"current_steps": 45, "total_steps":
|
| 46 |
-
{"current_steps": 46, "total_steps":
|
| 47 |
-
{"current_steps": 47, "total_steps":
|
| 48 |
-
{"current_steps": 48, "total_steps":
|
| 49 |
-
{"current_steps": 49, "total_steps":
|
| 50 |
-
{"current_steps": 50, "total_steps":
|
| 51 |
-
{"current_steps": 51, "total_steps":
|
| 52 |
-
{"current_steps": 52, "total_steps":
|
| 53 |
-
{"current_steps": 53, "total_steps":
|
| 54 |
-
{"current_steps": 54, "total_steps":
|
| 55 |
-
{"current_steps": 55, "total_steps":
|
| 56 |
-
{"current_steps": 56, "total_steps":
|
| 57 |
-
{"current_steps": 57, "total_steps":
|
| 58 |
-
{"current_steps": 58, "total_steps":
|
| 59 |
-
{"current_steps": 59, "total_steps":
|
| 60 |
-
{"current_steps": 60, "total_steps":
|
| 61 |
-
{"current_steps": 61, "total_steps":
|
| 62 |
-
{"current_steps": 62, "total_steps":
|
| 63 |
-
{"current_steps": 63, "total_steps":
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"current_steps": 1, "total_steps": 435, "loss": 1.0671, "lr": 2.0000000000000003e-06, "epoch": 0.0068775790921595595, "percentage": 0.23, "elapsed_time": "0:00:24", "remaining_time": "2:55:51"}
|
| 2 |
+
{"current_steps": 2, "total_steps": 435, "loss": 1.0833, "lr": 4.000000000000001e-06, "epoch": 0.013755158184319119, "percentage": 0.46, "elapsed_time": "0:00:36", "remaining_time": "2:11:05"}
|
| 3 |
+
{"current_steps": 3, "total_steps": 435, "loss": 1.0313, "lr": 6e-06, "epoch": 0.02063273727647868, "percentage": 0.69, "elapsed_time": "0:00:48", "remaining_time": "1:55:58"}
|
| 4 |
+
{"current_steps": 4, "total_steps": 435, "loss": 1.0645, "lr": 8.000000000000001e-06, "epoch": 0.027510316368638238, "percentage": 0.92, "elapsed_time": "0:01:00", "remaining_time": "1:48:20"}
|
| 5 |
+
{"current_steps": 5, "total_steps": 435, "loss": 1.0358, "lr": 1e-05, "epoch": 0.0343878954607978, "percentage": 1.15, "elapsed_time": "0:01:12", "remaining_time": "1:43:42"}
|
| 6 |
+
{"current_steps": 6, "total_steps": 435, "loss": 1.0538, "lr": 9.999866555428619e-06, "epoch": 0.04126547455295736, "percentage": 1.38, "elapsed_time": "0:01:24", "remaining_time": "1:40:32"}
|
| 7 |
+
{"current_steps": 7, "total_steps": 435, "loss": 1.0822, "lr": 9.999466228837452e-06, "epoch": 0.048143053645116916, "percentage": 1.61, "elapsed_time": "0:01:36", "remaining_time": "1:38:13"}
|
| 8 |
+
{"current_steps": 8, "total_steps": 435, "loss": 1.0687, "lr": 9.998799041595064e-06, "epoch": 0.055020632737276476, "percentage": 1.84, "elapsed_time": "0:01:48", "remaining_time": "1:36:27"}
|
| 9 |
+
{"current_steps": 9, "total_steps": 435, "loss": 1.058, "lr": 9.997865029314464e-06, "epoch": 0.061898211829436035, "percentage": 2.07, "elapsed_time": "0:02:00", "remaining_time": "1:35:01"}
|
| 10 |
+
{"current_steps": 10, "total_steps": 435, "loss": 1.0813, "lr": 9.996664241851197e-06, "epoch": 0.0687757909215956, "percentage": 2.3, "elapsed_time": "0:02:12", "remaining_time": "1:33:50"}
|
| 11 |
+
{"current_steps": 11, "total_steps": 435, "loss": 1.0043, "lr": 9.995196743300693e-06, "epoch": 0.07565337001375516, "percentage": 2.53, "elapsed_time": "0:02:24", "remaining_time": "1:32:50"}
|
| 12 |
+
{"current_steps": 12, "total_steps": 435, "loss": 1.0392, "lr": 9.993462611994833e-06, "epoch": 0.08253094910591471, "percentage": 2.76, "elapsed_time": "0:02:36", "remaining_time": "1:31:59"}
|
| 13 |
+
{"current_steps": 13, "total_steps": 435, "loss": 1.0456, "lr": 9.991461940497786e-06, "epoch": 0.08940852819807428, "percentage": 2.99, "elapsed_time": "0:02:48", "remaining_time": "1:31:13"}
|
| 14 |
+
{"current_steps": 14, "total_steps": 435, "loss": 1.0848, "lr": 9.989194835601048e-06, "epoch": 0.09628610729023383, "percentage": 3.22, "elapsed_time": "0:03:00", "remaining_time": "1:30:32"}
|
| 15 |
+
{"current_steps": 15, "total_steps": 435, "loss": 1.0622, "lr": 9.986661418317759e-06, "epoch": 0.1031636863823934, "percentage": 3.45, "elapsed_time": "0:03:12", "remaining_time": "1:29:55"}
|
| 16 |
+
{"current_steps": 16, "total_steps": 435, "loss": 1.0732, "lr": 9.983861823876231e-06, "epoch": 0.11004126547455295, "percentage": 3.68, "elapsed_time": "0:03:24", "remaining_time": "1:29:21"}
|
| 17 |
+
{"current_steps": 17, "total_steps": 435, "loss": 1.0611, "lr": 9.980796201712734e-06, "epoch": 0.11691884456671252, "percentage": 3.91, "elapsed_time": "0:03:36", "remaining_time": "1:28:50"}
|
| 18 |
+
{"current_steps": 18, "total_steps": 435, "loss": 1.0793, "lr": 9.977464715463525e-06, "epoch": 0.12379642365887207, "percentage": 4.14, "elapsed_time": "0:03:48", "remaining_time": "1:28:21"}
|
| 19 |
+
{"current_steps": 19, "total_steps": 435, "loss": 1.0525, "lr": 9.973867542956104e-06, "epoch": 0.13067400275103164, "percentage": 4.37, "elapsed_time": "0:04:00", "remaining_time": "1:27:54"}
|
| 20 |
+
{"current_steps": 20, "total_steps": 435, "loss": 1.0845, "lr": 9.970004876199731e-06, "epoch": 0.1375515818431912, "percentage": 4.6, "elapsed_time": "0:04:12", "remaining_time": "1:27:29"}
|
| 21 |
+
{"current_steps": 21, "total_steps": 435, "loss": 1.082, "lr": 9.965876921375165e-06, "epoch": 0.14442916093535077, "percentage": 4.83, "elapsed_time": "0:04:25", "remaining_time": "1:27:04"}
|
| 22 |
+
{"current_steps": 22, "total_steps": 435, "loss": 1.0947, "lr": 9.961483898823679e-06, "epoch": 0.15130674002751032, "percentage": 5.06, "elapsed_time": "0:04:37", "remaining_time": "1:26:41"}
|
| 23 |
+
{"current_steps": 23, "total_steps": 435, "loss": 1.0993, "lr": 9.956826043035268e-06, "epoch": 0.15818431911966988, "percentage": 5.29, "elapsed_time": "0:04:49", "remaining_time": "1:26:19"}
|
| 24 |
+
{"current_steps": 24, "total_steps": 435, "loss": 1.103, "lr": 9.951903602636166e-06, "epoch": 0.16506189821182943, "percentage": 5.52, "elapsed_time": "0:05:01", "remaining_time": "1:25:58"}
|
| 25 |
+
{"current_steps": 25, "total_steps": 435, "loss": 1.0702, "lr": 9.946716840375552e-06, "epoch": 0.171939477303989, "percentage": 5.75, "elapsed_time": "0:05:13", "remaining_time": "1:25:37"}
|
| 26 |
+
{"current_steps": 26, "total_steps": 435, "loss": 1.0898, "lr": 9.94126603311153e-06, "epoch": 0.17881705639614856, "percentage": 5.98, "elapsed_time": "0:05:25", "remaining_time": "1:25:17"}
|
| 27 |
+
{"current_steps": 27, "total_steps": 435, "loss": 1.0751, "lr": 9.935551471796358e-06, "epoch": 0.1856946354883081, "percentage": 6.21, "elapsed_time": "0:05:37", "remaining_time": "1:24:58"}
|
| 28 |
+
{"current_steps": 28, "total_steps": 435, "loss": 1.0651, "lr": 9.92957346146091e-06, "epoch": 0.19257221458046767, "percentage": 6.44, "elapsed_time": "0:05:49", "remaining_time": "1:24:39"}
|
| 29 |
+
{"current_steps": 29, "total_steps": 435, "loss": 1.0826, "lr": 9.923332321198396e-06, "epoch": 0.19944979367262725, "percentage": 6.67, "elapsed_time": "0:06:01", "remaining_time": "1:24:21"}
|
| 30 |
+
{"current_steps": 30, "total_steps": 435, "loss": 1.0536, "lr": 9.91682838414733e-06, "epoch": 0.2063273727647868, "percentage": 6.9, "elapsed_time": "0:06:13", "remaining_time": "1:24:04"}
|
| 31 |
+
{"current_steps": 31, "total_steps": 435, "loss": 1.0824, "lr": 9.910061997473753e-06, "epoch": 0.21320495185694635, "percentage": 7.13, "elapsed_time": "0:06:25", "remaining_time": "1:23:46"}
|
| 32 |
+
{"current_steps": 32, "total_steps": 435, "loss": 1.0861, "lr": 9.903033522352688e-06, "epoch": 0.2200825309491059, "percentage": 7.36, "elapsed_time": "0:06:37", "remaining_time": "1:23:29"}
|
| 33 |
+
{"current_steps": 33, "total_steps": 435, "loss": 1.0752, "lr": 9.895743333948875e-06, "epoch": 0.22696011004126548, "percentage": 7.59, "elapsed_time": "0:06:49", "remaining_time": "1:23:12"}
|
| 34 |
+
{"current_steps": 34, "total_steps": 435, "loss": 1.071, "lr": 9.888191821396745e-06, "epoch": 0.23383768913342504, "percentage": 7.82, "elapsed_time": "0:07:01", "remaining_time": "1:22:56"}
|
| 35 |
+
{"current_steps": 35, "total_steps": 435, "loss": 1.0592, "lr": 9.880379387779637e-06, "epoch": 0.2407152682255846, "percentage": 8.05, "elapsed_time": "0:07:14", "remaining_time": "1:22:40"}
|
| 36 |
+
{"current_steps": 36, "total_steps": 435, "loss": 1.1034, "lr": 9.872306450108294e-06, "epoch": 0.24759284731774414, "percentage": 8.28, "elapsed_time": "0:07:26", "remaining_time": "1:22:24"}
|
| 37 |
+
{"current_steps": 37, "total_steps": 435, "loss": 1.0485, "lr": 9.863973439298597e-06, "epoch": 0.2544704264099037, "percentage": 8.51, "elapsed_time": "0:07:38", "remaining_time": "1:22:08"}
|
| 38 |
+
{"current_steps": 38, "total_steps": 435, "loss": 1.0816, "lr": 9.855380800148573e-06, "epoch": 0.2613480055020633, "percentage": 8.74, "elapsed_time": "0:07:50", "remaining_time": "1:21:52"}
|
| 39 |
+
{"current_steps": 39, "total_steps": 435, "loss": 1.0487, "lr": 9.846528991314638e-06, "epoch": 0.26822558459422285, "percentage": 8.97, "elapsed_time": "0:08:02", "remaining_time": "1:21:36"}
|
| 40 |
+
{"current_steps": 40, "total_steps": 435, "loss": 1.0357, "lr": 9.837418485287126e-06, "epoch": 0.2751031636863824, "percentage": 9.2, "elapsed_time": "0:08:14", "remaining_time": "1:21:21"}
|
| 41 |
+
{"current_steps": 41, "total_steps": 435, "loss": 1.0468, "lr": 9.82804976836507e-06, "epoch": 0.28198074277854196, "percentage": 9.43, "elapsed_time": "0:08:26", "remaining_time": "1:21:06"}
|
| 42 |
+
{"current_steps": 42, "total_steps": 435, "loss": 1.0692, "lr": 9.81842334063023e-06, "epoch": 0.28885832187070154, "percentage": 9.66, "elapsed_time": "0:08:38", "remaining_time": "1:20:51"}
|
| 43 |
+
{"current_steps": 43, "total_steps": 435, "loss": 1.1063, "lr": 9.808539715920415e-06, "epoch": 0.29573590096286106, "percentage": 9.89, "elapsed_time": "0:08:50", "remaining_time": "1:20:36"}
|
| 44 |
+
{"current_steps": 44, "total_steps": 435, "loss": 1.0884, "lr": 9.798399421802057e-06, "epoch": 0.30261348005502064, "percentage": 10.11, "elapsed_time": "0:09:02", "remaining_time": "1:20:21"}
|
| 45 |
+
{"current_steps": 45, "total_steps": 435, "loss": 1.0781, "lr": 9.78800299954203e-06, "epoch": 0.30949105914718017, "percentage": 10.34, "elapsed_time": "0:09:14", "remaining_time": "1:20:07"}
|
| 46 |
+
{"current_steps": 46, "total_steps": 435, "loss": 1.0367, "lr": 9.777351004078784e-06, "epoch": 0.31636863823933975, "percentage": 10.57, "elapsed_time": "0:09:26", "remaining_time": "1:19:52"}
|
| 47 |
+
{"current_steps": 47, "total_steps": 435, "loss": 1.0439, "lr": 9.766444003992704e-06, "epoch": 0.32324621733149933, "percentage": 10.8, "elapsed_time": "0:09:38", "remaining_time": "1:19:38"}
|
| 48 |
+
{"current_steps": 48, "total_steps": 435, "loss": 1.0491, "lr": 9.755282581475769e-06, "epoch": 0.33012379642365886, "percentage": 11.03, "elapsed_time": "0:09:50", "remaining_time": "1:19:24"}
|
| 49 |
+
{"current_steps": 49, "total_steps": 435, "loss": 1.1172, "lr": 9.743867332300478e-06, "epoch": 0.33700137551581844, "percentage": 11.26, "elapsed_time": "0:10:02", "remaining_time": "1:19:10"}
|
| 50 |
+
{"current_steps": 50, "total_steps": 435, "loss": 1.0595, "lr": 9.732198865788047e-06, "epoch": 0.343878954607978, "percentage": 11.49, "elapsed_time": "0:10:15", "remaining_time": "1:18:55"}
|
| 51 |
+
{"current_steps": 51, "total_steps": 435, "loss": 1.0546, "lr": 9.720277804775879e-06, "epoch": 0.35075653370013754, "percentage": 11.72, "elapsed_time": "0:10:50", "remaining_time": "1:21:38"}
|
| 52 |
+
{"current_steps": 52, "total_steps": 435, "loss": 1.0956, "lr": 9.708104785584324e-06, "epoch": 0.3576341127922971, "percentage": 11.95, "elapsed_time": "0:11:02", "remaining_time": "1:21:20"}
|
| 53 |
+
{"current_steps": 53, "total_steps": 435, "loss": 1.0446, "lr": 9.695680457982713e-06, "epoch": 0.36451169188445665, "percentage": 12.18, "elapsed_time": "0:11:14", "remaining_time": "1:21:03"}
|
| 54 |
+
{"current_steps": 54, "total_steps": 435, "loss": 1.0271, "lr": 9.683005485154677e-06, "epoch": 0.3713892709766162, "percentage": 12.41, "elapsed_time": "0:11:26", "remaining_time": "1:20:46"}
|
| 55 |
+
{"current_steps": 55, "total_steps": 435, "loss": 1.0415, "lr": 9.670080543662742e-06, "epoch": 0.3782668500687758, "percentage": 12.64, "elapsed_time": "0:11:38", "remaining_time": "1:20:28"}
|
| 56 |
+
{"current_steps": 56, "total_steps": 435, "loss": 1.0525, "lr": 9.656906323412216e-06, "epoch": 0.38514442916093533, "percentage": 12.87, "elapsed_time": "0:11:50", "remaining_time": "1:20:11"}
|
| 57 |
+
{"current_steps": 57, "total_steps": 435, "loss": 1.08, "lr": 9.643483527614372e-06, "epoch": 0.3920220082530949, "percentage": 13.1, "elapsed_time": "0:12:03", "remaining_time": "1:19:55"}
|
| 58 |
+
{"current_steps": 58, "total_steps": 435, "loss": 1.0232, "lr": 9.629812872748901e-06, "epoch": 0.3988995873452545, "percentage": 13.33, "elapsed_time": "0:12:15", "remaining_time": "1:19:39"}
|
| 59 |
+
{"current_steps": 59, "total_steps": 435, "loss": 1.0505, "lr": 9.615895088525677e-06, "epoch": 0.405777166437414, "percentage": 13.56, "elapsed_time": "0:12:27", "remaining_time": "1:19:22"}
|
| 60 |
+
{"current_steps": 60, "total_steps": 435, "loss": 1.0695, "lr": 9.601730917845798e-06, "epoch": 0.4126547455295736, "percentage": 13.79, "elapsed_time": "0:12:39", "remaining_time": "1:19:06"}
|
| 61 |
+
{"current_steps": 61, "total_steps": 435, "loss": 1.0814, "lr": 9.587321116761938e-06, "epoch": 0.4195323246217332, "percentage": 14.02, "elapsed_time": "0:12:51", "remaining_time": "1:18:50"}
|
| 62 |
+
{"current_steps": 62, "total_steps": 435, "loss": 1.0655, "lr": 9.572666454437992e-06, "epoch": 0.4264099037138927, "percentage": 14.25, "elapsed_time": "0:13:03", "remaining_time": "1:18:34"}
|
| 63 |
+
{"current_steps": 63, "total_steps": 435, "loss": 1.0558, "lr": 9.557767713108009e-06, "epoch": 0.4332874828060523, "percentage": 14.48, "elapsed_time": "0:13:15", "remaining_time": "1:18:19"}
|
| 64 |
+
{"current_steps": 64, "total_steps": 435, "loss": 1.0738, "lr": 9.542625688034449e-06, "epoch": 0.4401650618982118, "percentage": 14.71, "elapsed_time": "0:13:27", "remaining_time": "1:18:03"}
|
| 65 |
+
{"current_steps": 65, "total_steps": 435, "loss": 1.0859, "lr": 9.527241187465735e-06, "epoch": 0.4470426409903714, "percentage": 14.94, "elapsed_time": "0:13:40", "remaining_time": "1:17:48"}
|
| 66 |
+
{"current_steps": 66, "total_steps": 435, "loss": 1.0582, "lr": 9.511615032593096e-06, "epoch": 0.45392022008253097, "percentage": 15.17, "elapsed_time": "0:13:52", "remaining_time": "1:17:32"}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7416
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8134158f56609fe6ecda420f50473eb4eaf86cad1b2189ba1ac1fc9b861a4bb5
|
| 3 |
size 7416
|