JohanDL
/

qwen_1.5B_s1_custom_no_80

+{
+    "experiment_id": "qwen_1.5b_s1_experiment_no_80_20250922_212348",
+    "config_path": "configs/experiment_config_no_80.yaml",
+    "training_config": batch_size: 16
+epochs: 5
+hub_model_id: JohanDL/qwen_1.5B_s1_custom_no_80
+learning_rate: 1e-5
+loss_type: topk_cross_entropy
+model_name: Qwen/Qwen2.5-1.5B-Instruct
+push_to_hub: true
+slurm:
+  cpus_per_task: 16
+  gres: gpu:4
+  job_name: train_s1
+  mem: 256G
+  nodes: 1
+  ntasks_per_node: 1
+  partition: gpu
+  time: 3-00:00:00
+topk_k: 128
+topk_temperature: 1.0
+train_dataset_name: s1K_tokenized
+use_custom_loss: true
+wandb_entity: loevliedenny
+wandb_project: Qwen2.5-1.5B-Instruct-s1-top128
+weight_decay: 1e-4
+,
+    "completed_at": "Mon Sep 22 21:52:06 EDT 2025",
+    "slurm_job_id": "15764216"
+}