science-of-finetuning
/

gemma-2-2b-L13-k100-lr1e-04-local-shuffling-SAELoss

+{
+    "trainer": {
+        "trainer_class": "BatchTopKCrossCoderTrainer",
+        "dict_class": "BatchTopKCrossCoder",
+        "lr": 0.0001,
+        "steps": 97656,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": null,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1152,
+        "seed": null,
+        "activation_dim": 2304,
+        "dict_size": 73728,
+        "k": 100,
+        "sparsity_loss_type": "SAE",
+        "sparsity_loss_alpha_sae": 1.0,
+        "sparsity_loss_alpha_cc": 0.1,
+        "device": "cuda",
+        "layer": 13,
+        "lm_name": "google/gemma-2-2b-it-google/gemma-2-2b",
+        "wandb_name": "gemma-2-2b-L13-k100-lr1e-04-local-shuffling-SAELoss",
+        "submodule_name": null,
+        "dict_class_kwargs": {
+            "same_init_for_all_layers": "True",
+            "norm_init_scale": "1.0",
+            "init_with_transpose": "True",
+            "encoder_layers": "None",
+            "sparsity_loss_type": "SAE",
+            "sparsity_loss_alpha_sae": "1.0",
+            "sparsity_loss_alpha_cc": "0.1"
+        }
+    }
+}