yuntian-deng
/

computer-doom-nibik-032k-updated-autoencoder-16x-fixed-222222-128k

Safetensors

Model card Files and versions

xet

Community

yuntian-deng committed 5 days ago

Commit

cc941ea

verified ·

1 Parent(s): b08bbdb

Upload config.json with huggingface_hub

Browse files

Files changed (1) hide show

config.json +126 -0

config.json ADDED Viewed

	@@ -0,0 +1,126 @@

+{
+  "save_path": "saved_standard_challenging_context32_nocond_cont_cont_all_cont_eval",
+  "model": {
+    "base_learning_rate": 8e-05,
+    "target": "latent_diffusion.ldm.models.diffusion.ddpm.LatentDiffusion",
+    "params": {
+      "linear_start": 0.0015,
+      "linear_end": 0.0195,
+      "num_timesteps_cond": 1,
+      "log_every_t": 200,
+      "timesteps": 1000,
+      "first_stage_key": "image",
+      "cond_stage_key": "action_",
+      "scheduler_sampling_rate": 0.0,
+      "hybrid_key": "c_concat",
+      "image_size": [
+        64,
+        48
+      ],
+      "channels": 3,
+      "cond_stage_trainable": false,
+      "conditioning_key": "hybrid",
+      "monitor": "val/loss_simple_ema",
+      "unet_config": {
+        "target": "latent_diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel",
+        "params": {
+          "image_size": [
+            64,
+            48
+          ],
+          "in_channels": 8,
+          "out_channels": 4,
+          "model_channels": 192,
+          "attention_resolutions": [
+            8,
+            4,
+            2
+          ],
+          "num_res_blocks": 2,
+          "channel_mult": [
+            1,
+            2,
+            3,
+            5
+          ],
+          "num_head_channels": 32,
+          "use_spatial_transformer": false,
+          "transformer_depth": 1
+        }
+      },
+      "temporal_encoder_config": {
+        "target": "latent_diffusion.ldm.modules.encoders.temporal_encoder.TemporalEncoder",
+        "params": {
+          "input_channels": 6,
+          "hidden_size": 1024,
+          "num_layers": 1,
+          "dropout": 0.1,
+          "output_channels": 4,
+          "output_height": 48,
+          "output_width": 64
+        }
+      },
+      "first_stage_config": {
+        "target": "latent_diffusion.ldm.models.autoencoder.AutoencoderKL",
+        "params": {
+          "embed_dim": 4,
+          "monitor": "val/rec_loss",
+          "ddconfig": {
+            "double_z": true,
+            "z_channels": 4,
+            "resolution": 256,
+            "in_channels": 3,
+            "out_ch": 3,
+            "ch": 128,
+            "ch_mult": [
+              1,
+              2,
+              4,
+              4
+            ],
+            "num_res_blocks": 2,
+            "attn_resolutions": [],
+            "dropout": 0.0
+          },
+          "lossconfig": {
+            "target": "torch.nn.Identity"
+          }
+        }
+      },
+      "cond_stage_config": "__is_unconditional__"
+    }
+  },
+  "data": {
+    "target": "data.data_processing.datasets.DataModule",
+    "params": {
+      "batch_size": 8,
+      "num_workers": 1,
+      "wrap": false,
+      "shuffle": true,
+      "drop_last": true,
+      "pin_memory": true,
+      "prefetch_factor": 2,
+      "persistent_workers": true,
+      "train": {
+        "target": "data.data_processing.datasets.ActionsData",
+        "params": {
+          "data_csv_path": "desktop_sequences_filtered_with_desktop_1.5k.challenging.train.target_frames.csv",
+          "normalization": "standard",
+          "context_length": 32
+        }
+      }
+    }
+  },
+  "lightning": {
+    "trainer": {
+      "benchmark": false,
+      "max_epochs": 6400,
+      "limit_val_batches": 0,
+      "accelerator": "gpu",
+      "gpus": 1,
+      "accumulate_grad_batches": 999999,
+      "gradient_clip_val": 1,
+      "checkpoint_callback": true
+    }
+  }
+}