xingjianleng's picture
update: upload sdvae + vavae ckpts
892ef8c
{
"output_dir": "exps",
"exp_name": "sit-xl-1-dinov2-b-enc8-vavae-freq1-lr1e-4-momentum0.1-l1-lpips-kl-gan-vaealign1.5-compile-400k",
"logging_dir": "logs",
"report_to": "wandb",
"sampling_steps": 10000,
"resume_step": 0,
"continue_train_exp_dir": null,
"wandb_history_path": null,
"model": "SiT-XL/1",
"num_classes": 1000,
"encoder_depth": 8,
"fused_attn": true,
"qk_norm": false,
"bn_momentum": 0.1,
"compile": true,
"data_dir": "data/imagenet-latents",
"resolution": 256,
"batch_size": 256,
"allow_tf32": true,
"mixed_precision": "fp16",
"epochs": 1400,
"max_train_steps": 400000,
"checkpointing_steps": 50000,
"gradient_accumulation_steps": 1,
"learning_rate": 0.0001,
"adam_beta1": 0.9,
"adam_beta2": 0.999,
"adam_weight_decay": 0.0,
"adam_epsilon": 1e-08,
"max_grad_norm": 1.0,
"seed": 0,
"num_workers": 4,
"path_type": "linear",
"prediction": "v",
"cfg_prob": 0.1,
"enc_type": "dinov2-vit-b",
"proj_coeff": 0.5,
"weighting": "uniform",
"legacy": false,
"vae": "f16d32",
"vae_ckpt": "pretrained_models/vavae-f16d32.pt",
"use_sd_vae_stats": false,
"vae_repa_align": false,
"freeze_bn_stats": false,
"imagenette_stats": 0,
"disc_pretrained_ckpt": "pretrained_models/vavae-discriminator-ckpt.pt",
"loss_cfg_path": "configs/loss_cfg/l1_lpips_kl_gan.yaml",
"vae_learning_rate": 0.0001,
"disc_learning_rate": 0.0001,
"vae_train_freq": 1,
"vae_align": true,
"vae_align_proj_coeff": 1.5
}