Upload 2 files
transformer/config.yaml
ADDED
@@ -0,0 +1,70 @@
+model:
+  type: PixArtVideo_XL_1x2x2
+  space_scale: 0.5
+  time_scale: 1.0
+  mlp_type: "llama"
+  #enable_rope: True
+  position_embed_spaltial: "absolute"
+  position_embed_temporal: "rope"
+
+  norm_type: "llamarmsnorm"
+  in_channels: 8 # to be consistent with the video VAE
+  temp_window_size: [-1, 8, 8] # window attention for the temporal attention
+  adain_with_text: True
+  qk_norm: False
+
+  prob_text_condition: 1.0
+  prob_img_condition: 0
+  prob_img_condition_attn: 0
+
+  class_dropout_prob: 0.1
+
+  grad_checkpointing: True
+
+  enable_frames_embedder: False
+  enable_tgt_size_embedder: False
+
+  clip_image_encoder: "pretrain_models/openai/clip-vit-large-patch14"
+
+vae:
+  type: "CausualVAEVideo"
+  # z=8
+  config: "configs/vae_config.yaml"
+  from_pretrained: "./pretrain_model/vidgen/vae/vae_pytorch_model.bin"
+
+
+text_encoder:
+  type: "t5"
+  from_pretrained: "pretrain_models/"
+  model_max_length: 200
+  shardformer: True
+
+
+diffusion:
+  type: "IDDPM"
+  snr: False
+  train_sampling_steps: 1000
+  prob_self_condition: 0
+  v_predict: False
+
+
+optimizer:
+  learning_rate: 1e-4
+  weight_decay: 0
+  eps: 1e-8
+  min_lr_ratio: 0.95
+  gradient_clip: 1.0
+
+num_frames_video: 17 # base number of frames in one video slice
+num_slice_for_long_video: -1 # how many 2-second slices a long video is split into; -1 denotes dynamic
+
+resolution_video: -1
+resolution_image: -1
+mode_various_resolution: False
+
+precision: "bf16"
+seed: 42
+workers: 4
+grad_checkpoint: False
+gradient_accumulation_steps: 4
+logging_steps: 10
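For reference, a minimal sketch of consuming this config from Python, assuming PyYAML (pip install pyyaml) and that the file sits at transformer/config.yaml; the section and key names come from the diff above, while the inline result comments are illustrative. One caveat worth knowing: YAML 1.1 loaders such as PyYAML read "1e-4" (no decimal point) as a string, so numeric hyperparameters are coerced explicitly here.

import yaml  # pip install pyyaml

with open("transformer/config.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["model"]["type"])         # PixArtVideo_XL_1x2x2
print(cfg["model"]["in_channels"])  # 8, matching the video VAE latent channels
print(cfg["num_frames_video"])      # 17 frames per video slice

# PyYAML parses "1e-4" and "1e-8" as strings (YAML 1.1 floats need a dot),
# so coerce before handing them to an optimizer.
lr = float(cfg["optimizer"]["learning_rate"])  # 0.0001
eps = float(cfg["optimizer"]["eps"])           # 1e-08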
transformer/diffusion_pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5df86cfb1fcaaeec882535ce381e99a6b9c98a59e82291c61ec5b25c76d7087b
+size 3636367638
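This .bin entry is a Git LFS pointer, not the weights themselves; git lfs pull (or a direct download from the hosting server) fetches the real ~3.6 GB payload. A minimal sketch, using only the sha256 and byte size recorded in the pointer above, for verifying that a downloaded copy is intact:

import hashlib

EXPECTED_OID = "5df86cfb1fcaaeec882535ce381e99a6b9c98a59e82291c61ec5b25c76d7087b"
EXPECTED_SIZE = 3636367638  # bytes, ~3.6 GB

def verify(path: str) -> bool:
    h = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            h.update(chunk)
            size += len(chunk)
    return size == EXPECTED_SIZE and h.hexdigest() == EXPECTED_OID

print(verify("transformer/diffusion_pytorch_model.bin"))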