mta122 committed
Commit a8e58c8 · 1 Parent(s): 40845cc
update
Changed files:
- __pycache__/categories.cpython-38.pyc +0 -0
- configs/finetune/finetune_bert.yaml +0 -128
- configs/finetune/finetune_clip.yaml +0 -118
- configs/finetune/finetune_generic.yaml +7 -7
- configs/finetune/finetune_multi_bert.yaml +0 -127
- configs/finetune/finetune_multi_clip.yaml +0 -118
- ldm/__pycache__/util.cpython-38.pyc +0 -0
- ldm/models/__pycache__/autoencoder.cpython-38.pyc +0 -0
- ldm/models/diffusion/__pycache__/__init__.cpython-38.pyc +0 -0
- ldm/models/diffusion/__pycache__/ddim.cpython-38.pyc +0 -0
- ldm/models/diffusion/__pycache__/ddpm.cpython-38.pyc +0 -0
- ldm/models/diffusion/__pycache__/plms.cpython-38.pyc +0 -0
- ldm/models/diffusion/ddpm.py +1 -105
- ldm/modules/__pycache__/attention.cpython-38.pyc +0 -0
- ldm/modules/__pycache__/discriminator.cpython-38.pyc +0 -0
- ldm/modules/__pycache__/ema.cpython-38.pyc +0 -0
- ldm/modules/__pycache__/x_transformer.cpython-38.pyc +0 -0
- ldm/modules/diffusionmodules/__pycache__/__init__.cpython-38.pyc +0 -0
- ldm/modules/diffusionmodules/__pycache__/model.cpython-38.pyc +0 -0
- ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-38.pyc +0 -0
- ldm/modules/diffusionmodules/__pycache__/util.cpython-38.pyc +0 -0
- ldm/modules/discriminator.py +0 -97
- ldm/modules/distributions/__pycache__/__init__.cpython-38.pyc +0 -0
- ldm/modules/distributions/__pycache__/distributions.cpython-38.pyc +0 -0
- ldm/modules/encoders/__pycache__/__init__.cpython-38.pyc +0 -0
- ldm/modules/encoders/__pycache__/modules.cpython-38.pyc +0 -0
- out/express/DRAGON-R.jpg +0 -0
- out/express/samples/0000.png +0 -0
- out/express/samples/0001.png +0 -0
- out/express/samples/0002.png +0 -0
- out/express/samples/0003.png +0 -0
- txt2img.py +0 -4
__pycache__/categories.cpython-38.pyc CHANGED
Binary files a/__pycache__/categories.cpython-38.pyc and b/__pycache__/categories.cpython-38.pyc differ
configs/finetune/finetune_bert.yaml DELETED
@@ -1,128 +0,0 @@
-model:
-  base_learning_rate: 1.0e-5
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "image"
-    cond_stage_key: "caption"
-    image_size: 32
-    channels: 4
-    cond_stage_trainable: False
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False
-    weight_disc: 0.01
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        image_size: 32
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_heads: 8
-        use_spatial_transformer: True
-        transformer_depth: 1
-        context_dim: 1280
-        use_checkpoint: True
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.BERTEmbedder
-      params:
-        n_embed: 1280
-        n_layer: 32
-        device: "cuda"
-
-    discriminator_config:
-      target: ldm.modules.discriminator.Discriminator
-      params:
-        bnorm: True
-        leakyparam: 0.2
-        bias: False
-        generic: False
-
-
-data:
-  target: main.DataModuleFromConfig
-  params:
-    batch_size: 1
-    num_workers: 32
-    wrap: false
-    train:
-      target: ldm.data.rasterizer.Rasterizer
-      params:
-        img_size: 256
-        text: "R"
-        style_word: "DRAGON"
-        data_path: "data/cat"
-        alternate_glyph: None
-        num_samples: 2001
-        make_black: False
-        one_font: False
-        full_word: False
-        font_name: "Garuda-Bold.ttf"
-        just_use_style: false
-        use_alt: False
-    validation:
-      target: ldm.data.rasterizer.Rasterizer
-      params:
-        img_size: 256
-        text: "R"
-        style_word: "DRAGON"
-        data_path: "data/cat"
-        alternate_glyph: None
-        num_samples: 5
-        make_black: False
-        one_font: False
-        full_word: False
-        font_name: "Garuda-Bold.ttf"
-        just_use_style: false
-        use_alt: False
-
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-  callbacks:
-    image_logger:
-      target: main.ImageLogger
-      params:
-        batch_frequency: 1000
-        max_images: 1
-        increase_log_steps: False
-
-  trainer:
-    benchmark: True
-    max_steps: 500
configs/finetune/finetune_clip.yaml DELETED
@@ -1,118 +0,0 @@
-model:
-  base_learning_rate: 1.0e-5 #1e-4
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "image"
-    cond_stage_key: "caption"
-    image_size: 64 # 32
-    channels: 4
-    cond_stage_trainable: False # Note: different from the one we trained before
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False
-    weight_disc: 0.01
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        image_size: 64 # unused
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_heads: 8
-        use_spatial_transformer: True
-        transformer_depth: 1
-        context_dim: 768 # 1280
-        use_checkpoint: True
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          double_z: true
-          z_channels: 4
-          resolution: 512 #256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
-
-    discriminator_config:
-      target: ldm.modules.discriminator.Discriminator64
-
-data:
-  target: main.DataModuleFromConfig
-  params:
-    batch_size: 1
-    num_workers: 32
-    wrap: false
-    train:
-      target: ldm.data.rasterizer.Rasterizer
-      params:
-        img_size: 256
-        text: "R"
-        style_word: "DRAGON"
-        data_path: "data/cat"
-        alternate_glyph: None
-        num_samples: 2001
-        make_black: False
-        one_font: False
-        full_word: False
-        font_name: "Garuda-Bold.ttf"
-        just_use_style: false
-        use_alt: False
-    validation:
-      target: ldm.data.rasterizer.Rasterizer
-      params:
-        img_size: 256
-        text: "R"
-        style_word: "DRAGON"
-        data_path: "data/cat"
-        alternate_glyph: None
-        num_samples: 5
-        make_black: False
-        one_font: False
-        full_word: False
-        font_name: "Garuda-Bold.ttf"
-        just_use_style: false
-        use_alt: False
-
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 200
-  callbacks:
-    image_logger:
-      target: main.ImageLogger
-      params:
-        batch_frequency: 100
-        max_images: 1
-        increase_log_steps: False
-
-  trainer:
-    benchmark: True
-    max_steps: 1001
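One detail worth noting in this deleted config: context_dim: 768 matches the width of FrozenCLIPEmbedder's token embeddings (CLIP ViT-L/14), whereas the BERTEmbedder configs use 1280; the UNet's cross-attention context must have exactly this width. A minimal shape sketch with stand-in tensors (illustrative, not repo code):

import torch

# Cross-attention context from the text encoder: (batch, tokens, context_dim).
clip_context = torch.randn(1, 77, 768)    # FrozenCLIPEmbedder output width
bert_context = torch.randn(1, 77, 1280)   # BERTEmbedder (n_embed: 1280) width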
configs/finetune/finetune_generic.yaml CHANGED
@@ -65,13 +65,13 @@ model:
         n_layer: 32
         device: "cuda"
 
-    discriminator_config:
-      target: ldm.modules.discriminator.Discriminator
-      params:
-        bnorm: True
-        leakyparam: 0.2
-        bias: False
-        generic: True
+    # discriminator_config:
+    #   target: ldm.modules.discriminator.Discriminator
+    #   params:
+    #     bnorm: True
+    #     leakyparam: 0.2
+    #     bias: False
+    #     generic: True
 
 
 data:
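Commenting out discriminator_config here pairs with the ddpm.py hunk below, where the corresponding instantiate_from_config(discriminator_config) call is also commented out. Roughly, that helper (it lives in ldm/util.py; this is a sketch from memory, not the verbatim source) builds the object named by target with params as keyword arguments:

import importlib

def get_obj_from_str(string):
    # "ldm.modules.discriminator.Discriminator" -> the Discriminator class
    module, cls = string.rsplit(".", 1)
    return getattr(importlib.import_module(module), cls)

def instantiate_from_config(config):
    # Build config["target"], passing config["params"] as keyword arguments.
    return get_obj_from_str(config["target"])(**config.get("params", dict()))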
configs/finetune/finetune_multi_bert.yaml DELETED
@@ -1,127 +0,0 @@
-model:
-  base_learning_rate: 1.0e-5 #1e-4
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "image"
-    cond_stage_key: "caption"
-    image_size: 32 # 32
-    channels: 4
-    cond_stage_trainable: False # Note: different from the one we trained before
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False
-    weight_disc: 0.01
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        image_size: 32 # unused
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_heads: 8
-        use_spatial_transformer: True
-        transformer_depth: 1
-        context_dim: 1280 # 1280
-        use_checkpoint: True
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          double_z: true
-          z_channels: 4
-          resolution: 256 #256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.BERTEmbedder
-      params:
-        n_embed: 1280
-        n_layer: 32
-
-    discriminator_config:
-      target: ldm.modules.discriminator.Discriminator
-      params:
-        bnorm: True
-        leakyparam: 0.2
-        bias: False
-        generic: False
-
-
-data:
-  target: main.DataModuleFromConfig
-  params:
-    batch_size: 1
-    num_workers: 32
-    wrap: false
-    train:
-      target: ldm.data.rasterizer.Rasterizer
-      params:
-        img_size: 256
-        text: "R"
-        style_word: "DRAGON"
-        data_path: "data/cat"
-        alternate_glyph: None
-        num_samples: 2001
-        make_black: False
-        one_font: False
-        full_word: False
-        font_name: "Garuda-Bold.ttf"
-        just_use_style: false
-        use_alt: False
-    validation:
-      target: ldm.data.rasterizer.Rasterizer
-      params:
-        img_size: 256
-        text: "R"
-        style_word: "DRAGON"
-        data_path: "data/cat"
-        alternate_glyph: None
-        num_samples: 5
-        make_black: False
-        one_font: False
-        full_word: False
-        font_name: "Garuda-Bold.ttf"
-        just_use_style: false
-        use_alt: False
-
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 2000
-  callbacks:
-    image_logger:
-      target: main.ImageLogger
-      params:
-        batch_frequency: 5000
-        max_images: 1
-        increase_log_steps: False
-
-  trainer:
-    benchmark: True
-    max_steps: 800
configs/finetune/finetune_multi_clip.yaml DELETED
@@ -1,118 +0,0 @@
-model:
-  base_learning_rate: 1.0e-5 #1e-4
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "image"
-    cond_stage_key: "caption"
-    image_size: 64 # 32
-    channels: 4
-    cond_stage_trainable: False # Note: different from the one we trained before
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False
-    weight_disc: 0.01
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        image_size: 64 # unused
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_heads: 8
-        use_spatial_transformer: True
-        transformer_depth: 1
-        context_dim: 768 # 1280
-        use_checkpoint: True
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          double_z: true
-          z_channels: 4
-          resolution: 512 #256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
-
-    discriminator_config:
-      target: ldm.modules.discriminator.Discriminator64
-
-data:
-  target: main.DataModuleFromConfig
-  params:
-    batch_size: 1
-    num_workers: 32
-    wrap: false
-    train:
-      target: ldm.data.rasterizer.Rasterizer
-      params:
-        img_size: 256
-        text: "R"
-        style_word: "DRAGON"
-        data_path: "data/cat"
-        alternate_glyph: None
-        num_samples: 2001
-        make_black: False
-        one_font: False
-        full_word: False
-        font_name: "Garuda-Bold.ttf"
-        just_use_style: false
-        use_alt: False
-    validation:
-      target: ldm.data.rasterizer.Rasterizer
-      params:
-        img_size: 256
-        text: "R"
-        style_word: "DRAGON"
-        data_path: "data/cat"
-        alternate_glyph: None
-        num_samples: 5
-        make_black: False
-        one_font: False
-        full_word: False
-        font_name: "Garuda-Bold.ttf"
-        just_use_style: false
-        use_alt: False
-
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 200
-  callbacks:
-    image_logger:
-      target: main.ImageLogger
-      params:
-        batch_frequency: 100
-        max_images: 1
-        increase_log_steps: False
-
-  trainer:
-    benchmark: True
-    max_steps: 1501
ldm/__pycache__/util.cpython-38.pyc CHANGED
Binary files a/ldm/__pycache__/util.cpython-38.pyc and b/ldm/__pycache__/util.cpython-38.pyc differ

ldm/models/__pycache__/autoencoder.cpython-38.pyc CHANGED
Binary files a/ldm/models/__pycache__/autoencoder.cpython-38.pyc and b/ldm/models/__pycache__/autoencoder.cpython-38.pyc differ

ldm/models/diffusion/__pycache__/__init__.cpython-38.pyc CHANGED
Binary files a/ldm/models/diffusion/__pycache__/__init__.cpython-38.pyc and b/ldm/models/diffusion/__pycache__/__init__.cpython-38.pyc differ

ldm/models/diffusion/__pycache__/ddim.cpython-38.pyc CHANGED
Binary files a/ldm/models/diffusion/__pycache__/ddim.cpython-38.pyc and b/ldm/models/diffusion/__pycache__/ddim.cpython-38.pyc differ

ldm/models/diffusion/__pycache__/ddpm.cpython-38.pyc CHANGED
Binary files a/ldm/models/diffusion/__pycache__/ddpm.cpython-38.pyc and b/ldm/models/diffusion/__pycache__/ddpm.cpython-38.pyc differ

ldm/models/diffusion/__pycache__/plms.cpython-38.pyc CHANGED
Binary files a/ldm/models/diffusion/__pycache__/plms.cpython-38.pyc and b/ldm/models/diffusion/__pycache__/plms.cpython-38.pyc differ
ldm/models/diffusion/ddpm.py CHANGED
@@ -485,7 +485,7 @@ class LatentDiffusion(DDPM):
             self.init_from_ckpt(ckpt_path, ignore_keys)
             self.restarted_from_ckpt = True
 
-        self.discriminator = instantiate_from_config(discriminator_config)
+        # self.discriminator = instantiate_from_config(discriminator_config)
         self.weight_disc = weight_disc
         self.iter = 0
 
@@ -919,84 +919,6 @@
         return z_C, alpha
 
 
-    def discriminator_loss(self, batch, optimizer_idx=0):
-        #
-        criterion = nn.BCELoss()
-
-        real_label = 1.
-        fake_label = 0.
-
-        caption = batch["cond"]
-        with torch.no_grad():
-            cond = self.get_learned_conditioning(caption)
-
-            img1 = rearrange(batch["style"]["image"], 'b h w c -> b c h w')
-            save_image(img1, "img_style.png")
-            img1_base = img1.to(memory_format=torch.contiguous_format).float()
-            img1 = self.encode_first_stage(img1_base)
-            z_S = self.get_first_stage_encoding(img1).detach()
-
-            img2 = rearrange(batch["base"]["image"], 'b h w c -> b c h w')
-            save_image(img2, "img_base.png")
-            img2_base = img2.to(memory_format=torch.contiguous_format).float()
-            img2 = self.encode_first_stage(img2_base)
-            z_R = self.get_first_stage_encoding(img2).detach()
-
-        x_start = z_S
-        real_x = z_R
-
-        t = torch.randint(0, self.num_timesteps, (z_S.shape[0],), device=self.device).long()
-        logvar_t = self.logvar[t.cpu()].to(self.device)
-
-        noise = default(None, lambda: torch.randn_like(z_S))
-
-        x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise)
-
-        letter = batch["number"][0].cpu().detach().numpy()
-
-        # update generator
-        if optimizer_idx == 0:
-
-            noise1 = self.apply_model(x_noisy, t, cond)
-            z_theta = self.predict_start_from_noise(x_noisy, t, noise1)
-            fake_x = z_theta
-
-            loss_diff = self.get_loss(noise1, noise, mean=False).mean([1, 2, 3])
-            loss_diff = loss_diff / torch.exp(logvar_t) + logvar_t
-            loss_diff = self.l_simple_weight * loss_diff.mean()
-
-            label = torch.full((1,), real_label, dtype=torch.float, device=self.device)
-
-            output = self.discriminator(fake_x, letter).view(-1)
-            loss_disc = criterion(output, label)
-
-            return loss_diff, loss_disc
-
-        # update discriminator
-        if optimizer_idx == 1:
-
-            noise1 = self.apply_model(x_noisy, t, cond)
-            z_theta = self.predict_start_from_noise(x_noisy, t, noise1)
-            fake_x = z_theta
-
-            loss_diff = self.get_loss(noise1, noise, mean=False).mean([1, 2, 3])
-            loss_diff = loss_diff / torch.exp(logvar_t) + logvar_t
-            loss_diff = self.l_simple_weight * loss_diff.mean()
-
-            label = torch.full((1,), real_label, dtype=torch.float, device=self.device)
-            output = self.discriminator(real_x, letter).view(-1)
-            loss1 = criterion(output, label)
-
-            label = torch.full((1,), fake_label, dtype=torch.float, device=self.device)
-            output = self.discriminator(fake_x, letter).view(-1)
-            loss2 = criterion(output, label)
-
-            loss_disc = (loss1 + loss2) / 2
-
-            return loss_diff, loss_disc
-
-
     def make_images(self, batch):
         batch = batch["base"]
         use_ddim = 50
@@ -1073,32 +995,6 @@
             Image.fromarray(x_sample.astype(np.uint8)).save(os.path.join("out_cur/", f"{base_count:04}.png"))
             base_count += 1
 
-    def training_step(self, batch, batch_idx, optimizer_idx=None):
-
-        loss_diff, loss_disc = self.discriminator_loss(batch, optimizer_idx=optimizer_idx)
-        loss = loss_diff + self.weight_disc * loss_disc
-        self.iter += 1
-
-        if (self.iter - 1) % 100 == 0:
-            self.log_view(batch)
-
-        # if self.iter == batch["epochs"]:
-        #     self.last_step_run(batch)
-
-        return loss
-
-    @torch.no_grad()
-    def validation_step(self, batch, optimizer_idx):
-
-        return None
-        cap = batch["cond"]
-        batch = batch["base"]
-        batch["caption"] = cap
-
-        loss, loss_dict_no_ema = self.shared_step(batch)
-        self.log_dict(loss_dict_no_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True)
-
     def forward(self, x, c, *args, **kwargs):
         t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long()
         if self.model.conditioning_key is not None:
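The removed training_step followed the usual two-optimizer GAN recipe on top of the diffusion objective, combining loss = loss_diff + weight_disc * loss_disc. A minimal self-contained sketch of that pattern (stand-in tensors, not repo code):

import torch
from torch import nn

# optimizer_idx == 0 scored fakes against the "real" label (generator update);
# optimizer_idx == 1 averaged BCE on reals-as-real and fakes-as-fake
# (discriminator update).
criterion = nn.BCELoss()
real = torch.tensor([1.0])
fake = torch.tensor([0.0])

d_real = torch.rand(1)  # stand-in for discriminator(real_x, letter).view(-1)
d_fake = torch.rand(1)  # stand-in for discriminator(fake_x, letter).view(-1)

gen_loss = criterion(d_fake, real)                                   # idx == 0
disc_loss = (criterion(d_real, real) + criterion(d_fake, fake)) / 2  # idx == 1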
ldm/modules/__pycache__/attention.cpython-38.pyc CHANGED
Binary files a/ldm/modules/__pycache__/attention.cpython-38.pyc and b/ldm/modules/__pycache__/attention.cpython-38.pyc differ

ldm/modules/__pycache__/discriminator.cpython-38.pyc CHANGED
Binary files a/ldm/modules/__pycache__/discriminator.cpython-38.pyc and b/ldm/modules/__pycache__/discriminator.cpython-38.pyc differ

ldm/modules/__pycache__/ema.cpython-38.pyc CHANGED
Binary files a/ldm/modules/__pycache__/ema.cpython-38.pyc and b/ldm/modules/__pycache__/ema.cpython-38.pyc differ

ldm/modules/__pycache__/x_transformer.cpython-38.pyc CHANGED
Binary files a/ldm/modules/__pycache__/x_transformer.cpython-38.pyc and b/ldm/modules/__pycache__/x_transformer.cpython-38.pyc differ

ldm/modules/diffusionmodules/__pycache__/__init__.cpython-38.pyc CHANGED
Binary files a/ldm/modules/diffusionmodules/__pycache__/__init__.cpython-38.pyc and b/ldm/modules/diffusionmodules/__pycache__/__init__.cpython-38.pyc differ

ldm/modules/diffusionmodules/__pycache__/model.cpython-38.pyc CHANGED
Binary files a/ldm/modules/diffusionmodules/__pycache__/model.cpython-38.pyc and b/ldm/modules/diffusionmodules/__pycache__/model.cpython-38.pyc differ

ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-38.pyc CHANGED
Binary files a/ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-38.pyc and b/ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-38.pyc differ

ldm/modules/diffusionmodules/__pycache__/util.cpython-38.pyc CHANGED
Binary files a/ldm/modules/diffusionmodules/__pycache__/util.cpython-38.pyc and b/ldm/modules/diffusionmodules/__pycache__/util.cpython-38.pyc differ
ldm/modules/discriminator.py DELETED
@@ -1,97 +0,0 @@
-from torch import nn
-import pdb
-import torch
-
-# to use with clip
-class Discriminator64(nn.Module):
-    def __init__(self, bnorm=True, leakyparam=0.0, bias=False, generic=False):
-        super(Discriminator64, self).__init__()
-
-        self.bnorm = bnorm
-        self.generic = generic
-
-        self.relu = nn.LeakyReLU(leakyparam, inplace=True)
-
-        self.bn2 = nn.BatchNorm2d(128)
-        self.bn3 = nn.BatchNorm2d(256)
-        self.bn4 = nn.BatchNorm2d(512)
-
-        self.layer1 = nn.Conv2d(4, 64, 4, 2, 1, bias=bias)
-        self.layer2 = nn.Conv2d(64, 128, 4, 2, 1, bias=bias)
-        self.layer3 = nn.Conv2d(128, 256, 4, 2, 1, bias=bias)
-        self.layer4 = nn.Conv2d(256, 512, 4, 2, 1, bias=bias)
-        if generic:
-            self.layer5 = nn.Conv2d(512, 26, 4, 1, 0, bias=bias)
-        else:
-            self.layer5 = nn.Conv2d(512, 1, 4, 1, 0, bias=bias)
-        self.sig = nn.Sigmoid()
-
-
-    def forward(self, input, letter):
-        out1 = self.relu(self.layer1(input))
-
-        if self.bnorm:
-            out2 = self.relu(self.bn2(self.layer2(out1)))
-            out3 = self.relu(self.bn3(self.layer3(out2)))
-            out4 = self.relu(self.bn4(self.layer4(out3)))
-        else:
-            out2 = self.relu(self.layer2(out1))
-            out3 = self.relu(self.layer3(out2))
-            out4 = self.relu(self.layer4(out3))
-
-        out5 = self.sig(self.layer5(out4))
-        out5 = out5.flatten()
-
-        if self.generic:
-            out5 = out5[letter].mean()
-        else:
-            out5 = out5.mean()
-
-        return out5
-
-
-# to use with bert
-class Discriminator(nn.Module):
-    def __init__(self, bnorm=True, leakyparam=0.0, bias=False, generic=False):
-        super(Discriminator, self).__init__()
-
-        self.bnorm = bnorm
-        self.generic = generic
-
-        self.relu = nn.LeakyReLU(leakyparam, inplace=True)
-        self.sig = nn.Sigmoid()
-        self.bn2 = nn.BatchNorm2d(128)
-        self.bn3 = nn.BatchNorm2d(256)
-        self.bn4 = nn.BatchNorm2d(512)
-
-        self.layer1 = nn.Conv2d(4, 64, 4, 2, 1, bias=bias)
-        self.layer2 = nn.Conv2d(64, 128, 4, 2, 1, bias=bias)
-        self.layer3 = nn.Conv2d(128, 256, 4, 2, 1, bias=bias)
-        self.layer4 = nn.Conv2d(256, 512, 4, 2, 1, bias=bias)
-        if generic:
-            self.layer5 = nn.Conv2d(512, 26, 2, 1, 0, bias=bias)
-        else:
-            self.layer5 = nn.Conv2d(512, 1, 2, 1, 0, bias=bias)
-
-    def forward(self, input, letter):
-
-        out1 = self.relu(self.layer1(input))
-
-        if self.bnorm:
-            out2 = self.relu(self.bn2(self.layer2(out1)))
-            out3 = self.relu(self.bn3(self.layer3(out2)))
-            out4 = self.relu(self.bn4(self.layer4(out3)))
-        else:
-            out2 = self.relu(self.layer2(out1))
-            out3 = self.relu(self.layer3(out2))
-            out4 = self.relu(self.layer4(out3))
-
-        out5 = self.sig(self.layer5(out4))
-        out5 = out5.flatten()
-
-        if self.generic:
-            out5 = out5[letter].mean()
-        else:
-            out5 = out5.mean()
-
-        return out5
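Shape walk-through for the deleted classes (a sketch, not repo code): each stride-2 conv halves the spatial size, which is why Discriminator, fed the 4x32x32 latents of the BERT configs, ends with a 2x2 kernel in layer5, while Discriminator64, fed 4x64x64 CLIP-config latents, ends with a 4x4 kernel.

import torch
from torch import nn

x = torch.randn(1, 4, 32, 32)  # a BERT-config latent
for conv in [nn.Conv2d(4, 64, 4, 2, 1), nn.Conv2d(64, 128, 4, 2, 1),
             nn.Conv2d(128, 256, 4, 2, 1), nn.Conv2d(256, 512, 4, 2, 1)]:
    x = conv(x)
print(x.shape)  # torch.Size([1, 512, 2, 2]); a 2x2 kernel then yields 1x1 scores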
ldm/modules/distributions/__pycache__/__init__.cpython-38.pyc CHANGED
Binary files a/ldm/modules/distributions/__pycache__/__init__.cpython-38.pyc and b/ldm/modules/distributions/__pycache__/__init__.cpython-38.pyc differ

ldm/modules/distributions/__pycache__/distributions.cpython-38.pyc CHANGED
Binary files a/ldm/modules/distributions/__pycache__/distributions.cpython-38.pyc and b/ldm/modules/distributions/__pycache__/distributions.cpython-38.pyc differ

ldm/modules/encoders/__pycache__/__init__.cpython-38.pyc CHANGED
Binary files a/ldm/modules/encoders/__pycache__/__init__.cpython-38.pyc and b/ldm/modules/encoders/__pycache__/__init__.cpython-38.pyc differ

ldm/modules/encoders/__pycache__/modules.cpython-38.pyc CHANGED
Binary files a/ldm/modules/encoders/__pycache__/modules.cpython-38.pyc and b/ldm/modules/encoders/__pycache__/modules.cpython-38.pyc differ
out/express/DRAGON-R.jpg CHANGED
out/express/samples/0000.png CHANGED
out/express/samples/0001.png CHANGED
out/express/samples/0002.png CHANGED
out/express/samples/0003.png CHANGED
txt2img.py CHANGED
@@ -132,10 +132,6 @@ if __name__ == "__main__":
     seed_everything(seed)
 
     # config = OmegaConf.load("configs/latent-diffusion/txt2img-1p4B-eval_with_tokens.yaml") # TODO: Optionally download from same location as ckpt and chnage this logic
-    if opt.H == 512:
-        config = OmegaConf.load("configs/finetune/finetune_clip.yaml")
-    else:
-        config = OmegaConf.load("configs/finetune/finetune_bert.yaml")
 
     config = OmegaConf.load("configs/finetune/finetune_generic.yaml")
     # config = OmegaConf.load("configs/latent-diffusion/txt2img-1p4B-finetune2.yaml")
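With the resolution branch removed, txt2img.py always loads the generic config. For reference, OmegaConf exposes the loaded YAML with attribute access (a sketch assuming the repo layout above):

from omegaconf import OmegaConf

config = OmegaConf.load("configs/finetune/finetune_generic.yaml")
print(config.model.target)  # e.g. ldm.models.diffusion.ddpm.LatentDiffusion
print(config.data.params.batch_size)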