gaparmar committed · Commit 13ed5cd · Parent: d3864b2

sketch demo

Files changed:
- app.py (+4 -8)
- src/pix2pix_turbo.py (+37 -24)
app.py
CHANGED

@@ -1,7 +1,3 @@
-"""
-3.43.1
-"""
-
 import os
 import sys
 import pdb
@@ -78,7 +74,8 @@ def run(image, prompt, prompt_template, style_name, seed, val_r):
     print("sketch updated")
     if image is None:
         ones = Image.new("L", (512, 512), 255)
-
+        temp_uri = pil_image_to_data_uri(ones)
+        return ones, gr.update(link=temp_uri), gr.update(link=temp_uri)
     prompt = prompt_template.replace("{prompt}", prompt)
     image = image.convert("RGB")
     image_t = TF.to_tensor(image) > 0.5
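The new early return calls pil_image_to_data_uri(ones) so that, when the canvas is empty, the download links still point at a valid blank image; gr.update(link=temp_uri) then refreshes the two linked components without rebuilding them. The helper itself is defined elsewhere in app.py and is not part of this diff; a minimal sketch of what it plausibly looks like, assuming a base64-encoded PNG data URI:

# Sketch of the pil_image_to_data_uri helper called above (defined elsewhere in
# app.py, not shown in this commit). Assumes a PNG-encoded base64 data URI.
import base64
from io import BytesIO

from PIL import Image

def pil_image_to_data_uri(img: Image.Image) -> str:
    buffer = BytesIO()
    img.save(buffer, format="PNG")  # serialize the PIL image to an in-memory PNG
    b64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{b64}"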
@@ -234,8 +231,8 @@ with gr.Blocks(css="style.css") as demo:
             <div class="pad2"> <button href="TODO" download="image" id="my-button-down" onclick='return theSketchDownloadFunction()'></button> </div>
         </div>
         """)
-        gr.Markdown("## Prompt", elem_id="tools_header")
-        prompt = gr.Textbox(label=
+        # gr.Markdown("## Prompt", elem_id="tools_header")
+        prompt = gr.Textbox(label="Prompt", value="", show_label=True)
         with gr.Row():
             style = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME, scale=1)
             prompt_temp = gr.Textbox(label="Prompt Style Template", value=styles[DEFAULT_STYLE_NAME], scale=2, max_lines=1)
@@ -269,4 +266,3 @@ with gr.Blocks(css="style.css") as demo:
 
 if __name__ == "__main__":
     demo.queue().launch(debug=True)
-
src/pix2pix_turbo.py
CHANGED

@@ -1,4 +1,6 @@
-import os
+import os
+import requests
+import sys
 import pdb
 import copy
 from tqdm import tqdm
@@ -7,11 +9,13 @@ from transformers import AutoTokenizer, PretrainedConfig, CLIPTextModel
 from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler
 from diffusers.utils.peft_utils import set_weights_and_activate_adapters
 from peft import LoraConfig
-
+p = "src/"
+sys.path.append(p)
+from model import make_1step_sched
 
 
+"""The forward method of the `Encoder` class."""
 def my_vae_encoder_fwd(self, sample):
-    r"""The forward method of the `Encoder` class."""
     sample = self.conv_in(sample)
     l_blocks = []
     # down
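The added import pulls make_1step_sched from the repo's src/model.py, which this commit does not include. Since the class below steps self.sched exactly once at timestep 999, a plausible sketch of that factory, assuming diffusers' DDPMScheduler (hypothetical, unverified against the actual model.py):

# Hypothetical sketch of make_1step_sched; model.py is not part of this diff.
from diffusers import DDPMScheduler

def make_1step_sched():
    sched = DDPMScheduler.from_pretrained("stabilityai/sd-turbo", subfolder="scheduler")
    sched.set_timesteps(1, device="cuda")  # single denoising step, matching t=999 below
    sched.alphas_cumprod = sched.alphas_cumprod.to("cuda")
    return sched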
@@ -27,6 +31,7 @@ def my_vae_encoder_fwd(self, sample):
     return sample
 
 
+"""The forward method of the `Decoder` class."""
 def my_vae_decoder_fwd(self,sample, latent_embeds = None):
     sample = self.conv_in(sample)
     upscale_dtype = next(iter(self.up_blocks.parameters())).dtype
@@ -76,21 +81,33 @@ class Pix2Pix_Turbo(torch.nn.Module):
         vae = AutoencoderKL.from_pretrained("stabilityai/sd-turbo", subfolder="vae")
         unet = UNet2DConditionModel.from_pretrained("stabilityai/sd-turbo", subfolder="unet")
 
-        if name=="
-
-
-
-
-
-
-
-
+        if name=="edge_to_image":
+            url = "https://www.cs.cmu.edu/~img2img-turbo/models/edge_to_image_loras.pkl"
+            os.makedirs(ckpt_folder, exist_ok=True)
+            outf = os.path.join(ckpt_folder, "edge_to_image_loras.pkl")
+            if not os.path.exists(outf):
+                print(f"Downloading checkpoint to {outf}")
+                response = requests.get(url, stream=True)
+                total_size_in_bytes= int(response.headers.get('content-length', 0))
+                block_size = 1024 # 1 Kibibyte
+                progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
+                with open(outf, 'wb') as file:
+                    for data in response.iter_content(block_size):
+                        progress_bar.update(len(data))
+                        file.write(data)
+                progress_bar.close()
+                if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
+                    print("ERROR, something went wrong")
+                print(f"Downloaded successfully to {outf}")
+            p_ckpt = outf
+            sd = torch.load(p_ckpt, map_location="cpu")
+            unet_lora_config = LoraConfig(r=sd["rank_unet"], init_lora_weights="gaussian", target_modules=sd["unet_lora_target_modules"])
 
         if name=="sketch_to_image_stochastic":
             # download from url
-            url = "https://www.cs.cmu.edu/~
+            url = "https://www.cs.cmu.edu/~img2img-turbo/models/sketch_to_image_stochastic_lora.pkl"
             os.makedirs(ckpt_folder, exist_ok=True)
-            outf = os.path.join(ckpt_folder, "
+            outf = os.path.join(ckpt_folder, "sketch_to_image_stochastic_lora.pkl")
             if not os.path.exists(outf):
                 print(f"Downloading checkpoint to {outf}")
                 response = requests.get(url, stream=True)
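With this hunk, the streaming-download block (requests.get(..., stream=True), tqdm progress bar, content-length check) appears in both the edge_to_image and sketch_to_image_stochastic branches. A hypothetical refactor, not part of this commit, that factors the duplicated pattern into one helper:

# Hypothetical refactor (not in this commit): one download helper for both branches.
import os

import requests
from tqdm import tqdm

def download_checkpoint(url: str, outf: str, block_size: int = 1024) -> str:
    """Stream `url` to `outf` with a tqdm progress bar; skip if already present."""
    if os.path.exists(outf):
        return outf
    os.makedirs(os.path.dirname(outf), exist_ok=True)
    response = requests.get(url, stream=True)
    total = int(response.headers.get("content-length", 0))
    progress_bar = tqdm(total=total, unit="iB", unit_scale=True)
    with open(outf, "wb") as f:
        for data in response.iter_content(block_size):
            progress_bar.update(len(data))
            f.write(data)
    progress_bar.close()
    if total != 0 and progress_bar.n != total:
        print("ERROR, something went wrong")
    return outf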
@@ -105,7 +122,6 @@ class Pix2Pix_Turbo(torch.nn.Module):
                 if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
                     print("ERROR, something went wrong")
                 print(f"Downloaded successfully to {outf}")
-            # p_ckpt = "/home/gparmar/code/img2img-turbo/single_step_translation/notebooks/DEMO/sketch_to_image_stochastic.pkl"
             p_ckpt = outf
             sd = torch.load(p_ckpt, map_location="cpu")
             unet_lora_config = LoraConfig(r=sd["rank_unet"], init_lora_weights="gaussian", target_modules=sd["unet_lora_target_modules"])
@@ -123,15 +139,17 @@ class Pix2Pix_Turbo(torch.nn.Module):
         vae.decoder.ignore_skip = False
         vae.add_adapter(vae_lora_config, adapter_name="vae_skip")
         unet.add_adapter(unet_lora_config)
-        unet.
+        _sd_unet = unet.state_dict()
+        for k in sd["state_dict_unet"]: _sd_unet[k] = sd["state_dict_unet"][k]
+        unet.load_state_dict(_sd_unet)
         unet.enable_xformers_memory_efficient_attention()
-
-
+        _sd_vae = vae.state_dict()
+        for k in sd["state_dict_vae"]: _sd_vae[k] = sd["state_dict_vae"][k]
+        vae.load_state_dict(_sd_vae)
         unet.to("cuda")
         vae.to("cuda")
         unet.eval()
         vae.eval()
-
         self.unet, self.vae = unet, vae
         self.timesteps = torch.tensor([999], device="cuda").long()
 
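Note the loading pattern introduced here: the checkpoint's LoRA tensors are copied over a complete base state_dict, which is then loaded strictly, so a misnamed key still raises. A looser, hypothetical alternative is PyTorch's non-strict load, which skips the base keys absent from the checkpoint instead of requiring a merged dict:

# Hypothetical alternative to the merge-then-load pattern above; `unet` and `sd`
# are the objects from the diff. strict=False ignores keys missing on either side.
missing, unexpected = unet.load_state_dict(sd["state_dict_unet"], strict=False)
assert not unexpected  # every checkpoint tensor should map onto the adapted UNet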
@@ -141,7 +159,6 @@ class Pix2Pix_Turbo(torch.nn.Module):
         caption_tokens = self.tokenizer(prompt, max_length=self.tokenizer.model_max_length,
                                         padding="max_length", truncation=True, return_tensors="pt").input_ids.cuda()
         caption_enc = self.text_encoder(caption_tokens)[0]
-
         if deterministic:
             encoded_control = self.vae.encode(c_t).latent_dist.sample()*self.vae.config.scaling_factor
             model_pred = self.unet(encoded_control, self.timesteps, encoder_hidden_states=caption_enc,).sample
@@ -161,8 +178,4 @@ class Pix2Pix_Turbo(torch.nn.Module):
         x_denoised = self.sched.step(unet_output, self.timesteps, unet_input, return_dict=True).prev_sample
         self.vae.decoder.incoming_skip_acts = self.vae.encoder.current_down_blocks
         output_image = (self.vae.decode(x_denoised / self.vae.config.scaling_factor ).sample).clamp(-1,1)
-
         return output_image
-
-
-
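End to end, the constructor now downloads the branch-specific LoRA checkpoint and patches it into the SD-Turbo UNet and VAE, and forward() runs a single denoising step at t=999 before decoding through the skip-connected VAE. A hypothetical usage sketch; the constructor argument comes from the diff, but the import path and the exact forward() signature are inferred, not confirmed here:

# Hypothetical usage; import path and forward() argument order are assumptions.
import torch
from PIL import Image
from torchvision.transforms import functional as TF

from src.pix2pix_turbo import Pix2Pix_Turbo

model = Pix2Pix_Turbo("sketch_to_image_stochastic")
sketch = Image.open("sketch.png").convert("RGB")
c_t = (TF.to_tensor(sketch) > 0.5).float().unsqueeze(0).cuda()  # binarized, as in app.py
with torch.no_grad():
    output = model(c_t, "a photo of a cat", deterministic=True)  # tensor clamped to [-1, 1]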