Utilize HF's "balanced" device_map + dynamically pair diffusion components to relevant execution cores

#1
by diopside - opened
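
The idea of the change: instead of pushing the whole pipeline to a single `cuda` device, let Accelerate spread the sub-models across the visible GPUs with `device_map="balanced"`, read back where each component landed from `pipe.hf_device_map`, and then pin the ControlNet (and the seed generator) to the same device as the VAE so they stay colocated. A minimal sketch of that pattern is below; the `from diffusers import ...` path, the three-GPU split, and the exact device indices are assumptions about this Space's hardware, not something `"balanced"` guarantees.

```python
import torch
from diffusers import QwenImageControlNetInpaintPipeline, QwenImageControlNetModel

# Let Accelerate shard the sub-models (transformer, text_encoder, vae, ...)
# across whatever GPUs are visible instead of loading everything onto cuda:0.
pipe = QwenImageControlNetInpaintPipeline.from_pretrained(
    "Qwen/Qwen-Image",
    controlnet=None,                 # attach the ControlNet afterwards
    torch_dtype=torch.bfloat16,
    device_map="balanced",
)

# hf_device_map reports the placement Accelerate chose,
# e.g. {'transformer': 0, 'text_encoder': 1, 'vae': 2} on a 3-GPU machine.
vae_device = f"cuda:{pipe.hf_device_map['vae']}"

# Load the ControlNet separately and colocate it with the VAE.
controlnet = QwenImageControlNetModel.from_pretrained(
    "InstantX/Qwen-Image-ControlNet-Inpainting",
    torch_dtype=torch.bfloat16,
).to(vae_device)
pipe.controlnet = controlnet

# Any torch.Generator passed to the pipeline should live on that same device.
generator = torch.Generator(device=vae_device).manual_seed(0)
```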
Files changed (1)
  1. app.py +34 -7
app.py CHANGED
@@ -71,12 +71,39 @@ def use_output_as_input(output_image):
 base_model = "Qwen/Qwen-Image"
 controlnet_model = "InstantX/Qwen-Image-ControlNet-Inpainting"
 
-controlnet = QwenImageControlNetModel.from_pretrained(controlnet_model, torch_dtype=torch.bfloat16)
-
+# First create the pipeline with device_map="balanced"
 pipe = QwenImageControlNetInpaintPipeline.from_pretrained(
-    base_model, controlnet=controlnet, torch_dtype=torch.bfloat16
+    base_model,
+    controlnet=None,  # We'll add the controlnet later
+    torch_dtype=torch.bfloat16,
+    device_map="balanced"
 )
-pipe.to("cuda")
+
+pipe_device_map = pipe.hf_device_map
+print("Initial device map:", pipe_device_map)
+# Expected output: {'transformer': 0, 'text_encoder': 1, 'vae': 2}
+
+# Move the controlnet to the same device as the VAE (cuda:2)
+vae_device = pipe_device_map['vae']
+vae_device = f"cuda:{vae_device}"  # This is where the VAE is in the balanced config
+controlnet = QwenImageControlNetModel.from_pretrained(
+    controlnet_model,
+    torch_dtype=torch.bfloat16
+).to(vae_device)
+
+# Attach the controlnet to the pipeline
+pipe.controlnet = controlnet
+
+pipe.enable_vae_slicing()
+pipe.enable_vae_tiling()
+
+print("Controlnet device:", next(pipe.controlnet.parameters()).device)
+print("VAE device:", next(pipe.vae.parameters()).device)
+
+
+# Create a helper function to get a generator on the correct device
+def get_generator(seed):
+    return torch.Generator(device=vae_device).manual_seed(seed)
 
 
 @spaces.GPU(duration=150)
@@ -93,7 +120,7 @@ def infer(edit_images,
 
     image = edit_images["background"]
     mask = edit_images["layers"][0]
-
+
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
 
@@ -113,7 +140,7 @@ def infer(edit_images,
         width=image.size[0],
         height=image.size[1],
         true_cfg_scale=true_cfg_scale,
-        generator=torch.Generator(device="cuda").manual_seed(seed)
+        generator=get_generator(seed)
     ).images[0]
 
     return [image, result_image], seed
@@ -140,7 +167,7 @@ css = """
 
 
 with gr.Blocks(css=css, theme=gr.themes.Citrus()) as demo:
-    gr.HTML("<h1 style='text-align: center'>Qwen-Image with InstantX Inpainting ControlNet</style>")
+    gr.HTML("<h1 style='text-align: center'>Qwen-Image + InstantX Inpainting ControlNet</style>")
     gr.Markdown(
         "Inpaint images with [InstantX/Qwen-Image-ControlNet-Inpainting](https://huggingface.co/InstantX/Qwen-Image-ControlNet-Inpainting)"
     )
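
One quick way to sanity-check the balanced placement at runtime (not part of the diff above, just a small verification sketch) is to print how much memory each visible GPU is holding right after the pipeline loads; roughly even numbers across devices suggest the split worked:

```python
import torch

# Rough check: each visible GPU should be holding a chunk of the weights
# once device_map="balanced" has placed the components.
for i in range(torch.cuda.device_count()):
    allocated_gib = torch.cuda.memory_allocated(i) / 1024**3
    print(f"cuda:{i}: {allocated_gib:.1f} GiB allocated")
```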