bo.l committed
Commit 46657b2 · 1 Parent(s): ee3f2c0

update input1

Files changed (1):
  1. app.py +129 -84
app.py CHANGED
@@ -1,7 +1,9 @@
 import gradio as gr
 import numpy as np
 import random
-import spaces  #[uncomment to use ZeroGPU]
 from kontext.pipeline_flux_kontext import FluxKontextPipeline
 from kontext.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
 from diffusers import FluxTransformer2DModel
@@ -9,6 +11,9 @@ import torch
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
 
 def resize_by_bucket(images_pil, resolution=512):
     assert len(images_pil) > 0, "images_pil must not be empty"
     bucket_override = [
@@ -18,14 +23,9 @@ def resize_by_bucket(images_pil, resolution=512):
         (552, 472), (592, 440), (624, 416), (664, 400),
         (696, 376), (728, 360), (752, 344), (784, 336),
     ]
-    bucket_override = [
-        (int(h / 512 * resolution), int(w / 512 * resolution))
-        for h, w in bucket_override
-    ]
-    bucket_override = [
-        (h // 16 * 16, w // 16 * 16)
-        for h, w in bucket_override
-    ]
 
     aspect_ratios = [img.height / img.width for img in images_pil]
     mean_aspect_ratio = float(np.mean(aspect_ratios))
@@ -38,60 +38,88 @@ def resize_by_bucket(images_pil, resolution=512):
             min_aspect_diff = aspect_diff
             new_h, new_w = h, w
 
-    resized_images = [
-        img.resize((new_w, new_h), resample=Image.BICUBIC) for img in images_pil
-    ]
     return resized_images
 
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 flux_pipeline = FluxKontextPipeline.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev")
 flux_pipeline.scheduler = FlowMatchEulerDiscreteScheduler.from_config(flux_pipeline.scheduler.config)
 flux_pipeline.vae.to(device).to(torch.bfloat16)
 flux_pipeline.text_encoder.to(device).to(torch.bfloat16)
 flux_pipeline.text_encoder_2.to(device).to(torch.bfloat16)
-flux_pipeline.scheduler.config.stochastic_sampling = False
 ckpt_path = hf_hub_download("NoobDoge/Multi_Ref_Model", "full_ema_model.safetensors")
-# new_weight = load_file(ckpt_path)
 flux_pipeline.transformer.from_single_file(ckpt_path, torch_dtype=torch.bfloat16)
 flux_pipeline.transformer.to(device).to(torch.bfloat16)
 
 MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 512
 
-
-@spaces.GPU  #[uncomment to use ZeroGPU]
 def infer(
     prompt,
-    ref1,
-    ref2,
     seed,
     randomize_seed,
     width,
     height,
-    guidance_scale,
     num_inference_steps,
     progress=gr.Progress(track_tqdm=True),
 ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
-    raw_images = [resize_by_bucket(x) for x in raw_images]
-    generator = torch.Generator().manual_seed(seed)
 
     with torch.no_grad():
-        output_img = flux_pipeline(
-            image = raw_images,
-            prompt = prompts,
-            height = height,
-            width = width,
-            num_inference_steps = num_inference_steps,
-            max_area=MAX_IMAGE_SIZE**2,
             generator=generator,
-        ).images[0]
-
-    return image, seed
 
 
 examples = [
     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
     "An astronaut riding a green horse",
@@ -110,66 +138,83 @@ with gr.Blocks(css=css) as demo:
     gr.Markdown("# Text-to-Image Gradio Template")
 
     with gr.Row():
-        prompt = gr.Text(label="Prompt", show_label=False, max_lines=1,
-                         placeholder="Enter your prompt", container=False)
         run_button = gr.Button("Run", scale=0, variant="primary")
 
-    # two input images; ref2 may be left empty
     with gr.Row():
-        ref1 = gr.Image(label="Input Image 1", type="pil")
-        ref2 = gr.Image(label="Input Image 2", type="pil")
 
     result = gr.Image(label="Result", show_label=False)
 
     with gr.Accordion("Advanced Settings", open=False):
-        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
-        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
         with gr.Row():
-            width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
-            height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
         with gr.Row():
-            guidance_scale = gr.Slider(label="Guidance scale", minimum=0.0, maximum=10.0, step=0.1, value=0.0)
-            num_inference_steps = gr.Slider(label="Number of inference steps", minimum=1, maximum=50, step=1, value=2)
-
-        examples = [
-            ["a cute corgi in a wizard hat"],
-            ["a watercolor painting of yosemite valley at sunrise"],
-        ]
-        gr.Examples(examples=examples, inputs=[prompt])
-
-        # state holding the variable-length list of reference images
-        refs_state = gr.State([])
-
-        # pack both images into state first, filtering out None, so ref2 is optional
-        def pack_refs(a, b):
-            return [x for x in (a, b) if x is not None]
-
-        # your inference function takes a *list* of refs
-        def infer(prompt, refs, seed, randomize_seed, width, height, guidance_scale, num_steps):
-            # pad to length 2 if needed: [ref1, None]
-            if len(refs) == 0:
-                refs = [None, None]
-            elif len(refs) == 1:
-                refs = [refs[0], None]
-
-            # TODO: call your model here with refs[0] and refs[1] (the second may be None)
-            # out_img = ...
-            # used_seed = ...
-            return out_img, used_seed
-
-        # step 1: pack ref1/ref2 into refs_state
-        dep = gr.on(
-            triggers=[run_button.click, prompt.submit],
-            fn=pack_refs,
-            inputs=[ref1, ref2],
-            outputs=refs_state,
-        )
-        # step 2: pass the packed list on to infer
-        dep.then(
-            fn=infer,
-            inputs=[prompt, refs_state, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
-            outputs=[result, seed],
-        )
 
 if __name__ == "__main__":
-    demo.launch()
 
app.py (new version):
 import gradio as gr
 import numpy as np
 import random
+import spaces  # [uncomment to use ZeroGPU]
+from PIL import Image
+
 from kontext.pipeline_flux_kontext import FluxKontextPipeline
 from kontext.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
 from diffusers import FluxTransformer2DModel
 
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
 
+# ---------------------------
+# utils
+# ---------------------------
 def resize_by_bucket(images_pil, resolution=512):
     assert len(images_pil) > 0, "images_pil must not be empty"
     bucket_override = [
 
         (552, 472), (592, 440), (624, 416), (664, 400),
         (696, 376), (728, 360), (752, 344), (784, 336),
     ]
+    # scale the buckets to the target resolution, then snap to multiples of 16
+    bucket_override = [(int(h / 512 * resolution), int(w / 512 * resolution)) for h, w in bucket_override]
+    bucket_override = [(h // 16 * 16, w // 16 * 16) for h, w in bucket_override]
 
     aspect_ratios = [img.height / img.width for img in images_pil]
     mean_aspect_ratio = float(np.mean(aspect_ratios))
 
         min_aspect_diff = aspect_diff
         new_h, new_w = h, w
 
+    resized_images = [img.resize((new_w, new_h), resample=Image.BICUBIC) for img in images_pil]
     return resized_images
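
As a quick sanity check of the bucketing logic, a minimal sketch (assuming the full bucket list from the file, not just the lines shown in this hunk): every image in the batch is resized to the single bucket whose aspect ratio is closest to the batch's mean aspect ratio.

imgs = [Image.new("RGB", (600, 400)), Image.new("RGB", (640, 480))]
resized = resize_by_bucket(imgs, resolution=512)
assert len({im.size for im in resized}) == 1        # one shared bucket for the whole batch
assert all(s % 16 == 0 for s in resized[0].size)    # both sides snapped to multiples of 16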
 
+# ---------------------------
+# pipeline init
+# ---------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 flux_pipeline = FluxKontextPipeline.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev")
 flux_pipeline.scheduler = FlowMatchEulerDiscreteScheduler.from_config(flux_pipeline.scheduler.config)
+flux_pipeline.scheduler.config.stochastic_sampling = False
+
+# precision & device placement
 flux_pipeline.vae.to(device).to(torch.bfloat16)
 flux_pipeline.text_encoder.to(device).to(torch.bfloat16)
 flux_pipeline.text_encoder_2.to(device).to(torch.bfloat16)
+
+# replace the transformer weights
 ckpt_path = hf_hub_download("NoobDoge/Multi_Ref_Model", "full_ema_model.safetensors")
 flux_pipeline.transformer.from_single_file(ckpt_path, torch_dtype=torch.bfloat16)
 flux_pipeline.transformer.to(device).to(torch.bfloat16)
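
One caveat worth flagging: in diffusers, from_single_file is a classmethod that builds and returns a new model rather than loading weights into the existing instance, so the call above appears to discard the downloaded checkpoint. A minimal corrected sketch (my reading of the intent, not what the commit ships):

# assign the freshly loaded transformer back onto the pipeline
flux_pipeline.transformer = FluxTransformer2DModel.from_single_file(
    ckpt_path, torch_dtype=torch.bfloat16
).to(device)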
 
+# ---------------------------
+# constants
+# ---------------------------
 MAX_SEED = np.iinfo(np.int32).max
+MAX_IMAGE_SIZE = 512  # upper bound for the Width/Height sliders below
 
+# ---------------------------
+# inference
+# ---------------------------
+@spaces.GPU  # [uncomment to use ZeroGPU]
 def infer(
     prompt,
+    ref1,  # PIL.Image or None
+    ref2,  # PIL.Image or None (optional)
     seed,
     randomize_seed,
     width,
     height,
+    guidance_scale,  # not passed to the pipeline yet; add it to the call below if needed
     num_inference_steps,
     progress=gr.Progress(track_tqdm=True),
 ):
+    # collect the optional reference images
+    refs = [x for x in (ref1, ref2) if x is not None]
+    if len(refs) == 0:
+        raise gr.Error("Please upload at least one reference image (ref1 or ref2).")
+
+    # normalize width/height: clamp to MAX_IMAGE_SIZE and snap to multiples of 16
+    width = max(16, min(width, MAX_IMAGE_SIZE)) // 16 * 16
+    height = max(16, min(height, MAX_IMAGE_SIZE)) // 16 * 16
+
+    # seeding
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
+    generator = torch.Generator(device=device).manual_seed(int(seed))
 
+    # bucket-resize the reference images
+    base_res = min(width, height, MAX_IMAGE_SIZE)
+    raw_images = resize_by_bucket(refs, resolution=base_res)
+
+    # run the pipeline
     with torch.no_grad():
+        out = flux_pipeline(
+            image=raw_images,
+            prompt=prompt,
+            height=height,
+            width=width,
+            num_inference_steps=int(num_inference_steps),
+            max_area=MAX_IMAGE_SIZE ** 2,
             generator=generator,
+            # enable guidance_scale only after confirming the pipeline supports the argument:
+            # guidance_scale=float(guidance_scale),
+        )
+        output_img = out.images[0]
 
+    return output_img, int(seed)
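
For a quick smoke test of infer outside the UI, something like the following sketch should work (ref.png is a placeholder path; a CUDA device is assumed, and the gr.Progress default is simply unused outside a Gradio event):

ref = Image.open("ref.png").convert("RGB")  # placeholder reference image
img, used_seed = infer(
    prompt="turn the subject into a watercolor painting",
    ref1=ref,
    ref2=None,            # the second reference is optional
    seed=0,
    randomize_seed=True,
    width=512,
    height=512,
    guidance_scale=2.5,   # currently ignored inside infer
    num_inference_steps=28,
)
img.save("out.png")
print("seed:", used_seed)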
 
+# ---------------------------
+# UI
+# ---------------------------
 examples = [
     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
     "An astronaut riding a green horse",
 
     gr.Markdown("# Text-to-Image Gradio Template")
 
     with gr.Row():
+        prompt = gr.Text(
+            label="Prompt",
+            show_label=False,
+            max_lines=1,
+            placeholder="Enter your prompt",
+            container=False,
+        )
         run_button = gr.Button("Run", scale=0, variant="primary")
 
+    # two input images (ref2 may be left empty)
     with gr.Row():
+        ref1_comp = gr.Image(label="Input Image 1", type="pil")
+        ref2_comp = gr.Image(label="Input Image 2 (optional)", type="pil")
 
     result = gr.Image(label="Result", show_label=False)
 
     with gr.Accordion("Advanced Settings", open=False):
+        seed_comp = gr.Slider(
+            label="Seed",
+            minimum=0,
+            maximum=MAX_SEED,
+            step=1,
+            value=0,
+        )
+        randomize_seed_comp = gr.Checkbox(label="Randomize seed", value=True)
+
         with gr.Row():
+            width_comp = gr.Slider(
+                label="Width",
+                minimum=256,
+                maximum=MAX_IMAGE_SIZE,
+                step=32,
+                value=512,
+            )
+            height_comp = gr.Slider(
+                label="Height",
+                minimum=256,
+                maximum=MAX_IMAGE_SIZE,
+                step=32,
+                value=512,
+            )
+
         with gr.Row():
+            guidance_scale_comp = gr.Slider(
+                label="Guidance scale",
+                minimum=0.0,
+                maximum=10.0,
+                step=0.1,
+                value=2.5,
+            )
+            num_inference_steps_comp = gr.Slider(
+                label="Number of inference steps",
+                minimum=1,
+                maximum=50,
+                step=1,
+                value=28,
+            )
+
+    gr.Examples(examples=[[e] for e in examples], inputs=[prompt])
+
+    # Note: don't pass [ref1_comp, ref2_comp] as a single list to inputs; each component maps to one argument!
+    gr.on(
+        triggers=[run_button.click, prompt.submit],
+        fn=infer,
+        inputs=[
+            prompt,
+            ref1_comp,
+            ref2_comp,  # may be empty
+            seed_comp,
+            randomize_seed_comp,
+            width_comp,
+            height_comp,
+            guidance_scale_comp,
+            num_inference_steps_comp,
+        ],
+        outputs=[result, seed_comp],
+    )
 
 if __name__ == "__main__":
+    demo.launch()