import gradio as gr
import numpy as np
import random
import spaces  # [uncomment to use ZeroGPU]
from PIL import Image

from kontext.pipeline_flux_kontext import FluxKontextPipeline
from kontext.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
from diffusers import FluxTransformer2DModel
import torch
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# ---------------------------
# utils
# ---------------------------
def resize_by_bucket(images_pil, resolution=512):
    """Resize every image to the one aspect-ratio bucket that fits them best.

    The bucket table is defined for a base resolution of 512, scaled linearly
    to ``resolution`` and snapped down to multiples of 16 (never below 16).
    The bucket whose height/width ratio is closest to the *mean* height/width
    ratio of the inputs is selected, and every image is resized to exactly
    that size with bicubic resampling (images are stretched to the bucket,
    not cropped or padded).

    Args:
        images_pil: Non-empty sequence of PIL images.
        resolution: Target base resolution; 512 leaves the table unscaled.

    Returns:
        A list with one resized image per input, all of the same size.

    Raises:
        AssertionError: If ``images_pil`` is empty.
    """
    # Raised explicitly instead of via `assert` so the check is not stripped
    # under `python -O`; the exception type seen by callers is unchanged.
    if len(images_pil) == 0:
        raise AssertionError("images_pil 不能为空")

    # (height, width) buckets for the 512 base resolution.
    base_buckets = [
        (336, 784), (344, 752), (360, 728), (376, 696),
        (400, 664), (416, 624), (440, 592), (472, 552),
        (512, 512),
        (552, 472), (592, 440), (624, 416), (664, 400),
        (696, 376), (728, 360), (752, 344), (784, 336),
    ]

    def _snap(side):
        # Scale to the requested resolution and align down to 16. The floor of
        # 16 keeps very small resolutions from producing a zero-length side,
        # which would otherwise cause a division by zero below.
        return max(16, int(side / 512 * resolution) // 16 * 16)

    buckets = [(_snap(h), _snap(w)) for h, w in base_buckets]

    mean_aspect_ratio = float(np.mean([img.height / img.width for img in images_pil]))

    # `min` returns the first minimal element, i.e. the earliest bucket wins ties.
    new_h, new_w = min(buckets, key=lambda hw: abs(hw[0] / hw[1] - mean_aspect_ratio))

    # PIL's resize takes (width, height).
    return [img.resize((new_w, new_h), resample=Image.BICUBIC) for img in images_pil]

# ---------------------------
# pipeline init
# ---------------------------
# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the base pipeline, then rebuild its scheduler from the same config with
# the scheduler class imported from the `kontext` package, and turn
# stochastic sampling off.
flux_pipeline = FluxKontextPipeline.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev")
flux_pipeline.scheduler = FlowMatchEulerDiscreteScheduler.from_config(flux_pipeline.scheduler.config)
flux_pipeline.scheduler.config.stochastic_sampling = False

# precision & device
flux_pipeline.vae.to(device).to(torch.bfloat16)
flux_pipeline.text_encoder.to(device).to(torch.bfloat16)
flux_pipeline.text_encoder_2.to(device).to(torch.bfloat16)

# Replace the transformer with the checkpoint downloaded from the Hub.
# `from_single_file` is a classmethod that builds and RETURNS a new model; it
# does not load weights into the instance it is called on. The result must
# therefore be assigned back to the pipeline, otherwise the downloaded
# checkpoint is silently discarded and the base transformer keeps being used.
ckpt_path = hf_hub_download("NoobDoge/Multi_Ref_Model", "full_ema_model.safetensors")
flux_pipeline.transformer = FluxTransformer2DModel.from_single_file(ckpt_path, torch_dtype=torch.bfloat16)
flux_pipeline.transformer.to(device).to(torch.bfloat16)

# ---------------------------
# constants
# ---------------------------
# Largest seed value used by the seed slider and by random seeding (int32 max).
MAX_SEED = np.iinfo(np.int32).max
# Upper bound for the requested width/height; also used as the maximum of the
# width/height sliders, whose default value is 512.
MAX_IMAGE_SIZE = 512

# ---------------------------
# inference
# ---------------------------
@spaces.GPU  # ZeroGPU decorator from the `spaces` package
def infer(
    prompt,
    ref1,
    ref2,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the pipeline on one or two reference images and a text prompt.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        ref1: First reference image (PIL image) or None.
        ref2: Second reference image (PIL image) or None; optional.
        seed: Seed for the random generator; ignored when ``randomize_seed``
            is true.
        randomize_seed: When true, a fresh seed in ``[0, MAX_SEED]`` is drawn.
        width: Requested output width; clamped to ``[16, MAX_IMAGE_SIZE]`` and
            aligned down to a multiple of 16.
        height: Requested output height; clamped and aligned like ``width``.
        guidance_scale: Accepted so the UI slider can be wired to this
            function, but currently NOT forwarded to the pipeline.
        num_inference_steps: Number of sampling steps (coerced to int).
        progress: Gradio progress tracker that mirrors tqdm progress.

    Returns:
        A ``(image, seed)`` tuple: the first generated image and the integer
        seed that was actually used.

    Raises:
        gr.Error: If neither reference image was provided.
    """
    # Collect whichever reference images were actually supplied.
    refs = [img for img in (ref1, ref2) if img is not None]
    if not refs:
        raise gr.Error("请至少上传一张参考图（ref1 或 ref2）。")

    # Coerce to int first (UI values may arrive as floats), then cap at
    # MAX_IMAGE_SIZE, keep at least 16, and align down to a multiple of 16.
    width = max(16, min(int(width), MAX_IMAGE_SIZE)) // 16 * 16
    height = max(16, min(int(height), MAX_IMAGE_SIZE)) // 16 * 16

    # Seed selection.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    seed = int(seed)
    generator = torch.Generator(device=device).manual_seed(seed)

    # Resize the references to a common bucket. Both sides are already capped
    # at MAX_IMAGE_SIZE, so the shorter side is the base resolution.
    raw_images = resize_by_bucket(refs, resolution=min(width, height))

    # With two references, each one is wrapped in its own single-element list;
    # a single reference is passed as a flat one-element list.
    # NOTE(review): presumably this is the input layout the custom pipeline
    # expects for multi-reference input — confirm against the pipeline.
    if len(raw_images) == 2:
        raw_images = [[raw_images[0]], [raw_images[1]]]

    # Inference; gradients are not needed.
    with torch.no_grad():
        out = flux_pipeline(
            image=raw_images,
            prompt=prompt,
            height=height,
            width=width,
            num_inference_steps=int(num_inference_steps),
            max_area=MAX_IMAGE_SIZE ** 2,
            generator=generator,
            # To honour the guidance slider, first make sure the pipeline
            # accepts this argument, then enable:
            # guidance_scale=float(guidance_scale),
        )

    return out.images[0], seed

# ---------------------------
# UI
# ---------------------------
# Example prompts; each one is offered as a one-click example for the prompt box.
examples = [
    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
    "An astronaut riding a green horse",
    "A delicious ceviche cheesecake slice",
]

# Page CSS: centres the element with id "col-container" and caps its width.
css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Text-to-Image Gradio Template")

        # Prompt box with the Run button next to it.
        with gr.Row():
            prompt = gr.Text(
                placeholder="Enter your prompt",
                max_lines=1,
                label="Prompt",
                show_label=False,
                container=False,
            )
            run_button = gr.Button("Run", variant="primary", scale=0)

        # Two reference-image inputs; the second one may stay empty.
        with gr.Row():
            ref1_comp = gr.Image(type="pil", label="Input Image 1")
            ref2_comp = gr.Image(type="pil", label="Input Image 2 (optional)")

        result = gr.Image(show_label=False, label="Result")

        with gr.Accordion("Advanced Settings", open=False):
            seed_comp = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed_comp = gr.Checkbox(value=True, label="Randomize seed")

            # Output size controls.
            with gr.Row():
                width_comp = gr.Slider(
                    label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512
                )
                height_comp = gr.Slider(
                    label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512
                )

            # Sampling controls.
            with gr.Row():
                guidance_scale_comp = gr.Slider(
                    label="Guidance scale", minimum=0.0, maximum=10.0, step=0.1, value=2.5
                )
                num_inference_steps_comp = gr.Slider(
                    label="Number of inference steps", minimum=1, maximum=50, step=1, value=28
                )

        gr.Examples(examples=[[text] for text in examples], inputs=[prompt])

    # Each component is its own entry in `inputs`, in the order of infer's
    # parameters; do not pass the two image components as one nested list.
    gr.on(
        fn=infer,
        triggers=[run_button.click, prompt.submit],
        inputs=[
            prompt,
            ref1_comp,
            ref2_comp,  # may be empty
            seed_comp,
            randomize_seed_comp,
            width_comp,
            height_comp,
            guidance_scale_comp,
            num_inference_steps_comp,
        ],
        outputs=[result, seed_comp],
    )

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()