import os, random, tempfile

import gradio as gr
import torch
import numpy as np
from PIL import Image
from diffusers import LTXConditionPipeline
from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
from diffusers.utils import export_to_video

# LTX-Video model via Diffusers
MODEL_REPO = os.getenv("LTX_REPO", "Lightricks/LTX-Video")

# Basic parameters
FPS = 24
MAX_FRAMES = 161
MIN_DIM = 256
MAX_DIM = 1280

device = "cuda" if torch.cuda.is_available() else "cpu"


# Type helpers
def _to_int(x, d):
    if isinstance(x, (list, tuple)):
        x = x[0] if x else d
    try:
        return int(x)
    except Exception:
        return d


def _to_float(x, d):
    if isinstance(x, (list, tuple)):
        x = x[0] if x else d
    try:
        return float(x)
    except Exception:
        return d


def _to_bool(x, d=True):
    if isinstance(x, (list, tuple)):
        x = x[0] if x else d
    return bool(x)


# Frame count aligned to (8k + 1), as required by LTX-Video
def _frames_from_secs(secs):
    secs = _to_float(secs, 2.0)
    n = max(9, int(round(secs * FPS)))
    k = round((n - 1) / 8.0)
    return int(max(9, min(MAX_FRAMES, k * 8 + 1)))


def _pad32(v):
    return ((v - 1) // 32 + 1) * 32


def _dims_for_image(path, target=768):
    im = Image.open(path)
    w, h = im.size
    if w >= h:
        new_h = target
        new_w = int(round((w / max(1, h)) * new_h / 32) * 32)
    else:
        new_w = target
        new_h = int(round((h / max(1, w)) * new_w / 32) * 32)
    new_h = max(MIN_DIM, min(new_h, MAX_DIM))
    new_w = max(MIN_DIM, min(new_w, MAX_DIM))
    return new_h, new_w


print(f"Loading {MODEL_REPO} (LTXConditionPipeline)...")
pipe = LTXConditionPipeline.from_pretrained(
    MODEL_REPO,
    torch_dtype=torch.bfloat16,  # simple and stable; optional FP8 can be added later
)
pipe.to(device)

# Disable dynamic shifting in the scheduler so it does not require 'mu'
if hasattr(pipe, "scheduler") and hasattr(pipe.scheduler, "use_dynamic_shifting"):
    pipe.scheduler.use_dynamic_shifting = False

# VAE tiling to reduce VRAM peaks
if hasattr(pipe, "vae") and hasattr(pipe.vae, "enable_tiling"):
    pipe.vae.enable_tiling()


def handle_dims(image_path, cur_h, cur_w):
    if not image_path:
        return gr.update(value=cur_h), gr.update(value=cur_w)
    try:
        h, w = _dims_for_image(image_path, 768)
        return gr.update(value=h), gr.update(value=w)
    except Exception as e:
        print(f"Error adjusting dimensions: {e}")
        return gr.update(value=cur_h), gr.update(value=cur_w)


def generate_i2v(
    prompt, neg_prompt, image_path,
    height_ui, width_ui, duration_ui,
    seed_ui, randomize_seed,
    guidance_ui, denoise_ui, image_noise_ui,
    progress=gr.Progress(track_tqdm=True)
):
    if not image_path:
        raise gr.Error("Select an image.")

    h = _to_int(height_ui, 512)
    w = _to_int(width_ui, 704)
    h_pad = _pad32(h)
    w_pad = _pad32(w)
    num_frames = _frames_from_secs(duration_ui)

    # Stable quality parameters
    guidance_scale = _to_float(guidance_ui, 5.0)             # 4.0–6.0 work well
    denoise_strength = _to_float(denoise_ui, 0.4)            # 0.3–0.5 preserves the image well
    image_cond_noise_scale = _to_float(image_noise_ui, 0.0)  # 0.0 locks the appearance; 0.01–0.03 relaxes it

    seed = _to_int(seed_ui, 42)
    if _to_bool(randomize_seed, True):
        seed = random.randint(0, 2**32 - 1)

    # Condition: the input image as the first frame
    img = Image.open(image_path).convert("RGB")
    cond = LTXVideoCondition(image=img, frame_index=0, strength=1.0)

    gen = torch.Generator(device=device).manual_seed(seed)

    progress(0.0, desc="Generating video...")
    out = pipe(
        conditions=[cond],
        prompt=prompt,
        negative_prompt=neg_prompt,
        width=w_pad,
        height=h_pad,
        num_frames=num_frames,
        num_inference_steps=30,  # simple and stable
        #guidance_scale=guidance_scale,
        #guidance_rescale=0.7,  # helps stabilize CFG
        #decode_timestep=0.05,  # safe values for >=0.9.1
        #decode_noise_scale=0.025,
        #image_cond_noise_scale=image_cond_noise_scale,
        #denoise_strength=denoise_strength,
        generator=gen,
        output_type="pil",
    )
    frames = out.frames[0]

    tmp = tempfile.mkdtemp()
    out_path = os.path.join(tmp, f"output_{random.randint(10000, 99999)}.mp4")

    progress(0.8, desc="Saving video")
    export_to_video(frames, out_path, fps=FPS)
    return out_path, int(seed)


# Simple UI
with gr.Blocks() as demo:
    gr.Markdown("Simple LTX I2V (Diffusers) with denoise and dynamic shifting disabled")
    with gr.Row():
        with gr.Column():
            img = gr.Image(label="Image", type="filepath")
            prompt = gr.Textbox(label="Prompt", value="Subject moves gently; subtle camera push-in", lines=2)
            neg = gr.Textbox(label="Negative", value="worst quality, jitter, blur, distortions", lines=2)
            dur = gr.Slider(label="Duration (s)", minimum=0.5, maximum=8.0, step=0.1, value=2.0)
            with gr.Row():
                h = gr.Slider(label="Height", minimum=MIN_DIM, maximum=MAX_DIM, step=32, value=512)
                w = gr.Slider(label="Width", minimum=MIN_DIM, maximum=MAX_DIM, step=32, value=704)
            with gr.Accordion("Advanced", open=False):
                seed = gr.Number(label="Seed", value=42, precision=0, minimum=0, maximum=2**32 - 1)
                rand = gr.Checkbox(label="Randomize seed", value=True)
                guidance = gr.Slider(label="Guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=5.0)
                denoise = gr.Slider(label="Denoise strength", minimum=0.0, maximum=1.0, step=0.05, value=0.4)
                image_noise = gr.Slider(label="Image cond noise", minimum=0.0, maximum=0.2, step=0.005, value=0.0)
            btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            vid = gr.Video(label="Video")

    img.upload(handle_dims, [img, h, w], [h, w])
    btn.click(
        generate_i2v,
        [prompt, neg, img, h, w, dur, seed, rand, guidance, denoise, image_noise],
        [vid, seed]
    )

if __name__ == "__main__":
    # For MCP integration: install gradio[mcp] and add mcp_server=True if needed.
    demo.queue().launch(debug=True, share=False)
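    # Minimal sketch of the MCP-enabled launch mentioned above; assumes a Gradio
    # version with MCP support and `pip install "gradio[mcp]"`:
    # demo.queue().launch(debug=True, share=False, mcp_server=True)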