"""Gradio front-end for LTX-Video multi-scale (FP8) video generation.

Thin UI layer over the project's ``ltx_server`` singleton: it collects the
prompt / conditioning image and sampling parameters, forwards them to the
server's multi-scale inference pipeline, and displays the resulting video.
"""

import os
from pathlib import Path
from typing import Optional

import gradio as gr
from PIL import Image

try:
    from services.ltx_server import ltx_server_singleton as server
except Exception as e:
    # The whole app is useless without the inference server: fail fast.
    print(f"ERRO FATAL: Não foi possível importar o LTXServer. {e}")
    raise RuntimeError("Falha na inicialização do LTXServer.") from e


def generate_video_from_image(
    prompt: str,
    image_input: Optional[str],
    height: int,
    width: int,
    num_frames: int,
    seed: int,
    guidance_scale: float,
    num_inference_steps: int,
    denoise_strength: float,
    progress=gr.Progress(track_tqdm=True),
) -> Optional[str]:
    """Run LTX multi-scale inference and return the generated video path.

    Args:
        prompt: Text prompt; may be blank when ``image_input`` is given.
        image_input: Filesystem path of the conditioning image, or ``None``
            for pure text-to-video.
        height: Target output height in pixels.
        width: Target output width in pixels.
        num_frames: Number of output frames (UI constrains to 8*k + 1).
        seed: RNG seed forwarded to the pipeline.
        guidance_scale: Guidance strength (1.0 recommended for distilled).
        num_inference_steps: Steps for the first (coarse) stage.
        denoise_strength: Strength of the final refinement stage.
        progress: Gradio progress tracker (injected by Gradio at call time).

    Returns:
        Path of the generated video file, or ``None`` when input validation
        fails (a ``gr.Warning`` is shown instead).

    Raises:
        gr.Error: When the inference pipeline fails, so the UI surfaces it.
    """
    progress(0.1, desc="Validando entradas...")

    # Need at least one conditioning signal: an image or a non-blank prompt.
    if not image_input and (not prompt or not prompt.strip()):
        gr.Warning("Por favor, forneça uma imagem de entrada ou um prompt de texto.")
        return None

    try:
        progress(0.2, desc="Enviando tarefa para a pipeline LTX (Multi-Scale)...")
        video_path = server.run_inference(
            prompt=prompt,
            image_path=image_input,
            target_height=int(height),
            target_width=int(width),
            num_frames=int(num_frames),
            seed=int(seed),
            guidance_scale=float(guidance_scale),
            num_inference_steps=int(num_inference_steps),
            denoise_strength=float(denoise_strength),
        )
        progress(1.0, desc="Inferência concluída!")
        return video_path
    except Exception as e:
        print(f"[UI LTX ERROR] A inferência falhou: {e}")
        # BUG FIX: gr.Error is an exception class and must be *raised* to be
        # shown in the UI; merely constructing it (the previous code) had no
        # visible effect and the handler silently returned None.
        raise gr.Error(f"Erro na Geração: {e}") from e


with gr.Blocks(title="LTX-Video (Multi-Scale)", theme=gr.themes.Soft()) as demo:
    # NOTE(review): the original header markup was mangled in extraction;
    # reconstructed as a simple centered title carrying the same text.
    gr.HTML(
        "<h1 style='text-align: center;'>"
        "LTX-Video - Geração de Vídeo Multi-Scale (FP8)"
        "</h1>"
    )

    with gr.Row():
        with gr.Column(scale=1):
            image_in = gr.Image(
                type="filepath",
                label="Imagem de Entrada (Opcional para txt2vid)",
            )
            prompt_in = gr.Textbox(
                label="Prompt",
                lines=4,
                placeholder=(
                    "Ex: a cinematic shot of a majestic lion walking in the "
                    "savanna, 4k, high quality"
                ),
            )

            with gr.Accordion("Parâmetros Principais", open=True):
                with gr.Row():
                    height_in = gr.Slider(
                        label="Altura Final (Height)",
                        minimum=256, maximum=1024, step=32, value=480,
                    )
                    width_in = gr.Slider(
                        label="Largura Final (Width)",
                        minimum=256, maximum=1280, step=32, value=832,
                    )
                with gr.Row():
                    frames_in = gr.Slider(
                        label="Número de Frames",
                        minimum=17, maximum=161, step=8, value=97,
                        info="Deve ser um múltiplo de 8 + 1 (ex: 17, 25, 33, ...)",
                    )
                    seed_in = gr.Number(label="Seed", value=42, precision=0)

            with gr.Accordion("Parâmetros Avançados", open=False):
                num_inference_steps_in = gr.Slider(
                    label="Passos de Inferência (Etapa 1)",
                    minimum=4, maximum=50, step=1, value=30,
                    info="Mais passos = melhor qualidade inicial. 4-10 para 'distilled'.",
                )
                guidance_scale_in = gr.Slider(
                    label="Força do Guia (Guidance)",
                    minimum=1.0, maximum=10.0, step=0.5, value=1.0,
                    info="Para modelos 'distilled', o valor recomendado é 1.0.",
                )
                denoise_strength_in = gr.Slider(
                    label="Força do Refinamento (Denoise)",
                    minimum=0.1, maximum=1.0, step=0.05, value=0.4,
                    info="Controla a intensidade da Etapa 3 (refinamento).",
                )

            run_button = gr.Button("Gerar Vídeo", variant="primary")

        with gr.Column(scale=1):
            video_out = gr.Video(label="Vídeo Gerado")

    run_button.click(
        fn=generate_video_from_image,
        inputs=[
            prompt_in, image_in, height_in, width_in, frames_in, seed_in,
            guidance_scale_in, num_inference_steps_in, denoise_strength_in,
        ],
        outputs=[video_out],
    )

    # Best-effort bootstrap of the example image: try to download the
    # reference penguin picture; fall back to a plain gray placeholder so
    # gr.Examples always has a valid file to point at.
    example_image_path = "ltx_example_penguin.png"
    if not os.path.exists(example_image_path):
        try:
            from diffusers.utils import load_image

            load_image(
                "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/penguin.png"
            ).save(example_image_path)
        except Exception:
            # BUG FIX: was a bare `except:`, which would also swallow
            # KeyboardInterrupt/SystemExit.
            Image.new('RGB', (512, 512), color='gray').save(example_image_path)

    gr.Examples(
        examples=[
            [
                "A cute little penguin takes out a book and starts reading it",
                example_image_path, 480, 832, 97, 0, 1.0, 30, 0.4,
            ]
        ],
        inputs=[
            prompt_in, image_in, height_in, width_in, frames_in, seed_in,
            guidance_scale_in, num_inference_steps_in, denoise_strength_in,
        ],
    )


if __name__ == "__main__":
    demo.launch(
        server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.getenv("GRADIO_SERVER_PORT", "7861")),
        show_error=True,
    )