Aduc-sdr-2_5s

Paused

App Files Community

euIaxs22 committed on Oct 2

Commit

010513a

verified ·

1 Parent(s): a349ca3

Update app_ltx.py

Browse files

Files changed (1) hide show

app_ltx.py +37 -28

app_ltx.py CHANGED Viewed

@@ -1,15 +1,16 @@
 import os
 import gradio as gr
 from pathlib import Path
 from PIL import Image
 # Importa o singleton do nosso novo servidor LTX (API Diffusers)
 try:
     from services.ltx_server import ltx_server_singleton as server
 except Exception as e:
     print(f"ERRO FATAL: Não foi possível importar o LTXServer. {e}")
-    raise
 # --- Função de Callback da UI ---
 def generate_video_from_image(
@@ -21,28 +22,27 @@ def generate_video_from_image(
     seed: int,
     guidance_scale: float,
     num_inference_steps: int,
-    decode_timestep: float,
-    decode_noise_scale: float,
     progress=gr.Progress(track_tqdm=True)
 ):
     progress(0.1, desc="Validando entradas...")
     if not image_input and (not prompt or not prompt.strip()):
-        gr.Warning("Por favor, forneça uma imagem de entrada ou um prompt de texto.")
         return None
     try:
-        progress(0.3, desc="Enviando tarefa para a pipeline LTX (pode demorar um pouco)...")
         video_path = server.run_inference(
             prompt=prompt,
             image_path=image_input,
-            height=int(height),
-            width=int(width),
             num_frames=int(num_frames),
             seed=int(seed),
             guidance_scale=float(guidance_scale),
             num_inference_steps=int(num_inference_steps),
-            decode_timestep=float(decode_timestep),
-            decode_noise_scale=float(decode_noise_scale)
         )
         progress(1.0, desc="Inferência concluída!")
         return video_path
@@ -52,34 +52,33 @@ def generate_video_from_image(
         return None
 # --- Definição da Interface Gráfica com Gradio ---
-with gr.Blocks(title="LTX-Video (Diffusers)", theme=gr.themes.Soft()) as demo:
     gr.HTML(
         """
         <div style='text-align:center; margin-bottom: 20px;'>
-            <h1>LTX-Video - Geração de Vídeo via Diffusers</h1>
-            <p>Interface para a pipeline oficial LTX-Video.</p>
         </div>
         """
     )
     with gr.Row():
         with gr.Column(scale=1):
-            image_in = gr.Image(type="filepath", label="Imagem de Entrada (Opcional)")
-            prompt_in = gr.Textbox(label="Prompt", lines=4, placeholder="Ex: a cinematic shot of a majestic lion walking in the savanna")
-            with gr.Accordion("Parâmetros de Geração", open=True):
                 with gr.Row():
-                    height_in = gr.Slider(label="Altura (Height)", minimum=256, maximum=1024, step=32, value=512)
-                    width_in = gr.Slider(label="Largura (Width)", minimum=256, maximum=1024, step=32, value=768)
                 with gr.Row():
-                    frames_in = gr.Slider(label="Número de Frames", minimum=17, maximum=161, step=8, value=33)
                     seed_in = gr.Number(label="Seed", value=42, precision=0)
             with gr.Accordion("Parâmetros Avançados", open=False):
-                 num_inference_steps_in = gr.Slider(label="Passos de Inferência", minimum=10, maximum=100, step=1, value=50, info="Mais passos = melhor qualidade, mais lento.")
-                 guidance_scale_in = gr.Slider(label="Força do Guia (Guidance)", minimum=1.0, maximum=10.0, step=0.5, value=3.0, info="Valores mais altos seguem mais o prompt.")
-                 decode_timestep_in = gr.Slider(label="Decode Timestep", minimum=0.0, maximum=0.2, step=0.005, value=0.03, info="Parâmetro do VAE.")
-                 decode_noise_scale_in = gr.Slider(label="Decode Noise Scale", minimum=0.0, maximum=0.2, step=0.005, value=0.025, info="Parâmetro do VAE.")
             run_button = gr.Button("Gerar Vídeo", variant="primary")
@@ -88,18 +87,28 @@ with gr.Blocks(title="LTX-Video (Diffusers)", theme=gr.themes.Soft()) as demo:
     run_button.click(
         fn=generate_video_from_image,
-        inputs=[prompt_in, image_in, height_in, width_in, frames_in, seed_in, guidance_scale_in, num_inference_steps_in, decode_timestep_in, decode_noise_scale_in],
         outputs=[video_out],
     )
     gr.Markdown("---")
     # Cria uma imagem de exemplo se ela não existir
-    if not os.path.exists("ltx_example.png"):
-        Image.new('RGB', (512, 512), color = 'gray').save('ltx_example.png')
     gr.Examples(
-        examples=[["A beautiful cinematic shot of a sunset over the ocean", "ltx_example.png", 512, 768, 33, 123, 3.0, 50, 0.03, 0.025]],
-        inputs=[prompt_in, image_in, height_in, width_in, frames_in, seed_in, guidance_scale_in, num_inference_steps_in, decode_timestep_in, decode_noise_scale_in],
     )
 if __name__ == "__main__":

 import os
 import gradio as gr
 from pathlib import Path
 from PIL import Image
+from typing import Optional
 # Importa o singleton do nosso novo servidor LTX (API Diffusers)
 try:
     from services.ltx_server import ltx_server_singleton as server
 except Exception as e:
     print(f"ERRO FATAL: Não foi possível importar o LTXServer. {e}")
+    # Se o import falhar, a aplicação não tem como funcionar.
+    raise RuntimeError("Falha na inicialização do LTXServer.") from e
 # --- Função de Callback da UI ---
 def generate_video_from_image(
     seed: int,
     guidance_scale: float,
     num_inference_steps: int,
+    denoise_strength: float,
     progress=gr.Progress(track_tqdm=True)
 ):
+    """Callback para a UI que chama o backend LTXServer."""
     progress(0.1, desc="Validando entradas...")
     if not image_input and (not prompt or not prompt.strip()):
+        gr.Warning("Por favor, forneça uma imagem de entrada e/ou um prompt de texto.")
         return None
     try:
+        progress(0.2, desc="Enviando tarefa para a pipeline LTX (Multi-Scale)...")
         video_path = server.run_inference(
             prompt=prompt,
             image_path=image_input,
+            target_height=int(height),
+            target_width=int(width),
             num_frames=int(num_frames),
             seed=int(seed),
             guidance_scale=float(guidance_scale),
             num_inference_steps=int(num_inference_steps),
+            denoise_strength=float(denoise_strength)
         )
         progress(1.0, desc="Inferência concluída!")
         return video_path
         return None
 # --- Definição da Interface Gráfica com Gradio ---
+with gr.Blocks(title="LTX-Video (Multi-Scale)", theme=gr.themes.Soft()) as demo:
     gr.HTML(
         """
         <div style='text-align:center; margin-bottom: 20px;'>
+            <h1>LTX-Video - Geração de Vídeo Multi-Scale (FP8)</h1>
+            <p>Interface para a pipeline oficial LTX-Video via Diffusers.</p>
         </div>
         """
     )
     with gr.Row():
         with gr.Column(scale=1):
+            image_in = gr.Image(type="filepath", label="Imagem de Entrada (Opcional para txt2vid)")
+            prompt_in = gr.Textbox(label="Prompt", lines=4, placeholder="Ex: a cinematic shot of a majestic lion walking in the savanna, 4k, high quality")
+            with gr.Accordion("Parâmetros Principais", open=True):
                 with gr.Row():
+                    height_in = gr.Slider(label="Altura Final (Height)", minimum=256, maximum=1024, step=32, value=480)
+                    width_in = gr.Slider(label="Largura Final (Width)", minimum=256, maximum=1280, step=32, value=832)
                 with gr.Row():
+                    frames_in = gr.Slider(label="Número de Frames", minimum=17, maximum=161, step=8, value=97, info="Deve ser um múltiplo de 8 + 1 (ex: 17, 25, 33, ...)")
                     seed_in = gr.Number(label="Seed", value=42, precision=0)
             with gr.Accordion("Parâmetros Avançados", open=False):
+                 num_inference_steps_in = gr.Slider(label="Passos de Inferência (Etapa 1)", minimum=4, maximum=50, step=1, value=30, info="Mais passos = melhor qualidade inicial. 4-10 para modelos 'distilled'.")
+                 guidance_scale_in = gr.Slider(label="Força do Guia (Guidance)", minimum=1.0, maximum=10.0, step=0.5, value=1.0, info="Para modelos 'distilled', o valor recomendado é 1.0. Para outros, use ~5.0.")
+                 denoise_strength_in = gr.Slider(label="Força do Refinamento (Denoise)", minimum=0.1, maximum=1.0, step=0.05, value=0.4, info="Controla a intensidade da Etapa 3 (refinamento). 0.4 significa 40% dos passos de inferência.")
             run_button = gr.Button("Gerar Vídeo", variant="primary")
     run_button.click(
         fn=generate_video_from_image,
+        inputs=[prompt_in, image_in, height_in, width_in, frames_in, seed_in, guidance_scale_in, num_inference_steps_in, denoise_strength_in],
         outputs=[video_out],
     )
     gr.Markdown("---")
     # Cria uma imagem de exemplo se ela não existir
+    example_image_path = "ltx_example_penguin.png"
+    if not os.path.exists(example_image_path):
+        try:
+            # Tenta baixar a imagem de exemplo da documentação
+            from diffusers.utils import load_image
+            load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/penguin.png").save(example_image_path)
+        except:
+            # Se falhar, cria uma imagem cinza
+            Image.new('RGB', (512, 512), color = 'gray').save(example_image_path)
     gr.Examples(
+        examples=[["A cute little penguin takes out a book and starts reading it", example_image_path, 480, 832, 97, 0, 1.0, 30, 0.4]],
+        inputs=[prompt_in, image_in, height_in, width_in, frames_in, seed_in, guidance_scale_in, num_inference_steps_in, denoise_strength_in],
+        outputs=[video_out],
+        fn=generate_video_from_image,
+        cache_examples=False,
     )
 if __name__ == "__main__":