Spaces:

preSalesAIAutomation
/

LTXpipeline

Running on Zero

App Files Community

preSalesAIAutomation committed on Jul 21

Commit

19053fa

verified ·

1 Parent(s): 8fa0161

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -16

app.py CHANGED Viewed

@@ -46,22 +46,24 @@ def generate_video(prompt, image_url):
         raw_image = Image.open(BytesIO(requests.get(image_url).content)).convert("RGB")
         image = prepare_image_condition(raw_image)
-    # Set target resolutions - using higher quality settings
-    base_width, base_height = 512, 512  # Increased from 480x480
-    down_width, down_height = 384, 384  # Increased from 320x320 for better quality
-    # Step 1: Generate latents at lower resolution with better quality settings
     latents = pipe(
         prompt=prompt,
         image=image,
         width=down_width,
         height=down_height,
         num_frames=60,
-        num_inference_steps=12,  # Increased from 7 for better quality
         output_type="latent",
-        guidance_scale=2.0,  # Increased from 1.0 for better prompt adherence
-        decode_timestep=0.1,  # Adjusted for better quality
-        decode_noise_scale=0.1,  # Adjusted for better quality
         generator=generator
     ).frames
@@ -74,19 +76,20 @@ def generate_video(prompt, image_url):
     torch.cuda.empty_cache()
     gc.collect()
-    # Step 3: Decode upscaled latents to frames using the pipeline with better settings
     frames = pipe(
-        prompt=prompt,  # Use original prompt for better consistency
         latents=upscaled_latents,
         width=base_width,
         height=base_height,
         num_frames=60,
-        num_inference_steps=15,  # Increased for better decoding quality
         output_type="pil",
-        guidance_scale=2.0,  # Consistent with generation
-        decode_timestep=0.1,
-        decode_noise_scale=0.1,
-        denoise_strength=0.2,  # Reduced for less noise
         generator=generator
     ).frames[0]
@@ -204,7 +207,7 @@ demo = gr.Interface(
     ],
     outputs=gr.Video(label="Generated Video"),
     title="🎬 LTX AI Video Generator",
-    description="AI-powered video with voiceover and subtitles. Now outputs at 512x512 resolution with improved quality."
 )
 demo.launch()

         raw_image = Image.open(BytesIO(requests.get(image_url).content)).convert("RGB")
         image = prepare_image_condition(raw_image)
+    # Set target resolutions - using dimensions that match expected latent shapes
+    # LTX uses 32x downsampling, so we need multiples of 32
+    # For latent shape (1, 128, 8, 16, 16), we need 16*32 = 512x512
+    base_width, base_height = 512, 512  # final upscaled size (16*32)
+    down_width, down_height = 256, 256  # for initial generation (8*32) - smaller ratio for upscaling
+    # Step 1: Generate latents at lower resolution with improved quality settings
     latents = pipe(
         prompt=prompt,
         image=image,
         width=down_width,
         height=down_height,
         num_frames=60,
+        num_inference_steps=10,  # Increased from 7 for better quality
         output_type="latent",
+        guidance_scale=1.5,  # Slightly increased for better prompt adherence
+        decode_timestep=0.08,  # Optimized value
+        decode_noise_scale=0.05,  # Reduced noise
         generator=generator
     ).frames
     torch.cuda.empty_cache()
     gc.collect()
+    # Step 3: Decode upscaled latents to frames with improved settings
     frames = pipe(
+        prompt=prompt,  # Use original prompt for consistency
         latents=upscaled_latents,
         width=base_width,
         height=base_height,
         num_frames=60,
+        num_inference_steps=12,  # Increased for better decoding quality
         output_type="pil",
+        guidance_scale=1.5,  # Consistent with generation
+        decode_timestep=0.08,  # Optimized
+        decode_noise_scale=0.05,  # Reduced noise
+        image_cond_noise_scale=0.02,  # Reduced for cleaner output
+        denoise_strength=0.25,  # Balanced denoising
         generator=generator
     ).frames[0]
     ],
     outputs=gr.Video(label="Generated Video"),
     title="🎬 LTX AI Video Generator",
+    description="AI-powered video with voiceover and subtitles. Generates at 256x256 and upscales to 512x512 with improved quality."
 )
 demo.launch()