Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -71,24 +71,28 @@ def generate_video(prompt, image_url):
     torch.cuda.empty_cache()
     gc.collect()
 
-    # Step 3: Decode upscaled latents to frames
-    #
-    frames = pipe
-
-
-
-
-
-
-
-
+    # Step 3: Decode upscaled latents to frames using the pipeline
+    # Create a new pipeline call specifically for decoding
+    frames = pipe(
+        prompt="",  # Empty prompt for decode-only
+        latents=upscaled_latents,
+        width=base_width,
+        height=base_height,
+        num_frames=60,
+        num_inference_steps=1,  # Minimal steps since we're just decoding
+        output_type="pil",
+        guidance_scale=1.0,
+        decode_timestep=0.0,  # Use 0 for pure decoding
+        decode_noise_scale=0.0,  # No noise for decoding
+        generator=generator
+    ).frames[0]
 
     torch.cuda.empty_cache()
     gc.collect()
 
     # Step 4: Export video
     video_path = "output.mp4"
-    export_to_video(
+    export_to_video(frames, video_path, fps=24)
 
     # Step 5: TTS
     tts = gTTS(text=prompt, lang='en')
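
For reference, the added block is a decode-only pass: the pre-computed upscaled_latents are fed straight back into the pipeline with an empty prompt, a single inference step, and guidance_scale=1.0 (which typically disables classifier-free guidance in diffusers pipelines), so the call mostly exercises the VAE decode and returns PIL frames. The sketch below shows how steps 3-5 fit together as one helper; it is a minimal sketch, assuming a diffusers video pipeline whose __call__ accepts latents, decode_timestep, and decode_noise_scale (as the diff uses), and the helper name decode_export_and_narrate and the narration.mp3 path are hypothetical, not part of the Space.

import gc

import torch
from diffusers.utils import export_to_video
from gtts import gTTS


def decode_export_and_narrate(pipe, upscaled_latents, base_width, base_height,
                              generator, prompt):
    """Sketch of steps 3-5: decode latents to frames, export an MP4, add TTS narration."""
    # Step 3: decode-only pass. The latents are supplied directly, the prompt is
    # empty, and a single step with guidance_scale=1.0 keeps denoising work minimal,
    # so the call is effectively just the VAE decode to PIL frames.
    frames = pipe(
        prompt="",
        latents=upscaled_latents,
        width=base_width,
        height=base_height,
        num_frames=60,
        num_inference_steps=1,
        output_type="pil",
        guidance_scale=1.0,
        decode_timestep=0.0,
        decode_noise_scale=0.0,
        generator=generator,
    ).frames[0]

    torch.cuda.empty_cache()
    gc.collect()

    # Step 4: write the decoded frames to an MP4 at 24 fps.
    video_path = "output.mp4"
    export_to_video(frames, video_path, fps=24)

    # Step 5: synthesize narration for the prompt (hypothetical output path).
    tts = gTTS(text=prompt, lang="en")
    tts.save("narration.mp3")

    return video_path

An alternative would be to call the pipeline's VAE decoder directly instead of a one-step pipeline call, but the approach in the diff stays within the same public pipeline API the rest of generate_video already uses.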