Spaces:

wlyu
/

FaceLift

Running on Zero

App Files Files Community

weijielyu committed on Oct 20

Commit

839dee5

1 Parent(s): 40e81a0

Update demo

Browse files

Files changed (1) hide show

app.py +32 -21

app.py CHANGED Viewed

@@ -29,19 +29,36 @@ import spaces
 import subprocess
 import sys
 import os
 try:
-    import diff_gaussian_rasterization
 except ImportError:
-    print("Installing diff-gaussian-rasterization...")
-    # Set CUDA architecture for A100 (compute capability 8.0)
-    # This prevents the IndexError in _get_cuda_arch_flags
     env = os.environ.copy()
-    env["TORCH_CUDA_ARCH_LIST"] = "8.0"
-    subprocess.check_call([
-        sys.executable, "-m", "pip", "install",
-        "git+https://github.com/graphdeco-inria/diff-gaussian-rasterization"
-    ], env=env)
-    import diff_gaussian_rasterization
 from gslrm.model.gaussians_renderer import render_turntable, imageseq2video
 from mvdiffusion.pipelines.pipeline_mvdiffusion_unclip import StableUnCLIPImg2ImgPipeline
@@ -93,7 +110,7 @@ class FaceLiftPipeline:
         # Parameters
         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        self.image_size = 384  # Reduced from 512 for ZeroGPU memory constraints
         self.camera_indices = [2, 1, 0, 5, 4, 3]
         # Load models (keep on CPU for ZeroGPU compatibility)
@@ -244,13 +261,7 @@ class FaceLiftPipeline:
                 print("Moving GS-LRM model to GPU...")
                 self.gs_lrm_model.to(self.device)
                 torch.cuda.empty_cache()
-            # Log GPU memory status
-            if torch.cuda.is_available():
-                allocated = torch.cuda.memory_allocated() / 1024**3
-                reserved = torch.cuda.memory_reserved() / 1024**3
-                print(f"GPU memory before GS-LRM: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved")
             # Final memory cleanup before reconstruction
             torch.cuda.empty_cache()
@@ -283,9 +294,9 @@ class FaceLiftPipeline:
             output_path = output_dir / "output.png"
             Image.fromarray(comp_image).save(output_path)
-            # Generate turntable video (reduced resolution and frames for ZeroGPU memory limits)
-            turntable_resolution = 256  # Lower resolution for turntable to save memory
-            num_turntable_views = 120  # Reduced from 180
             turntable_frames = render_turntable(gaussians, rendering_resolution=turntable_resolution,
                                                num_views=num_turntable_views)
             turntable_frames = rearrange(turntable_frames, "h (v w) c -> v h w c", v=num_turntable_views)

 import subprocess
 import sys
 import os
+import subprocess, sys, os
+# Ensure diff-gaussian-rasterization is compiled for the current GPU arch
 try:
+    import diff_gaussian_rasterization  # noqa: F401
 except ImportError:
+    print("Installing diff-gaussian-rasterization (compiling for detected CUDA arch)...")
     env = os.environ.copy()
+    try:
+        import torch
+        if torch.cuda.is_available():
+            maj, minr = torch.cuda.get_device_capability()
+            arch = f"{maj}.{minr}"                 # e.g., "9.0" on H100/H200, "8.0" on A100
+            env["TORCH_CUDA_ARCH_LIST"] = f"{arch}+PTX"
+        else:
+            # Build stage may not see a GPU on HF Spaces: compile a cross-arch set
+            env["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6;8.9;9.0+PTX"
+    except Exception:
+        env["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6;8.9;9.0+PTX"
+    # (Optional) side-step allocator+NVML quirks in restrictive containers; set on `env`, so it affects only the pip build subprocess
+    env.setdefault("PYTORCH_NO_CUDA_MEMORY_CACHING", "1")
+    subprocess.check_call(
+        [sys.executable, "-m", "pip", "install",
+         "git+https://github.com/graphdeco-inria/diff-gaussian-rasterization"],
+        env=env,
+    )
+    import diff_gaussian_rasterization  # noqa: F401
 from gslrm.model.gaussians_renderer import render_turntable, imageseq2video
 from mvdiffusion.pipelines.pipeline_mvdiffusion_unclip import StableUnCLIPImg2ImgPipeline
         # Parameters
         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self.image_size = 512
         self.camera_indices = [2, 1, 0, 5, 4, 3]
         # Load models (keep on CPU for ZeroGPU compatibility)
                 print("Moving GS-LRM model to GPU...")
                 self.gs_lrm_model.to(self.device)
                 torch.cuda.empty_cache()
             # Final memory cleanup before reconstruction
             torch.cuda.empty_cache()
             output_path = output_dir / "output.png"
             Image.fromarray(comp_image).save(output_path)
+            # Generate turntable video
+            turntable_resolution = 512
+            num_turntable_views = 180
             turntable_frames = render_turntable(gaussians, rendering_resolution=turntable_resolution,
                                                num_views=num_turntable_views)
             turntable_frames = rearrange(turntable_frames, "h (v w) c -> v h w c", v=num_turntable_views)