weijielyu committed
Commit 59ae2c2 · 1 Parent(s): 4d09c83

Update demo

Files changed (1):
  app.py +54 -322

app.py CHANGED
@@ -14,6 +14,9 @@ Generates 3D head models from single images using multi-view diffusion and GS-LRM
 import json
 from pathlib import Path
 from datetime import datetime
+import uuid
+import time
+import shutil
 
 import gradio as gr
 import numpy as np
@@ -29,337 +32,66 @@ import spaces
 import subprocess
 import sys
 import os
-import subprocess, sys, os
 
-# Ensure diff-gaussian-rasterization is compiled for the current GPU arch
+# -----------------------------
+# Static paths (for viewer files)
+# -----------------------------
+OUTPUTS_DIR = Path.cwd() / "outputs"
+SPLATS_ROOT = OUTPUTS_DIR / "splats"
+SPLATS_ROOT.mkdir(parents=True, exist_ok=True)
+
+# Serve ./outputs via Gradio's static router: /gradio_api/file=outputs/...
+gr.set_static_paths(paths=[OUTPUTS_DIR])
+
+# -----------------------------
+# Per-session helpers
+# -----------------------------
+def new_session_id() -> str:
+    return uuid.uuid4().hex[:10]
+
+def session_dir(session_id: str) -> Path:
+    p = SPLATS_ROOT / session_id
+    p.mkdir(parents=True, exist_ok=True)
+    return p
+
+def cleanup_old_sessions(max_age_hours: int = 6):
+    cutoff = time.time() - max_age_hours * 3600
+    if not SPLATS_ROOT.exists():
+        return
+    for child in SPLATS_ROOT.iterdir():
+        try:
+            if child.is_dir() and child.stat().st_mtime < cutoff:
+                shutil.rmtree(child, ignore_errors=True)
+        except Exception:
+            pass
+
+def copy_to_session_and_get_url(src_path: str, session_id: str) -> str:
+    """
+    Copy a .splat or .ply into this user's session folder and return a cache-busted URL.
+    """
+    src = Path(src_path)
+    ext = src.suffix.lower() if src.suffix else ".ply"
+    fn = f"{int(time.time())}_{uuid.uuid4().hex[:6]}{ext}"
+    dst = session_dir(session_id) / fn
+    shutil.copy2(src, dst)
+    # /gradio_api/file=outputs/...
+    return f"/gradio_api/file=outputs/splats/{session_id}/{fn}?v={uuid.uuid4().hex[:6]}"
+
+# -----------------------------
+# Ensure diff-gaussian-rasterization builds for current GPU
+# -----------------------------
 try:
     import diff_gaussian_rasterization  # noqa: F401
 except ImportError:
     print("Installing diff-gaussian-rasterization (compiling for detected CUDA arch)...")
     env = os.environ.copy()
     try:
-        import torch
-        if torch.cuda.is_available():
-            maj, minr = torch.cuda.get_device_capability()
+        import torch as _torch
+        if _torch.cuda.is_available():
+            maj, minr = _torch.cuda.get_device_capability()
             arch = f"{maj}.{minr}"  # e.g., "9.0" on H100/H200, "8.0" on A100
             env["TORCH_CUDA_ARCH_LIST"] = f"{arch}+PTX"
         else:
             # Build stage may not see a GPU on HF Spaces: compile a cross-arch set
             env["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6;8.9;9.0+PTX"
-    except Exception:
-        env["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6;8.9;9.0+PTX"
-
-    # (Optional) side-step allocator+NVML quirks in restrictive containers
-    env.setdefault("PYTORCH_NO_CUDA_MEMORY_CACHING", "1")
-
-    subprocess.check_call(
-        [sys.executable, "-m", "pip", "install",
-         "git+https://github.com/graphdeco-inria/diff-gaussian-rasterization"],
-        env=env,
-    )
-    import diff_gaussian_rasterization  # noqa: F401
-
-
-from gslrm.model.gaussians_renderer import render_turntable, imageseq2video
-from mvdiffusion.pipelines.pipeline_mvdiffusion_unclip import StableUnCLIPImg2ImgPipeline
-from utils_folder.face_utils import preprocess_image, preprocess_image_without_cropping
-
-# HuggingFace repository configuration
-HF_REPO_ID = "wlyu/OpenFaceLift"
-
-def download_weights_from_hf() -> Path:
-    """Download model weights from HuggingFace if not already present.
-
-    Returns:
-        Path to the downloaded repository
-    """
-    workspace_dir = Path(__file__).parent
-
-    # Check if weights already exist locally
-    mvdiffusion_path = workspace_dir / "checkpoints/mvdiffusion/pipeckpts"
-    gslrm_path = workspace_dir / "checkpoints/gslrm/ckpt_0000000000021125.pt"
-
-    if mvdiffusion_path.exists() and gslrm_path.exists():
-        print("Using local model weights")
-        return workspace_dir
-
-    print(f"Downloading model weights from HuggingFace: {HF_REPO_ID}")
-    print("This may take a few minutes on first run...")
-
-    # Download to local directory
-    snapshot_download(
-        repo_id=HF_REPO_ID,
-        local_dir=str(workspace_dir / "checkpoints"),
-        local_dir_use_symlinks=False,
-    )
-
-    print("Model weights downloaded successfully!")
-    return workspace_dir
-
-class FaceLiftPipeline:
-    """Pipeline for FaceLift 3D head generation from single images."""
-
-    def __init__(self):
-        # Download weights from HuggingFace if needed
-        workspace_dir = download_weights_from_hf()
-
-        # Setup paths
-        self.output_dir = workspace_dir / "outputs"
-        self.examples_dir = workspace_dir / "examples"
-        self.output_dir.mkdir(exist_ok=True)
-
-        # Parameters
-        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        self.image_size = 512
-        self.camera_indices = [2, 1, 0, 5, 4, 3]
-
-        # Load models (keep on CPU for ZeroGPU compatibility)
-        print("Loading models...")
-        try:
-            self.mvdiffusion_pipeline = StableUnCLIPImg2ImgPipeline.from_pretrained(
-                str(workspace_dir / "checkpoints/mvdiffusion/pipeckpts"),
-                torch_dtype=torch.float16,
-            )
-            # Don't move to device or enable xformers here - will be done in GPU-decorated function
-            self._models_on_gpu = False
-
-            with open(workspace_dir / "configs/gslrm.yaml", "r") as f:
-                config = edict(yaml.safe_load(f))
-
-            module_name, class_name = config.model.class_name.rsplit(".", 1)
-            module = __import__(module_name, fromlist=[class_name])
-            ModelClass = getattr(module, class_name)
-
-            self.gs_lrm_model = ModelClass(config)
-            checkpoint = torch.load(
-                workspace_dir / "checkpoints/gslrm/ckpt_0000000000021125.pt",
-                map_location="cpu"
-            )
-            # Filter out loss_calculator weights (training-only, not needed for inference)
-            state_dict = {k: v for k, v in checkpoint["model"].items()
-                          if not k.startswith("loss_calculator.")}
-            self.gs_lrm_model.load_state_dict(state_dict)
-            # Keep on CPU initially - will move to GPU in decorated function
-
-            self.color_prompt_embedding = torch.load(
-                workspace_dir / "mvdiffusion/fixed_prompt_embeds_6view/clr_embeds.pt",
-                map_location="cpu"
-            )
-
-            with open(workspace_dir / "utils_folder/opencv_cameras.json", 'r') as f:
-                self.cameras_data = json.load(f)["frames"]
-
-            print("Models loaded successfully!")
-        except Exception as e:
-            print(f"Error loading models: {e}")
-            import traceback
-            traceback.print_exc()
-            raise
-
-    def _move_models_to_gpu(self):
-        """Move models to GPU and enable optimizations. Called within @spaces.GPU context."""
-        if not self._models_on_gpu and torch.cuda.is_available():
-            print("Moving models to GPU...")
-            self.device = torch.device("cuda:0")
-            self.mvdiffusion_pipeline.to(self.device)
-            self.mvdiffusion_pipeline.unet.enable_xformers_memory_efficient_attention()
-            self.gs_lrm_model.to(self.device)
-            self.gs_lrm_model.eval()  # Set to eval mode
-            self.color_prompt_embedding = self.color_prompt_embedding.to(self.device)
-            self._models_on_gpu = True
-            torch.cuda.empty_cache()  # Clear cache after moving models
-            print("Models on GPU, xformers enabled!")
-
-    @spaces.GPU(duration=120)
-    def generate_3d_head(self, image_path, auto_crop=True, guidance_scale=3.0,
-                         random_seed=4, num_steps=50):
-        """Generate 3D head from single image."""
-        try:
-            # Move models to GPU now that we're in the GPU context
-            self._move_models_to_gpu()
-            # Setup output directory
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            output_dir = self.output_dir / timestamp
-            output_dir.mkdir(exist_ok=True)
-
-            # Preprocess input
-            original_img = np.array(Image.open(image_path))
-            input_image = preprocess_image(original_img) if auto_crop else \
-                preprocess_image_without_cropping(original_img)
-
-            if input_image.size != (self.image_size, self.image_size):
-                input_image = input_image.resize((self.image_size, self.image_size))
-
-            input_path = output_dir / "input.png"
-            input_image.save(input_path)
-
-            # Generate multi-view images
-            generator = torch.Generator(device=self.mvdiffusion_pipeline.unet.device)
-            generator.manual_seed(random_seed)
-
-            result = self.mvdiffusion_pipeline(
-                input_image, None,
-                prompt_embeds=self.color_prompt_embedding,
-                height=self.image_size,
-                width=self.image_size,
-                guidance_scale=guidance_scale,
-                num_images_per_prompt=1,
-                num_inference_steps=num_steps,
-                generator=generator,
-                eta=1.0,
-            )
-
-            selected_views = result.images[:6]
-
-            # Save multi-view composite
-            multiview_image = Image.new("RGB", (self.image_size * 6, self.image_size))
-            for i, view in enumerate(selected_views):
-                multiview_image.paste(view, (self.image_size * i, 0))
-
-            multiview_path = output_dir / "multiview.png"
-            multiview_image.save(multiview_path)
-
-            # Move diffusion model to CPU to free GPU memory for GS-LRM
-            print("Moving diffusion model to CPU to free memory...")
-            self.mvdiffusion_pipeline.to("cpu")
-
-            # Delete intermediate variables to free memory
-            del result, generator
-            torch.cuda.empty_cache()
-            torch.cuda.synchronize()
-
-            # Prepare 3D reconstruction input
-            view_arrays = [np.array(view) for view in selected_views]
-            lrm_input = torch.from_numpy(np.stack(view_arrays, axis=0)).float()
-            lrm_input = lrm_input[None].to(self.device) / 255.0
-            lrm_input = rearrange(lrm_input, "b v h w c -> b v c h w")
-
-            # Prepare camera parameters
-            selected_cameras = [self.cameras_data[i] for i in self.camera_indices]
-            fxfycxcy_list = [[c["fx"], c["fy"], c["cx"], c["cy"]] for c in selected_cameras]
-            c2w_list = [np.linalg.inv(np.array(c["w2c"])) for c in selected_cameras]
-
-            fxfycxcy = torch.from_numpy(np.stack(fxfycxcy_list, axis=0).astype(np.float32))
-            c2w = torch.from_numpy(np.stack(c2w_list, axis=0).astype(np.float32))
-            fxfycxcy = fxfycxcy[None].to(self.device)
-            c2w = c2w[None].to(self.device)
-
-            batch_indices = torch.stack([
-                torch.zeros(lrm_input.size(1)).long(),
-                torch.arange(lrm_input.size(1)).long(),
-            ], dim=-1)[None].to(self.device)
-
-            batch = edict({
-                "image": lrm_input,
-                "c2w": c2w,
-                "fxfycxcy": fxfycxcy,
-                "index": batch_indices,
-            })
-
-            # Ensure GS-LRM model is on GPU
-            if next(self.gs_lrm_model.parameters()).device.type == "cpu":
-                print("Moving GS-LRM model to GPU...")
-                self.gs_lrm_model.to(self.device)
-                torch.cuda.empty_cache()
-
-            # Final memory cleanup before reconstruction
-            torch.cuda.empty_cache()
-
-            # Run 3D reconstruction
-            with torch.no_grad(), torch.autocast(enabled=True, device_type="cuda", dtype=torch.float16):
-                result = self.gs_lrm_model.forward(batch, create_visual=False, split_data=True)
-
-            comp_image = result.render[0].unsqueeze(0).detach()
-            gaussians = result.gaussians[0]
-
-            # Clear CUDA cache after reconstruction
-            torch.cuda.empty_cache()
-
-            # Save filtered gaussians
-            filtered_gaussians = gaussians.apply_all_filters(
-                cam_origins=None,
-                opacity_thres=0.04,
-                scaling_thres=0.2,
-                floater_thres=0.75,
-                crop_bbx=[-0.91, 0.91, -0.91, 0.91, -1.0, 1.0],
-                nearfar_percent=(0.0001, 1.0),
-            )
-
-            ply_path = output_dir / "gaussians.ply"
-            filtered_gaussians.save_ply(str(ply_path))
-
-            # Save output image
-            comp_image = rearrange(comp_image, "x v c h w -> (x h) (v w) c")
-            comp_image = (comp_image.cpu().numpy() * 255.0).clip(0, 255).astype(np.uint8)
-            output_path = output_dir / "output.png"
-            Image.fromarray(comp_image).save(output_path)
-
-            # Generate turntable video
-            turntable_resolution = 512
-            num_turntable_views = 180
-            turntable_frames = render_turntable(gaussians, rendering_resolution=turntable_resolution,
-                                                num_views=num_turntable_views)
-            turntable_frames = rearrange(turntable_frames, "h (v w) c -> v h w c", v=num_turntable_views)
-            turntable_frames = np.ascontiguousarray(turntable_frames)
-
-            turntable_path = output_dir / "turntable.mp4"
-            imageseq2video(turntable_frames, str(turntable_path), fps=30)
-
-            # Final CUDA cache clear
-            torch.cuda.empty_cache()
-
-            return str(input_path), str(multiview_path), str(output_path), \
-                str(turntable_path), str(ply_path)
-
-        except Exception as e:
-            import traceback
-            error_details = traceback.format_exc()
-            print(f"Error details:\n{error_details}")
-            raise gr.Error(f"Generation failed: {str(e)}")
-
-
-def main():
-    """Run the FaceLift application."""
-    pipeline = FaceLiftPipeline()
-
-    # Load examples - provide all 5 input values (image, auto_crop, guidance_scale, random_seed, num_steps)
-    examples = []
-    if pipeline.examples_dir.exists():
-        examples = [[str(f), True, 3.0, 4, 50] for f in sorted(pipeline.examples_dir.iterdir())
-                    if f.suffix.lower() in {'.png', '.jpg', '.jpeg'}]
-
-    # Create interface
-    demo = gr.Interface(
-        fn=pipeline.generate_3d_head,
-        title="FaceLift: Single Image 3D Face Reconstruction",
-        description="""
-        Transform a single portrait image into a complete 3D head model.
-
-        **Tips:**
-        - Use high-quality portrait images with clear facial features
-        - If face detection fails, try disabling auto-cropping and manually crop to square
-        """,
-        inputs=[
-            gr.Image(type="filepath", label="Input Portrait Image"),
-            gr.Checkbox(value=True, label="Auto Cropping"),
-            gr.Slider(1.0, 10.0, 3.0, step=0.1, label="Guidance Scale"),
-            gr.Number(value=4, label="Random Seed"),
-            gr.Slider(10, 100, 50, step=5, label="Generation Steps"),
-        ],
-        outputs=[
-            gr.Image(label="Processed Input"),
-            gr.Image(label="Multi-view Generation"),
-            gr.Image(label="3D Reconstruction"),
-            gr.PlayableVideo(label="Turntable Animation"),
-            gr.File(label="3D Model (.ply)"),
-        ],
-        examples=examples,
-        allow_flagging="never",
-    )
-
-    demo.queue(max_size=10)
-    demo.launch(share=True, server_name="0.0.0.0", server_port=7860, show_error=True)
-
-
-if __name__ == "__main__":
-    main()
+    except Exception:
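
For context, here is a minimal sketch of how the new per-session helpers could be wired into a Gradio Blocks UI. This is an illustration under assumptions, not part of the commit: run_pipeline is a hypothetical stand-in for the generation step, the component layout is invented, and the sketch assumes new_session_id, cleanup_old_sessions, and copy_to_session_and_get_url from the diff above are defined in the same module.

import gradio as gr

def run_pipeline(image_path: str) -> str:
    # Hypothetical stand-in: assume the real app writes a gaussian splat
    # for this input and returns the .ply path.
    return "outputs/gaussians.ply"

with gr.Blocks() as demo:
    session = gr.State("")                      # filled in on page load
    image = gr.Image(type="filepath", label="Input Portrait Image")
    splat_url = gr.Textbox(label="Splat URL")   # could instead feed an HTML splat viewer
    generate = gr.Button("Generate")

    # Give each browser session its own id so outputs/splats/<id>/ stays isolated.
    demo.load(fn=new_session_id, outputs=session)

    def on_generate(image_path, session_id):
        cleanup_old_sessions()                  # prune session folders older than 6 hours
        ply_path = run_pipeline(image_path)
        return copy_to_session_and_get_url(ply_path, session_id)

    generate.click(on_generate, inputs=[image, session], outputs=splat_url)

The cache-busting ?v= query string returned by copy_to_session_and_get_url matters in this flow: a browser-side viewer that caches by URL would otherwise keep showing the previous model after regenerating into the same session folder.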