SAM3 + SAM 3D streamlining?

#1
by prithivMLmods - opened

Hi @akhaliq ,
Instead of manually uploading the object mask, let SAM3 handle it through text-guided segmentation. It should segment the object, mask it fully, and generate the glb-vis at the end. Basically, an image-text-to-3D flow (image-text-to-3d)? 🙂

Text → Mask (SAM3)

from transformers import Sam3Processor, Sam3Model
import torch
from PIL import Image

# Model identifier handed to `from_pretrained` for both the SAM3 processor
# and the SAM3 model in `get_mask`.
HF_MODEL_ID_SAM3 = "facebook/sam3"

# Loaded (processor, model) pairs keyed by device string, so repeated
# `get_mask` calls reuse the checkpoint instead of reloading it every time.
# NOTE: a cached model stays resident on its device between calls.
_SAM3_CACHE = {}


def _load_sam3(device):
    """Return the cached SAM3 (processor, model) pair for *device*, loading it on first use."""
    if device not in _SAM3_CACHE:
        processor = Sam3Processor.from_pretrained(HF_MODEL_ID_SAM3)
        model = Sam3Model.from_pretrained(HF_MODEL_ID_SAM3).to(device)
        _SAM3_CACHE[device] = (processor, model)
    return _SAM3_CACHE[device]


def get_mask(image_pil, text_prompt):
    """Segment the object described by *text_prompt* and return its mask.

    Runs SAM3 on the image with the text prompt, post-processes the output
    into instance masks, and keeps the instance with the highest score.

    Args:
        image_pil: Input image passed to the processor as ``images``.
        text_prompt: Text passed to the processor as ``text``.

    Returns:
        The highest-scoring instance mask as a NumPy array
        (expected to be an [H, W] binary mask).

    Raises:
        ValueError: If no instance passes the score threshold, so there is
            no mask to return.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    processor, model = _load_sam3(device)

    inputs = processor(images=image_pil, text=text_prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    # One image in, so take the first (only) entry of the per-image results.
    results = processor.post_process_instance_segmentation(
        outputs,
        threshold=0.4,
        mask_threshold=0.5,
        target_sizes=inputs["original_sizes"].tolist()
    )[0]

    scores = results["scores"]
    if scores.numel() == 0:
        # argmax() on an empty tensor fails with an opaque torch error;
        # report the real cause instead.
        raise ValueError(
            f"SAM3 found no instance matching prompt {text_prompt!r}"
        )

    best_idx = int(scores.argmax())
    mask = results["masks"][best_idx].cpu().numpy()  # [H,W] binary mask

    return mask

→ 3D (SAM-3D Objects)

import numpy as np
from omegaconf import OmegaConf
from hydra.utils import instantiate

# Path of the pipeline YAML config that `generate_3d` loads with
# `OmegaConf.load`.
PIPELINE_CONFIG = "checkpoints/pipeline.yaml"

def mask_to_rgba(image, mask):
    """Attach *mask* to *image* as an alpha channel and return an RGBA array.

    Args:
        image: Object exposing ``convert("RGB")`` (e.g. a PIL image) whose
            result converts to an (H, W, 3) array.
        mask: Array-like of shape (H, W); leading singleton axes such as
            (1, H, W) are stripped. Elements that are non-zero after the
            cast to ``uint8`` are treated as foreground.

    Returns:
        An (H, W, 4) ``uint8`` array: the RGB pixels plus an alpha channel
        that is 255 on foreground and 0 elsewhere.

    Raises:
        ValueError: If the mask's height/width do not match the image.
    """
    img = np.array(image.convert("RGB"))

    mask = np.asarray(mask)
    # Segmentation outputs often carry a leading batch/channel axis of size 1.
    while mask.ndim > 2 and mask.shape[0] == 1:
        mask = mask[0]

    if mask.shape != img.shape[:2]:
        raise ValueError(
            f"mask shape {mask.shape} does not match image shape {img.shape[:2]}"
        )

    # Binarise before scaling: multiplying a 0/255 mask by 255 directly would
    # wrap around in uint8 and produce alpha == 1 instead of 255.
    foreground = mask.astype(np.uint8) > 0
    alpha = foreground.astype(np.uint8) * 255
    return np.concatenate([img, alpha[..., None]], axis=-1)

def generate_3d(image_pil, mask, seed=42):
    """Run the 3D pipeline on a masked image and write the result to disk.

    The image and mask are merged into an RGBA array, the pipeline is built
    from ``PIPELINE_CONFIG`` (with the rendering engine set to "pytorch3d"
    and model compilation disabled) and moved to "cuda", then run with the
    given seed.

    Args:
        image_pil: Source image, forwarded to ``mask_to_rgba``.
        mask: Object mask, forwarded to ``mask_to_rgba``.
        seed: Seed passed to the pipeline run.

    Returns:
        "output.ply" if the result has a "gaussian" entry, otherwise
        "output.obj" if it has a "mesh" entry, otherwise ``None``.
    """
    masked_image = mask_to_rgba(image_pil, mask)

    cfg = OmegaConf.load(PIPELINE_CONFIG)
    cfg.rendering_engine = "pytorch3d"
    cfg.compile_model = False
    runner = instantiate(cfg).to("cuda")

    outputs = runner.run(
        masked_image,
        None,
        seed,
        stage1_only=False,
        use_vertex_color=True,
    )

    # A "gaussian" entry takes priority over "mesh" when both are present.
    if "gaussian" in outputs:
        ply_path = "output.ply"
        outputs["gaussian"][0].save_ply(ply_path)
        return ply_path

    if "mesh" in outputs:
        obj_path = "output.obj"
        outputs["mesh"].export(obj_path)
        return obj_path

    return None

Sign up or log in to comment