Spaces:
Running
on
Zero
Running
on
Zero
SAM3 + SAM 3D streamlining?
#1
by
prithivMLmods
- opened
Hi
@akhaliq
,
Instead of manually uploading the object mask, let SAM3 handle it through text-guided segmentation: it should segment the object, mask it fully, and generate the glb-vis at the end. Basically, an image + text to 3D flow (image-text-to-3d)?
Text → Mask (SAM3)
from transformers import Sam3Processor, Sam3Model
import torch
from PIL import Image
# Model identifier handed to `from_pretrained` for both the SAM3 processor
# and the SAM3 model in `get_mask`.
HF_MODEL_ID_SAM3 = "facebook/sam3"
def get_mask(image_pil, text_prompt):
    """Segment the object described by ``text_prompt`` and return its best mask.

    The image and prompt are run through SAM3, the predictions are
    post-processed into instance masks, and the mask with the highest score
    is returned.

    Args:
        image_pil: Input image, passed to the processor as ``images``.
        text_prompt: Text describing the object to segment.

    Returns:
        The highest-scoring mask as a NumPy array ([H, W] binary mask).

    Raises:
        ValueError: If no instance passes the score threshold for the prompt.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # NOTE(review): the processor and model are re-created on every call;
    # callers segmenting many images may want to load them once and reuse them.
    processor = Sam3Processor.from_pretrained(HF_MODEL_ID_SAM3)
    model = Sam3Model.from_pretrained(HF_MODEL_ID_SAM3).to(device)

    inputs = processor(
        images=image_pil, text=text_prompt, return_tensors="pt"
    ).to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # A single image was submitted, so only the first result entry is used.
    results = processor.post_process_instance_segmentation(
        outputs,
        threshold=0.4,
        mask_threshold=0.5,
        target_sizes=inputs["original_sizes"].tolist(),
    )[0]

    scores = results["scores"]
    # argmax() on an empty score tensor fails with an opaque error; report the
    # actual problem (nothing matched the prompt) instead.
    if len(scores) == 0:
        raise ValueError(
            f"SAM3 found no object matching prompt {text_prompt!r}"
        )

    best_idx = scores.argmax()
    mask = results["masks"][best_idx].cpu().numpy()  # [H,W] binary mask
    return mask
→ 3D (SAM-3D Objects)
import numpy as np
from omegaconf import OmegaConf
from hydra.utils import instantiate
# Path of the pipeline configuration that `generate_3d` loads with
# `OmegaConf.load`.
PIPELINE_CONFIG = "checkpoints/pipeline.yaml"
def mask_to_rgba(image, mask):
    """Combine an image and an object mask into a single RGBA array.

    The image is converted to RGB and the mask becomes the alpha channel:
    every non-zero mask entry is fully opaque (255), every zero entry fully
    transparent (0). Boolean, 0/1 and 0/255 masks therefore all give the same
    result.

    Args:
        image: Image object exposing ``convert("RGB")`` (e.g. a PIL image).
        mask: Array-like with one entry per pixel, matching the image's
            height and width.

    Returns:
        A ``uint8`` array with the RGB channels followed by the alpha channel
        on the last axis.

    Raises:
        ValueError: If the mask shape does not match the image height/width.
    """
    img = np.array(image.convert("RGB"))

    # Binarize first: multiplying a 0/255 uint8 mask by 255 would wrap around
    # (255 * 255 mod 256 == 1) and yield an almost fully transparent alpha.
    opaque = np.asarray(mask).astype(bool)
    if opaque.shape != img.shape[:2]:
        raise ValueError(
            f"mask shape {opaque.shape} does not match "
            f"image size {img.shape[:2]}"
        )

    alpha = (opaque.astype(np.uint8) * 255)[..., None]
    return np.concatenate([img, alpha], axis=-1)
def generate_3d(image_pil, mask, seed=42):
    """Run the 3D pipeline on a masked image and save the result to disk.

    The image and mask are merged into an RGBA array, the pipeline described
    by ``PIPELINE_CONFIG`` is instantiated on CUDA, and its output is written
    to the current working directory.

    Args:
        image_pil: Source image, forwarded to ``mask_to_rgba``.
        mask: Object mask, forwarded to ``mask_to_rgba``.
        seed: Seed passed to the pipeline run (default 42).

    Returns:
        ``"output.ply"`` when the result contains a ``"gaussian"`` entry,
        otherwise ``"output.obj"`` when it contains a ``"mesh"`` entry,
        otherwise ``None``.
    """
    masked_input = mask_to_rgba(image_pil, mask)

    # Load the pipeline description and override the runtime switches
    # before instantiating it.
    cfg = OmegaConf.load(PIPELINE_CONFIG)
    cfg.rendering_engine = "pytorch3d"
    cfg.compile_model = False
    runner = instantiate(cfg).to("cuda")

    output = runner.run(
        masked_input,
        None,
        seed,
        stage1_only=False,
        use_vertex_color=True,
    )

    # The gaussian output takes precedence over the mesh when both exist.
    if "gaussian" in output:
        ply_path = "output.ply"
        output["gaussian"][0].save_ply(ply_path)
        return ply_path

    if "mesh" in output:
        obj_path = "output.obj"
        output["mesh"].export(obj_path)
        return obj_path

    return None