doguscank
/

facenet-onnx

ONNX

Model card Files Files and versions

xet

Community

doguscank commited on Dec 17, 2024

Commit

b3f76cd

verified ·

1 Parent(s): 4a0a71b

Create facenet.py

Browse files

Files changed (1) hide show

facenet.py +197 -0

facenet.py ADDED Viewed

	@@ -0,0 +1,197 @@

+# This script is mostly based on the openpose preprocessor script of
+# the sd-webui-controlnet project by Mikubill.
+# https://github.com/Mikubill/sd-webui-controlnet/blob/main/annotator/openpose/face.py
+import numpy as np
+import onnxruntime as ort
+import cv2
+from PIL import Image
+import pathlib
+from typing import Tuple, Union, List
+from tqdm import tqdm
+def smart_resize(image: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
+    """
+    Resize an image to a target shape while preserving aspect ratio.
+    Parameters
+    ----------
+    image : np.ndarray
+        The input image.
+    shape : Tuple[int, int]
+        The target shape (height, width).
+    Returns
+    -------
+    np.ndarray
+        The resized image
+    """
+    Ht, Wt = shape
+    if image.ndim == 2:
+        Ho, Wo = image.shape
+        Co = 1
+    else:
+        Ho, Wo, Co = image.shape
+    if Co == 3 or Co == 1:
+        k = float(Ht + Wt) / float(Ho + Wo)
+        return cv2.resize(
+            image,
+            (int(Wt), int(Ht)),
+            interpolation=cv2.INTER_AREA if k < 1 else cv2.INTER_LANCZOS4,
+        )
+    else:
+        return np.stack(
+            [smart_resize(image[:, :, i], shape) for i in range(Co)], axis=2
+        )
+class FaceLandmarkDetector:
+    """
+    The OpenPose face landmark detector model using ONNXRuntime.
+    Parameters
+    ----------
+    face_model_path : str
+        The path to the ONNX model file.
+    """
+    def __init__(self, face_model_path: pathlib.Path) -> None:
+        """
+        Initialize the OpenPose face landmark detector model.
+        Parameters
+        ----------
+        face_model_path : pathlib.Path
+            The path to the ONNX model file.
+        """
+        # Initialize ONNX runtime session
+        self.session = ort.InferenceSession(
+            face_model_path, providers=["CPUExecutionProvider"]
+        )
+        self.input_name = self.session.get_inputs()[0].name
+    def _inference(self, face_img: np.ndarray) -> np.ndarray:
+        """
+        Run the OpenPose face landmark detector model on an image.
+        Parameters
+        ----------
+        face_img : np.ndarray
+            The input image.
+        Returns
+        -------
+        np.ndarray
+            The detected keypoints.
+        """
+        # face_img should be a numpy array: H x W x C (likely RGB or BGR)
+        H, W, C = face_img.shape
+        # Preprocessing
+        w_size = 384  # ONNX is exported for this size
+        # Resize input image
+        resized_img = cv2.resize(
+            face_img, (w_size, w_size), interpolation=cv2.INTER_LINEAR
+        )
+        # Normalize: /256.0 - 0.5 (mimicking original code)
+        x_data = resized_img.astype(np.float32) / 256.0 - 0.5
+        # Convert to channel-first format: (C, H, W)
+        x_data = np.transpose(x_data, (2, 0, 1))
+        # Add batch dimension: (1, C, H, W)
+        x_data = np.expand_dims(x_data, axis=0)
+        # Run inference
+        outputs = self.session.run(None, {self.input_name: x_data})
+        # Assuming the model's last output corresponds to the heatmaps
+        # and is shaped like (1, num_parts, h_out, w_out)
+        heatmaps_original = outputs[-1]
+        # Remove batch dimension: (num_parts, h_out, w_out)
+        heatmaps_original = np.squeeze(heatmaps_original, axis=0)
+        # Resize the heatmaps back to the original image size
+        num_parts = heatmaps_original.shape[0]
+        heatmaps = np.zeros((num_parts, H, W), dtype=np.float32)
+        for i in range(num_parts):
+            heatmaps[i] = cv2.resize(
+                heatmaps_original[i], (W, H), interpolation=cv2.INTER_LINEAR
+            )
+        peaks = self.compute_peaks_from_heatmaps(heatmaps)
+        return peaks
+    def __call__(
+        self,
+        face_img: Union[np.ndarray, List[np.ndarray], Image.Image, List[Image.Image]],
+    ) -> List[np.ndarray]:
+        """
+        Run the OpenPose face landmark detector model on an image.
+        Parameters
+        ----------
+        face_img : Union[np.ndarray, Image.Image, List[Image.Image]]
+            The input image or a list of input images.
+        Returns
+        -------
+        List[np.ndarray]
+            The detected keypoints.
+        """
+        if isinstance(face_img, Image.Image):
+            image_list = [np.array(face_img)]
+        elif isinstance(face_img, list):
+            if isinstance(face_img[0], Image.Image):
+                image_list = [np.array(img) for img in face_img]
+        elif isinstance(face_img, np.ndarray):
+            if face_img.ndim == 4:
+                image_list = [img for img in face_img]
+        results = []
+        for image in tqdm(image_list):
+            keypoints = self._inference(image)
+            results.append(keypoints)
+        return results
+    def compute_peaks_from_heatmaps(self, heatmaps: np.ndarray) -> np.ndarray:
+        """
+        Compute the peaks from the heatmaps.
+        Parameters
+        ----------
+        heatmaps : np.ndarray
+            The heatmaps.
+        Returns
+        -------
+        np.ndarray
+            The peaks, which are keypoints.
+        """
+        all_peaks = []
+        for part in range(heatmaps.shape[0]):
+            map_ori = heatmaps[part].copy()
+            binary = np.ascontiguousarray(map_ori > 0.05, dtype=np.uint8)
+            if np.sum(binary) == 0:
+                all_peaks.append([-1, -1])
+                continue
+            positions = np.where(binary > 0.5)
+            intensities = map_ori[positions]
+            mi = np.argmax(intensities)
+            y, x = positions[0][mi], positions[1][mi]
+            all_peaks.append([x, y])
+        return np.array(all_peaks)