Spaces:

onnx
/

ArcFace

Runtime error

App Files Files Community

akhaliq HF Staff commited on Mar 16, 2022

Commit

c003417

1 Parent(s): 07a12d1

Create mtcnn_detector.py

Browse files

Files changed (1) hide show

mtcnn_detector.py +650 -0

mtcnn_detector.py ADDED Viewed

	@@ -0,0 +1,650 @@

+# SPDX-License-Identifier: Apache-2.0
+# coding: utf-8
+import os
+import mxnet as mx
+import numpy as np
+import math
+import cv2
+from multiprocessing import Pool
+from itertools import repeat
+from helper import nms, adjust_input, generate_bbox, detect_first_stage_warpper
+try:
+    from itertools import izip as zip
+except ImportError:
+    pass
+class MtcnnDetector(object):
+    """
+        Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Neural Networks
+        see https://github.com/kpzhang93/MTCNN_face_detection_alignment
+        this is a mxnet version
+    """
+    def __init__(self,
+                 model_folder='.',
+                 minsize = 20,
+                 threshold = [0.6, 0.7, 0.8],
+                 factor = 0.709,
+                 num_worker = 1,
+                 accurate_landmark = False,
+                 ctx=mx.cpu()):
+        """
+            Initialize the detector
+            Parameters:
+            ----------
+                model_folder : string
+                    path for the models
+                minsize : float number
+                    minimal face to detect
+                threshold : float number
+                    detect threshold for 3 stages
+                factor: float number
+                    scale factor for image pyramid
+                num_worker: int number
+                    number of processes we use for first stage
+                accurate_landmark: bool
+                    use accurate landmark localization or not
+        """
+        self.num_worker = num_worker
+        self.accurate_landmark = accurate_landmark
+        # load 4 models from folder
+        models = ['det1', 'det2', 'det3','det4']
+        models = [ os.path.join(model_folder, f) for f in models]
+        self.PNets = []
+        for i in range(num_worker):
+            workner_net = mx.model.FeedForward.load(models[0], 1, ctx=ctx)
+            self.PNets.append(workner_net)
+        self.RNet = mx.model.FeedForward.load(models[1], 1, ctx=ctx)
+        self.ONet = mx.model.FeedForward.load(models[2], 1, ctx=ctx)
+        self.LNet = mx.model.FeedForward.load(models[3], 1, ctx=ctx)
+        self.minsize   = float(minsize)
+        self.factor    = float(factor)
+        self.threshold = threshold
+    def convert_to_square(self, bbox):
+        """
+            convert bbox to square
+        Parameters:
+        ----------
+            bbox: numpy array , shape n x 5
+                input bbox
+        Returns:
+        -------
+            square bbox
+        """
+        square_bbox = bbox.copy()
+        h = bbox[:, 3] - bbox[:, 1] + 1
+        w = bbox[:, 2] - bbox[:, 0] + 1
+        max_side = np.maximum(h,w)
+        square_bbox[:, 0] = bbox[:, 0] + w*0.5 - max_side*0.5
+        square_bbox[:, 1] = bbox[:, 1] + h*0.5 - max_side*0.5
+        square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1
+        square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1
+        return square_bbox
+    def calibrate_box(self, bbox, reg):
+        """
+            calibrate bboxes
+        Parameters:
+        ----------
+            bbox: numpy array, shape n x 5
+                input bboxes
+            reg:  numpy array, shape n x 4
+                bboxex adjustment
+        Returns:
+        -------
+            bboxes after refinement
+        """
+        w = bbox[:, 2] - bbox[:, 0] + 1
+        w = np.expand_dims(w, 1)
+        h = bbox[:, 3] - bbox[:, 1] + 1
+        h = np.expand_dims(h, 1)
+        reg_m = np.hstack([w, h, w, h])
+        aug = reg_m * reg
+        bbox[:, 0:4] = bbox[:, 0:4] + aug
+        return bbox
+    def pad(self, bboxes, w, h):
+        """
+            pad the the bboxes, alse restrict the size of it
+        Parameters:
+        ----------
+            bboxes: numpy array, n x 5
+                input bboxes
+            w: float number
+                width of the input image
+            h: float number
+                height of the input image
+        Returns :
+        ------s
+            dy, dx : numpy array, n x 1
+                start point of the bbox in target image
+            edy, edx : numpy array, n x 1
+                end point of the bbox in target image
+            y, x : numpy array, n x 1
+                start point of the bbox in original image
+            ex, ex : numpy array, n x 1
+                end point of the bbox in original image
+            tmph, tmpw: numpy array, n x 1
+                height and width of the bbox
+        """
+        tmpw, tmph = bboxes[:, 2] - bboxes[:, 0] + 1,  bboxes[:, 3] - bboxes[:, 1] + 1
+        num_box = bboxes.shape[0]
+        dx , dy= np.zeros((num_box, )), np.zeros((num_box, ))
+        edx, edy  = tmpw.copy()-1, tmph.copy()-1
+        x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]
+        tmp_index = np.where(ex > w-1)
+        edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index]
+        ex[tmp_index] = w - 1
+        tmp_index = np.where(ey > h-1)
+        edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index]
+        ey[tmp_index] = h - 1
+        tmp_index = np.where(x < 0)
+        dx[tmp_index] = 0 - x[tmp_index]
+        x[tmp_index] = 0
+        tmp_index = np.where(y < 0)
+        dy[tmp_index] = 0 - y[tmp_index]
+        y[tmp_index] = 0
+        return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
+        return_list = [item.astype(np.int32) for item in return_list]
+        return  return_list
+    def slice_index(self, number):
+        """
+            slice the index into (n,n,m), m < n
+        Parameters:
+        ----------
+            number: int number
+                number
+        """
+        def chunks(l, n):
+            """Yield successive n-sized chunks from l."""
+            for i in range(0, len(l), n):
+                yield l[i:i + n]
+        num_list = range(number)
+        return list(chunks(num_list, self.num_worker))
+    def detect_face_limited(self, img, det_type=2):
+        height, width, _ = img.shape
+        if det_type>=2:
+          total_boxes = np.array( [ [0.0, 0.0, img.shape[1], img.shape[0], 0.9] ] ,dtype=np.float32)
+          num_box = total_boxes.shape[0]
+          # pad the bbox
+          [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
+          # (3, 24, 24) is the input shape for RNet
+          input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)
+          for i in range(num_box):
+              tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
+              tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
+              input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))
+          output = self.RNet.predict(input_buf)
+          # filter the total_boxes with threshold
+          passed = np.where(output[1][:, 1] > self.threshold[1])
+          total_boxes = total_boxes[passed]
+          if total_boxes.size == 0:
+              return None
+          total_boxes[:, 4] = output[1][passed, 1].reshape((-1,))
+          reg = output[0][passed]
+          # nms
+          pick = nms(total_boxes, 0.7, 'Union')
+          total_boxes = total_boxes[pick]
+          total_boxes = self.calibrate_box(total_boxes, reg[pick])
+          total_boxes = self.convert_to_square(total_boxes)
+          total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
+        else:
+          total_boxes = np.array( [ [0.0, 0.0, img.shape[1], img.shape[0], 0.9] ] ,dtype=np.float32)
+        num_box = total_boxes.shape[0]
+        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
+        # (3, 48, 48) is the input shape for ONet
+        input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)
+        for i in range(num_box):
+            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
+            tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
+            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))
+        output = self.ONet.predict(input_buf)
+        # filter the total_boxes with threshold
+        passed = np.where(output[2][:, 1] > self.threshold[2])
+        total_boxes = total_boxes[passed]
+        if total_boxes.size == 0:
+            return None
+        total_boxes[:, 4] = output[2][passed, 1].reshape((-1,))
+        reg = output[1][passed]
+        points = output[0][passed]
+        # compute landmark points
+        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
+        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
+        points[:, 0:5] = np.expand_dims(total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
+        points[:, 5:10] = np.expand_dims(total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]
+        # nms
+        total_boxes = self.calibrate_box(total_boxes, reg)
+        pick = nms(total_boxes, 0.7, 'Min')
+        total_boxes = total_boxes[pick]
+        points = points[pick]
+        if not self.accurate_landmark:
+            return total_boxes, points
+        #############################################
+        # extended stage
+        #############################################
+        num_box = total_boxes.shape[0]
+        patchw = np.maximum(total_boxes[:, 2]-total_boxes[:, 0]+1, total_boxes[:, 3]-total_boxes[:, 1]+1)
+        patchw = np.round(patchw*0.25)
+        # make it even
+        patchw[np.where(np.mod(patchw,2) == 1)] += 1
+        input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
+        for i in range(5):
+            x, y = points[:, i], points[:, i+5]
+            x, y = np.round(x-0.5*patchw), np.round(y-0.5*patchw)
+            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(np.vstack([x, y, x+patchw-1, y+patchw-1]).T,
+                                                                    width,
+                                                                    height)
+            for j in range(num_box):
+                tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
+                tmpim[dy[j]:edy[j]+1, dx[j]:edx[j]+1, :] = img[y[j]:ey[j]+1, x[j]:ex[j]+1, :]
+                input_buf[j, i*3:i*3+3, :, :] = adjust_input(cv2.resize(tmpim, (24, 24)))
+        output = self.LNet.predict(input_buf)
+        pointx = np.zeros((num_box, 5))
+        pointy = np.zeros((num_box, 5))
+        for k in range(5):
+            # do not make a large movement
+            tmp_index = np.where(np.abs(output[k]-0.5) > 0.35)
+            output[k][tmp_index[0]] = 0.5
+            pointx[:, k] = np.round(points[:, k] - 0.5*patchw) + output[k][:, 0]*patchw
+            pointy[:, k] = np.round(points[:, k+5] - 0.5*patchw) + output[k][:, 1]*patchw
+        points = np.hstack([pointx, pointy])
+        points = points.astype(np.int32)
+        return total_boxes, points
+    def detect_face(self, img, det_type=0):
+        """
+            detect face over img
+        Parameters:
+        ----------
+            img: numpy array, bgr order of shape (1, 3, n, m)
+                input image
+        Retures:
+        -------
+            bboxes: numpy array, n x 5 (x1,y2,x2,y2,score)
+                bboxes
+            points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
+                landmarks
+        """
+        # check input
+        height, width, _ = img.shape
+        if det_type==0:
+            MIN_DET_SIZE = 12
+            if img is None:
+                return None
+            # only works for color image
+            if len(img.shape) != 3:
+                return None
+            # detected boxes
+            total_boxes = []
+            minl = min( height, width)
+            # get all the valid scales
+            scales = []
+            m = MIN_DET_SIZE/self.minsize
+            minl *= m
+            factor_count = 0
+            while minl > MIN_DET_SIZE:
+                scales.append(m*self.factor**factor_count)
+                minl *= self.factor
+                factor_count += 1
+            #############################################
+            # first stage
+            #############################################
+            sliced_index = self.slice_index(len(scales))
+            total_boxes = []
+            for batch in sliced_index:
+                local_boxes = map( detect_first_stage_warpper, \
+                        zip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) )
+                total_boxes.extend(local_boxes)
+            # remove the Nones
+            total_boxes = [ i for i in total_boxes if i is not None]
+            if len(total_boxes) == 0:
+                return None
+            total_boxes = np.vstack(total_boxes)
+            if total_boxes.size == 0:
+                return None
+            # merge the detection from first stage
+            pick = nms(total_boxes[:, 0:5], 0.7, 'Union')
+            total_boxes = total_boxes[pick]
+            bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
+            bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
+            # refine the bboxes
+            total_boxes = np.vstack([total_boxes[:, 0]+total_boxes[:, 5] * bbw,
+                                     total_boxes[:, 1]+total_boxes[:, 6] * bbh,
+                                     total_boxes[:, 2]+total_boxes[:, 7] * bbw,
+                                     total_boxes[:, 3]+total_boxes[:, 8] * bbh,
+                                     total_boxes[:, 4]
+                                     ])
+            total_boxes = total_boxes.T
+            total_boxes = self.convert_to_square(total_boxes)
+            total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
+        else:
+            total_boxes = np.array( [ [0.0, 0.0, img.shape[1], img.shape[0], 0.9] ] ,dtype=np.float32)
+        #############################################
+        # second stage
+        #############################################
+        num_box = total_boxes.shape[0]
+        # pad the bbox
+        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
+        # (3, 24, 24) is the input shape for RNet
+        input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)
+        for i in range(num_box):
+            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
+            tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
+            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))
+        output = self.RNet.predict(input_buf)
+        # filter the total_boxes with threshold
+        passed = np.where(output[1][:, 1] > self.threshold[1])
+        total_boxes = total_boxes[passed]
+        if total_boxes.size == 0:
+            return None
+        total_boxes[:, 4] = output[1][passed, 1].reshape((-1,))
+        reg = output[0][passed]
+        # nms
+        pick = nms(total_boxes, 0.7, 'Union')
+        total_boxes = total_boxes[pick]
+        total_boxes = self.calibrate_box(total_boxes, reg[pick])
+        total_boxes = self.convert_to_square(total_boxes)
+        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
+        #############################################
+        # third stage
+        #############################################
+        num_box = total_boxes.shape[0]
+        # pad the bbox
+        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
+        # (3, 48, 48) is the input shape for ONet
+        input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)
+        for i in range(num_box):
+            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
+            tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
+            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))
+        output = self.ONet.predict(input_buf)
+        # filter the total_boxes with threshold
+        passed = np.where(output[2][:, 1] > self.threshold[2])
+        total_boxes = total_boxes[passed]
+        if total_boxes.size == 0:
+            return None
+        total_boxes[:, 4] = output[2][passed, 1].reshape((-1,))
+        reg = output[1][passed]
+        points = output[0][passed]
+        # compute landmark points
+        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
+        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
+        points[:, 0:5] = np.expand_dims(total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
+        points[:, 5:10] = np.expand_dims(total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]
+        # nms
+        total_boxes = self.calibrate_box(total_boxes, reg)
+        pick = nms(total_boxes, 0.7, 'Min')
+        total_boxes = total_boxes[pick]
+        points = points[pick]
+        if not self.accurate_landmark:
+            return total_boxes, points
+        #############################################
+        # extended stage
+        #############################################
+        num_box = total_boxes.shape[0]
+        patchw = np.maximum(total_boxes[:, 2]-total_boxes[:, 0]+1, total_boxes[:, 3]-total_boxes[:, 1]+1)
+        patchw = np.round(patchw*0.25)
+        # make it even
+        patchw[np.where(np.mod(patchw,2) == 1)] += 1
+        input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
+        for i in range(5):
+            x, y = points[:, i], points[:, i+5]
+            x, y = np.round(x-0.5*patchw), np.round(y-0.5*patchw)
+            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(np.vstack([x, y, x+patchw-1, y+patchw-1]).T,
+                                                                    width,
+                                                                    height)
+            for j in range(num_box):
+                tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
+                tmpim[dy[j]:edy[j]+1, dx[j]:edx[j]+1, :] = img[y[j]:ey[j]+1, x[j]:ex[j]+1, :]
+                input_buf[j, i*3:i*3+3, :, :] = adjust_input(cv2.resize(tmpim, (24, 24)))
+        output = self.LNet.predict(input_buf)
+        pointx = np.zeros((num_box, 5))
+        pointy = np.zeros((num_box, 5))
+        for k in range(5):
+            # do not make a large movement
+            tmp_index = np.where(np.abs(output[k]-0.5) > 0.35)
+            output[k][tmp_index[0]] = 0.5
+            pointx[:, k] = np.round(points[:, k] - 0.5*patchw) + output[k][:, 0]*patchw
+            pointy[:, k] = np.round(points[:, k+5] - 0.5*patchw) + output[k][:, 1]*patchw
+        points = np.hstack([pointx, pointy])
+        points = points.astype(np.int32)
+        return total_boxes, points
+    def list2colmatrix(self, pts_list):
+        """
+            convert list to column matrix
+        Parameters:
+        ----------
+            pts_list:
+                input list
+        Retures:
+        -------
+            colMat:
+        """
+        assert len(pts_list) > 0
+        colMat = []
+        for i in range(len(pts_list)):
+            colMat.append(pts_list[i][0])
+            colMat.append(pts_list[i][1])
+        colMat = np.matrix(colMat).transpose()
+        return colMat
+    def find_tfrom_between_shapes(self, from_shape, to_shape):
+        """
+            find transform between shapes
+        Parameters:
+        ----------
+            from_shape:
+            to_shape:
+        Retures:
+        -------
+            tran_m:
+            tran_b:
+        """
+        assert from_shape.shape[0] == to_shape.shape[0] and from_shape.shape[0] % 2 == 0
+        sigma_from = 0.0
+        sigma_to = 0.0
+        cov = np.matrix([[0.0, 0.0], [0.0, 0.0]])
+        # compute the mean and cov
+        from_shape_points = from_shape.reshape(from_shape.shape[0]/2, 2)
+        to_shape_points = to_shape.reshape(to_shape.shape[0]/2, 2)
+        mean_from = from_shape_points.mean(axis=0)
+        mean_to = to_shape_points.mean(axis=0)
+        for i in range(from_shape_points.shape[0]):
+            temp_dis = np.linalg.norm(from_shape_points[i] - mean_from)
+            sigma_from += temp_dis * temp_dis
+            temp_dis = np.linalg.norm(to_shape_points[i] - mean_to)
+            sigma_to += temp_dis * temp_dis
+            cov += (to_shape_points[i].transpose() - mean_to.transpose()) * (from_shape_points[i] - mean_from)
+        sigma_from = sigma_from / to_shape_points.shape[0]
+        sigma_to = sigma_to / to_shape_points.shape[0]
+        cov = cov / to_shape_points.shape[0]
+        # compute the affine matrix
+        s = np.matrix([[1.0, 0.0], [0.0, 1.0]])
+        u, d, vt = np.linalg.svd(cov)
+        if np.linalg.det(cov) < 0:
+            if d[1] < d[0]:
+                s[1, 1] = -1
+            else:
+                s[0, 0] = -1
+        r = u * s * vt
+        c = 1.0
+        if sigma_from != 0:
+            c = 1.0 / sigma_from * np.trace(np.diag(d) * s)
+        tran_b = mean_to.transpose() - c * r * mean_from.transpose()
+        tran_m = c * r
+        return tran_m, tran_b
+    def extract_image_chips(self, img, points, desired_size=256, padding=0):
+        """
+            crop and align face
+        Parameters:
+        ----------
+            img: numpy array, bgr order of shape (1, 3, n, m)
+                input image
+            points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
+            desired_size: default 256
+            padding: default 0
+        Retures:
+        -------
+            crop_imgs: list, n
+                cropped and aligned faces
+        """
+        crop_imgs = []
+        for p in points:
+            shape  =[]
+            for k in range(len(p)/2):
+                shape.append(p[k])
+                shape.append(p[k+5])
+            if padding > 0:
+                padding = padding
+            else:
+                padding = 0
+            # average positions of face points
+            mean_face_shape_x = [0.224152, 0.75610125, 0.490127, 0.254149, 0.726104]
+            mean_face_shape_y = [0.2119465, 0.2119465, 0.628106, 0.780233, 0.780233]
+            from_points = []
+            to_points = []
+            for i in range(len(shape)/2):
+                x = (padding + mean_face_shape_x[i]) / (2 * padding + 1) * desired_size
+                y = (padding + mean_face_shape_y[i]) / (2 * padding + 1) * desired_size
+                to_points.append([x, y])
+                from_points.append([shape[2*i], shape[2*i+1]])
+            # convert the points to Mat
+            from_mat = self.list2colmatrix(from_points)
+            to_mat = self.list2colmatrix(to_points)
+            # compute the similar transfrom
+            tran_m, tran_b = self.find_tfrom_between_shapes(from_mat, to_mat)
+            probe_vec = np.matrix([1.0, 0.0]).transpose()
+            probe_vec = tran_m * probe_vec
+            scale = np.linalg.norm(probe_vec)
+            angle = 180.0 / math.pi * math.atan2(probe_vec[1, 0], probe_vec[0, 0])
+            from_center = [(shape[0]+shape[2])/2.0, (shape[1]+shape[3])/2.0]
+            to_center = [0, 0]
+            to_center[1] = desired_size * 0.4
+            to_center[0] = desired_size * 0.5
+            ex = to_center[0] - from_center[0]
+            ey = to_center[1] - from_center[1]
+            rot_mat = cv2.getRotationMatrix2D((from_center[0], from_center[1]), -1*angle, scale)
+            rot_mat[0][2] += ex
+            rot_mat[1][2] += ey
+            chips = cv2.warpAffine(img, rot_mat, (desired_size, desired_size))
+            crop_imgs.append(chips)
+        return crop_imgs