import os
import os.path as osp
import numpy as np
import torch
import cv2
import json
import copy
from pycocotools.coco import COCO
from config.config import cfg
from util.human_models import smpl_x
from util.preprocessing import (
    load_img, process_bbox, augmentation_instance_sample,
    process_human_model_output_batch_simplify,
    process_db_coord_batch_no_valid)
from util.transforms import world2cam, cam2pixel, rigid_align
from detrsmpl.utils.geometry import (
    batch_rodrigues, project_points_new, weak_perspective_projection,
    perspective_projection)
import tqdm
import time
import random
from detrsmpl.utils.demo_utils import box2cs, xywh2xyxy, xyxy2xywh
import torch.distributed as dist
KPS2D_KEYS = [
    'keypoints2d_ori', 'keypoints2d_smplx', 'keypoints2d_smpl',
    'keypoints2d_original', 'keypoints2d_gta', 'keypoints2d'
]
KPS3D_KEYS = [
    'keypoints3d_cam', 'keypoints3d', 'keypoints3d_smplx', 'keypoints3d_smpl',
    'keypoints3d_original', 'keypoints3d_gta', 'keypoints3d'
]
# keypoints3d_cam (root-aligned) has the highest priority, followed by the
# legacy key keypoints3d; when keypoints3d_smplx exists, it is used instead
# of keypoints3d_original.
from util.formatting import DefaultFormatBundle
from detrsmpl.data.datasets.pipelines.transforms import Normalize


class Cache():
    """A custom implementation for the OSX pipeline."""
    def __init__(self, load_path=None):
        if load_path is not None:
            self.load(load_path)

    def load(self, load_path):
        self.load_path = load_path
        self.cache = np.load(load_path, allow_pickle=True)
        self.data_len = self.cache['data_len']
        self.data_strategy = self.cache['data_strategy']
        assert self.data_len == len(self.cache) - 2  # data_len, data_strategy
        self.cache = None

    @classmethod
    def save(cls, save_path, data_list, data_strategy):
        assert save_path is not None, 'save_path is None'
        data_len = len(data_list)
        cache = {}
        for i, data in enumerate(data_list):
            cache[str(i)] = data
        assert len(cache) == data_len
        # update meta
        cache.update({'data_len': data_len, 'data_strategy': data_strategy})
        np.savez_compressed(save_path, **cache)
        print(f'Cache saved to {save_path}.')

    def __len__(self):
        return self.data_len

    def __getitem__(self, idx):
        # lazy-load the npz on first access, so a Cache instance created in
        # the main process stays cheap to share with DataLoader workers
        if self.cache is None:
            self.cache = np.load(self.load_path, allow_pickle=True)
        cache_data = self.cache[str(idx)]
        data = cache_data.item()
        return data
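

# A minimal usage sketch of Cache (the cache path below is hypothetical):
#   Cache.save('humandata_cache.npz', datalist, data_strategy='balance')
#   cached = Cache('humandata_cache.npz')
#   sample = cached[0]  # entries come back as the original python dicts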


class HumanDataset(torch.utils.data.Dataset):

    # same mapping for 144->137 and 190->137
    SMPLX_137_MAPPING = [
        0, 1, 2, 4, 5, 7, 8, 12, 16, 17, 18, 19, 20, 21, 60, 61, 62, 63, 64,
        65, 59, 58, 57, 56, 55, 37, 38, 39, 66, 25, 26, 27, 67, 28, 29, 30, 68,
        34, 35, 36, 69, 31, 32, 33, 70, 52, 53, 54, 71, 40, 41, 42, 72, 43, 44,
        45, 73, 49, 50, 51, 74, 46, 47, 48, 75, 22, 15, 56, 57, 76, 77, 78, 79,
        80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
        98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
        126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
        140, 141, 142, 143
    ]
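    # The mapping is applied as kps[:, SMPLX_137_MAPPING, :] to select and
    # reorder the 137 target joints from a 144- or 190-joint HumanData array.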

    def __init__(self, transform, data_split):
        self.transform = transform
        self.data_split = data_split

        # dataset information, to be filled by the child class
        self.img_dir = None
        self.annot_path = None
        self.annot_path_cache = None
        self.use_cache = False
        self.img_shape = None  # (h, w)
        self.cam_param = None  # {'focal_length': (fx, fy), 'princpt': (cx, cy)}
        self.use_betas_neutral = False
        self.body_only = False

        self.joint_set = {
            'joint_num': smpl_x.joint_num,
            'joints_name': smpl_x.joints_name,
            'flip_pairs': smpl_x.flip_pairs
        }
        self.joint_set['root_joint_idx'] = \
            self.joint_set['joints_name'].index('Pelvis')
        self.format = DefaultFormatBundle()
        self.normalize = Normalize(mean=[123.675, 116.28, 103.53],
                                   std=[58.395, 57.12, 57.375])
        self.keypoints2d = None
        self.lhand_mean = smpl_x.layer['neutral'].left_hand_mean.reshape(
            15, 3).cpu().numpy()
        self.rhand_mean = smpl_x.layer['neutral'].right_hand_mean.reshape(
            15, 3).cpu().numpy()

    def load_cache(self, annot_path_cache):
        datalist = Cache(annot_path_cache)
        # assert datalist.data_strategy == getattr(cfg, 'data_strategy', None), \
        #     f'Cache data strategy {datalist.data_strategy} does not match ' \
        #     f'current data strategy {getattr(cfg, "data_strategy", None)}'
        return datalist

    def save_cache(self, annot_path_cache, datalist):
        print(f'[{self.__class__.__name__}] '
              f'Caching datalist to {self.annot_path_cache}...')
        Cache.save(annot_path_cache,
                   datalist,
                   data_strategy=getattr(cfg, 'data_strategy', None))

    def load_data(self, train_sample_interval=1,
                  hand_bbox_ratio=1, body_bbox_ratio=1):
        content = np.load(self.annot_path, allow_pickle=True)
        try:
            frame_range = content['frame_range']
        except KeyError:
            self.num_data = len(content['image_path'])
            frame_range = np.array([[i, i + 1] for i in range(self.num_data)])
        num_examples = len(frame_range)

        if 'meta' in content:
            meta = content['meta'].item()
            print('meta keys:', meta.keys())
        else:
            meta = None
            print('No meta info provided! Please give height and width manually.')

        print(f'Start loading humandata {self.annot_path} into memory...\n'
              f'Dataset includes: {content.files}')
        tic = time.time()
        image_path = content['image_path']

        if meta is not None and 'height' in meta and len(meta['height']) > 0:
            height = np.array(meta['height'])
            width = np.array(meta['width'])
            image_shape = np.stack([height, width], axis=-1)
        else:
            image_shape = None

        if meta is not None and 'gender' in meta and len(meta['gender']) != 0:
            gender = np.array(meta['gender'])
        else:
            gender = None

        bbox_xywh = content['bbox_xywh']

        if 'smplx' in content:
            smplx = content['smplx'].item()
            as_smplx = 'smplx'
        elif 'smpl' in content:
            smplx = content['smpl'].item()
            as_smplx = 'smpl'
        elif 'smplh' in content:
            smplx = content['smplh'].item()
            as_smplx = 'smplh'
        # TODO: temp solution, should be more general. But SHAPY is very special.
        elif self.__class__.__name__ == 'SHAPY':
            smplx = {}
        else:
            raise KeyError('No SMPL/SMPL-X parameters available, please check keys:\n'
                           f'{content.files}')
        print('Smplx param', smplx.keys())

        if 'lhand_bbox_xywh' in content and 'rhand_bbox_xywh' in content:
            lhand_bbox_xywh = content['lhand_bbox_xywh']
            rhand_bbox_xywh = content['rhand_bbox_xywh']
        else:
            lhand_bbox_xywh = np.zeros_like(bbox_xywh)
            rhand_bbox_xywh = np.zeros_like(bbox_xywh)

        if 'face_bbox_xywh' in content:
            face_bbox_xywh = content['face_bbox_xywh']
        else:
            face_bbox_xywh = np.zeros_like(bbox_xywh)

        if meta is not None and 'smplx_valid' in meta:
            smplx_valid = meta['smplx_valid']
        else:
            smplx_valid = np.ones(len(bbox_xywh))

        decompressed = False
        if content['__keypoints_compressed__']:
            decompressed_kps = self.decompress_keypoints(content)
            decompressed = True

        keypoints3d = None
        valid_kps3d = False
        keypoints3d_mask = None
        valid_kps3d_mask = False

        # pick the first available 3D keypoint key, in KPS3D_KEYS priority order
        for kps3d_key in KPS3D_KEYS:
            if kps3d_key in content:
                keypoints3d = decompressed_kps[kps3d_key][:, self.SMPLX_137_MAPPING, :] if decompressed \
                    else content[kps3d_key][:, self.SMPLX_137_MAPPING, :]
                valid_kps3d = True
                if keypoints3d.shape[-1] == 4:
                    valid_kps3d_mask = True
                break

        if self.keypoints2d is not None:
            keypoints2d = decompressed_kps[self.keypoints2d][:, self.SMPLX_137_MAPPING, :] if decompressed \
                else content[self.keypoints2d][:, self.SMPLX_137_MAPPING, :]
        else:
            for kps2d_key in KPS2D_KEYS:
                if kps2d_key in content:
                    keypoints2d = decompressed_kps[kps2d_key][:, self.SMPLX_137_MAPPING, :] if decompressed \
                        else content[kps2d_key][:, self.SMPLX_137_MAPPING, :]
                    break

        if keypoints2d.shape[-1] == 3:
            valid_kps3d_mask = True
        print('Done. Time: {:.2f}s'.format(time.time() - tic))

        datalist = []
        # process each frame range; keep only instances with a valid body bbox
        for i in tqdm.tqdm(range(int(num_examples))):
            if self.data_split == 'train' and i % train_sample_interval != 0:
                continue
            frame_start, frame_end = frame_range[i]
            img_path = osp.join(self.img_dir, image_path[frame_start])
            img_shape = image_shape[frame_start] \
                if image_shape is not None else self.img_shape

            bbox_list = bbox_xywh[frame_start:frame_end, :4]
            valid_idx = []
            body_bbox_list = []
            for bbox_i, bbox in enumerate(bbox_list):
                bbox = process_bbox(bbox,
                                    img_width=img_shape[1],
                                    img_height=img_shape[0],
                                    ratio=body_bbox_ratio)
                if bbox is None:
                    continue
                valid_idx.append(frame_start + bbox_i)
                bbox[2:] += bbox[:2]  # xywh -> xyxy
                body_bbox_list.append(bbox)
            if len(valid_idx) == 0:
                continue
            valid_num = len(valid_idx)

            # hand/face bboxes
            lhand_bbox_list = []
            rhand_bbox_list = []
            face_bbox_list = []
            smplx_valid_list = []
            for bbox_i in valid_idx:
                smplx_valid_list.append(smplx_valid[bbox_i])
                lhand_bbox = lhand_bbox_xywh[bbox_i]
                rhand_bbox = rhand_bbox_xywh[bbox_i]
                face_bbox = face_bbox_xywh[bbox_i]
                if lhand_bbox[-1] > 0:  # conf > 0
                    lhand_bbox = process_bbox(lhand_bbox[:4],
                                              img_width=img_shape[1],
                                              img_height=img_shape[0],
                                              ratio=hand_bbox_ratio)
                    if lhand_bbox is not None:
                        lhand_bbox[2:] += lhand_bbox[:2]  # xywh -> xyxy
                else:
                    lhand_bbox = None
                if rhand_bbox[-1] > 0:
                    rhand_bbox = process_bbox(rhand_bbox[:4],
                                              img_width=img_shape[1],
                                              img_height=img_shape[0],
                                              ratio=hand_bbox_ratio)
                    if rhand_bbox is not None:
                        rhand_bbox[2:] += rhand_bbox[:2]  # xywh -> xyxy
                else:
                    rhand_bbox = None
                if face_bbox[-1] > 0:
                    face_bbox = process_bbox(face_bbox[:4],
                                             img_width=img_shape[1],
                                             img_height=img_shape[0],
                                             ratio=hand_bbox_ratio)
                    if face_bbox is not None:
                        face_bbox[2:] += face_bbox[:2]  # xywh -> xyxy
                else:
                    face_bbox = None
                lhand_bbox_list.append(lhand_bbox)
                rhand_bbox_list.append(rhand_bbox)
                face_bbox_list.append(face_bbox)

            joint_img = keypoints2d[valid_idx]
            if valid_kps3d:
                joint_cam = keypoints3d[valid_idx]
            else:
                joint_cam = None

            # eye poses are not used; drop every variant of the keys
            for eye_key in ('leye_pose_0', 'leye_pose_1', 'leye_pose',
                            'reye_pose_0', 'reye_pose_1', 'reye_pose'):
                smplx.pop(eye_key, None)
            smplx_param = {k: v[valid_idx] for k, v in smplx.items()}
            gender_ = gender[valid_idx] \
                if gender is not None else np.array(['neutral'] * valid_num)
            lhand_bbox_valid = lhand_bbox_xywh[valid_idx, 4]
            rhand_bbox_valid = rhand_bbox_xywh[valid_idx, 4]
            face_bbox_valid = face_bbox_xywh[valid_idx, 4]

            # rename humandata keys to the internal convention
            # TODO: set invalid if None?
            smplx_param['root_pose'] = smplx_param.pop('global_orient', None)
            smplx_param['shape'] = smplx_param.pop('betas', None)
            smplx_param['trans'] = smplx_param.pop('transl',
                                                   np.zeros([valid_num, 3]))
            smplx_param['lhand_pose'] = smplx_param.pop('left_hand_pose', None)
            smplx_param['rhand_pose'] = smplx_param.pop('right_hand_pose', None)
            smplx_param['expr'] = smplx_param.pop('expression', None)

            # TODO: do not fix betas; give up shape supervision
            if 'betas_neutral' in smplx_param and self.data_split == 'train':
                smplx_param['shape'] = smplx_param.pop('betas_neutral')

            # per-dataset fixes for pose/shape tensor layouts
            if self.__class__.__name__ == 'Talkshow':
                smplx_param['body_pose'] = smplx_param['body_pose'].reshape(-1, 21, 3)
                smplx_param['lhand_pose'] = smplx_param['lhand_pose'].reshape(-1, 15, 3)
                smplx_param['rhand_pose'] = smplx_param['rhand_pose'].reshape(-1, 15, 3)
                smplx_param['expr'] = smplx_param['expr'][:, :10]

            if self.__class__.__name__ == 'BEDLAM':
                smplx_param['shape'] = smplx_param['shape'][:, :10]

            if self.__class__.__name__ == 'GTA':
                smplx_param['shape'] = np.zeros([valid_num, 10], dtype=np.float32)

            if self.__class__.__name__ == 'COCO_NA':
                smplx_param['body_pose'] = smplx_param['body_pose'].reshape(-1, 21, 3)
                smplx_param['lhand_pose'] = smplx_param['lhand_pose'].reshape(-1, 15, 3)
                smplx_param['rhand_pose'] = smplx_param['rhand_pose'].reshape(-1, 15, 3)

            if as_smplx == 'smpl':
                # drop smpl betas for smplx
                smplx_param['shape'] = np.zeros([valid_num, 10], dtype=np.float32)
                # use the first 21 smpl body joints on smplx
                smplx_param['body_pose'] = \
                    smplx_param['body_pose'].reshape(-1, 23, 3)[:, :21, :]

            if as_smplx == 'smplh':
                # drop smplh betas for smplx
                smplx_param['shape'] = np.zeros([valid_num, 10], dtype=np.float32)

            if smplx_param['lhand_pose'] is None or self.body_only:
                smplx_param['lhand_valid'] = np.zeros(valid_num, dtype=np.bool_)
            else:
                smplx_param['lhand_valid'] = lhand_bbox_valid.astype(np.bool_)
            if smplx_param['rhand_pose'] is None or self.body_only:
                smplx_param['rhand_valid'] = np.zeros(valid_num, dtype=np.bool_)
            else:
                smplx_param['rhand_valid'] = rhand_bbox_valid.astype(np.bool_)
            if smplx_param['expr'] is None or self.body_only:
                smplx_param['face_valid'] = np.zeros(valid_num, dtype=np.bool_)
            else:
                smplx_param['face_valid'] = face_bbox_valid.astype(np.bool_)
            smplx_param['smplx_valid'] = np.array(smplx_valid_list).astype(np.bool_)

            if joint_cam is not None and np.any(np.isnan(joint_cam)):
                continue

            if self.__class__.__name__ == 'SPEC':
                joint_img[:, :, 2] = joint_img[:, :, 2] > 0
                joint_cam[:, :, 3] = joint_cam[:, :, 0] != 0

            datalist.append({
                'img_path': img_path,
                'img_shape': img_shape,
                'bbox': body_bbox_list,
                'lhand_bbox': lhand_bbox_list,
                'rhand_bbox': rhand_bbox_list,
                'face_bbox': face_bbox_list,
                'joint_img': joint_img,
                'joint_cam': joint_cam,
                'smplx_param': smplx_param,
                'as_smplx': as_smplx,
                'gender': gender_
            })

        # save memory
        del content, image_path, bbox_xywh, lhand_bbox_xywh, \
            rhand_bbox_xywh, face_bbox_xywh, keypoints3d, keypoints2d

        if self.data_split == 'train':
            print(f'[{self.__class__.__name__} train] original size:',
                  int(num_examples), '. Sample interval:',
                  train_sample_interval, '. Sampled size:', len(datalist))

        if getattr(cfg, 'data_strategy', None) == 'balance' \
                and self.data_split == 'train':
            print(f'[{self.__class__.__name__}] '
                  f'Using [balance] strategy with datalist shuffled...')
            random.shuffle(datalist)

        return datalist
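
    # A minimal loading sketch for a concrete child dataset (the class name
    # and interval below are illustrative, not part of this file):
    #   dataset = MyHumanData(transform=None, data_split='train')
    #   dataset.datalist = dataset.load_data(train_sample_interval=10)
    #   sample = dataset[0]  # dict of image tensor, targets and meta_info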

    def __len__(self):
        return len(self.datalist)

    def __getitem__(self, idx):
        try:
            data = copy.deepcopy(self.datalist[idx])
        except Exception as e:
            print(f'[{self.__class__.__name__}] Error loading data {idx}')
            print(e)
            exit(0)

        img_path, img_shape, bbox = \
            data['img_path'], data['img_shape'], data['bbox']
        as_smplx = data['as_smplx']

        gender = data['gender'].copy()
        for gender_str, gender_num in {
                'neutral': -1, 'male': 0, 'female': 1}.items():
            gender[gender == gender_str] = gender_num
        gender = gender.astype(int)

        img_whole_bbox = np.array([0, 0, img_shape[1], img_shape[0]])
        img = load_img(img_path, order='BGR')

        num_person = len(data['bbox'])
        data_name = self.__class__.__name__
        try:
            img, img2bb_trans, bb2img_trans, rot, do_flip = \
                augmentation_instance_sample(img, img_whole_bbox,
                                             self.data_split, data, data_name)
        except Exception as e:
            print(f'[{self.__class__.__name__}] Error loading data {idx}')
            print(f'Error in augmentation_instance_sample for {img_path}')
            raise e
        cropped_img_shape = img.shape[:2]

        if self.data_split == 'train':
            joint_cam = data['joint_cam']  # (num_person, 137, 4)
            if joint_cam is not None:
                dummy_cord = False
                joint_cam[:, :, :3] = joint_cam[:, :, :3] - \
                    joint_cam[:, self.joint_set['root_joint_idx'], None, :3]  # root-relative
            else:
                # dummy coordinates as joint_cam
                dummy_cord = True
                joint_cam = np.zeros(
                    (num_person, self.joint_set['joint_num'], 4),
                    dtype=np.float32)

            joint_img = data['joint_img']
            # apply flip/rotation augmentation to the keypoints
            joint_img_aug, joint_cam_wo_ra, joint_cam_ra, joint_trunc = \
                process_db_coord_batch_no_valid(
                    joint_img, joint_cam, do_flip, img_shape,
                    self.joint_set['flip_pairs'], img2bb_trans, rot,
                    self.joint_set['joints_name'], smpl_x.joints_name,
                    cropped_img_shape)
            joint_img_aug[:, :, 2:] = joint_img_aug[:, :, 2:] * joint_trunc

            # smplx coordinates and parameters
            smplx_param = data['smplx_param']
            # these datasets store hand poses with the flat-hand mean added,
            # so subtract it back before processing
            if self.__class__.__name__ in ['CHI3D', 'SynBody', 'UBody_MM']:
                smplx_param['lhand_pose'] -= self.lhand_mean[None]
                smplx_param['rhand_pose'] -= self.rhand_mean[None]

            smplx_pose, smplx_shape, smplx_expr, smplx_pose_valid, \
                smplx_joint_valid, smplx_expr_valid, smplx_shape_valid = \
                process_human_model_output_batch_simplify(
                    smplx_param, do_flip, rot, as_smplx, data_name)
            smplx_joint_valid = smplx_joint_valid[:, :, None]

            # if cam is not provided, we take joint_img as the smplx 2D joints,
            # which is commonly the case for our processed humandata

            # change smplx_shape if use_betas_neutral;
            # processing follows process_human_model_output
            if self.use_betas_neutral:
                smplx_shape = smplx_param['betas_neutral'].reshape(
                    num_person, -1)
                smplx_shape[(np.abs(smplx_shape) > 3).any(axis=1)] = 0.
                smplx_shape = smplx_shape.reshape(num_person, -1)

            if self.__class__.__name__ == 'MPII_MM':
                # mark these joints invalid for MPII_MM
                for name in ('L_Ankle', 'R_Ankle', 'L_Wrist', 'R_Wrist'):
                    smplx_pose_valid[:, smpl_x.orig_joints_name.index(name)] = 0
                for name in ('L_Big_toe', 'L_Small_toe', 'L_Heel',
                             'R_Big_toe', 'R_Small_toe', 'R_Heel'):
                    smplx_joint_valid[:, smpl_x.joints_name.index(name)] = 0

            lhand_bbox_center_list = []
            lhand_bbox_valid_list = []
            lhand_bbox_size_list = []
            lhand_bbox_list = []
            face_bbox_center_list = []
            face_bbox_size_list = []
            face_bbox_valid_list = []
            face_bbox_list = []
            rhand_bbox_center_list = []
            rhand_bbox_valid_list = []
            rhand_bbox_size_list = []
            rhand_bbox_list = []
            body_bbox_center_list = []
            body_bbox_size_list = []
            body_bbox_valid_list = []
            body_bbox_list = []

            # transform body/hand/face bboxes into the cropped-image frame
            for i in range(num_person):
                body_bbox, body_bbox_valid = self.process_hand_face_bbox(
                    data['bbox'][i], do_flip, img_shape, img2bb_trans,
                    cropped_img_shape)
                lhand_bbox, lhand_bbox_valid = self.process_hand_face_bbox(
                    data['lhand_bbox'][i], do_flip, img_shape, img2bb_trans,
                    cropped_img_shape)
                lhand_bbox_valid *= smplx_param['lhand_valid'][i]
                rhand_bbox, rhand_bbox_valid = self.process_hand_face_bbox(
                    data['rhand_bbox'][i], do_flip, img_shape, img2bb_trans,
                    cropped_img_shape)
                rhand_bbox_valid *= smplx_param['rhand_valid'][i]
                face_bbox, face_bbox_valid = self.process_hand_face_bbox(
                    data['face_bbox'][i], do_flip, img_shape, img2bb_trans,
                    cropped_img_shape)
                # note: BEDLAM and COCO_NA do not have face expression
                face_bbox_valid *= smplx_param['face_valid'][i]

                if do_flip:
                    lhand_bbox, rhand_bbox = rhand_bbox, lhand_bbox
                    lhand_bbox_valid, rhand_bbox_valid = \
                        rhand_bbox_valid, lhand_bbox_valid

                body_bbox_list.append(body_bbox)
                lhand_bbox_list.append(lhand_bbox)
                rhand_bbox_list.append(rhand_bbox)
                face_bbox_list.append(face_bbox)

                lhand_bbox_center = (lhand_bbox[0] + lhand_bbox[1]) / 2.
                rhand_bbox_center = (rhand_bbox[0] + rhand_bbox[1]) / 2.
                face_bbox_center = (face_bbox[0] + face_bbox[1]) / 2.
                body_bbox_center = (body_bbox[0] + body_bbox[1]) / 2.
                lhand_bbox_size = lhand_bbox[1] - lhand_bbox[0]
                rhand_bbox_size = rhand_bbox[1] - rhand_bbox[0]
                face_bbox_size = face_bbox[1] - face_bbox[0]
                body_bbox_size = body_bbox[1] - body_bbox[0]

                lhand_bbox_center_list.append(lhand_bbox_center)
                lhand_bbox_valid_list.append(lhand_bbox_valid)
                lhand_bbox_size_list.append(lhand_bbox_size)
                face_bbox_center_list.append(face_bbox_center)
                face_bbox_size_list.append(face_bbox_size)
                face_bbox_valid_list.append(face_bbox_valid)
                rhand_bbox_center_list.append(rhand_bbox_center)
                rhand_bbox_valid_list.append(rhand_bbox_valid)
                rhand_bbox_size_list.append(rhand_bbox_size)
                body_bbox_center_list.append(body_bbox_center)
                body_bbox_size_list.append(body_bbox_size)
                body_bbox_valid_list.append(body_bbox_valid)

            body_bbox = np.stack(body_bbox_list, axis=0)
            lhand_bbox = np.stack(lhand_bbox_list, axis=0)
            rhand_bbox = np.stack(rhand_bbox_list, axis=0)
            face_bbox = np.stack(face_bbox_list, axis=0)
            lhand_bbox_center = np.stack(lhand_bbox_center_list, axis=0)
            lhand_bbox_valid = np.stack(lhand_bbox_valid_list, axis=0)
            lhand_bbox_size = np.stack(lhand_bbox_size_list, axis=0)
            face_bbox_center = np.stack(face_bbox_center_list, axis=0)
            face_bbox_size = np.stack(face_bbox_size_list, axis=0)
            face_bbox_valid = np.stack(face_bbox_valid_list, axis=0)
            body_bbox_center = np.stack(body_bbox_center_list, axis=0)
            body_bbox_size = np.stack(body_bbox_size_list, axis=0)
            body_bbox_valid = np.stack(body_bbox_valid_list, axis=0)
            rhand_bbox_center = np.stack(rhand_bbox_center_list, axis=0)
            rhand_bbox_valid = np.stack(rhand_bbox_valid_list, axis=0)
            rhand_bbox_size = np.stack(rhand_bbox_size_list, axis=0)

            inputs = {'img': img}
            is_3D = float(False) if dummy_cord else float(True)
            if self.__class__.__name__ == 'COCO_NA':
                is_3D = float(False)
            if self.__class__.__name__ == 'GTA_Human2':
                smplx_shape_valid = smplx_shape_valid * 0
            if self.__class__.__name__ in ('PoseTrack', 'MPII_MM', 'CrowdPose',
                                           'UBody_MM', 'COCO_NA'):
                joint_cam_ra[..., -1] = joint_cam_ra[..., -1] * smplx_joint_valid[..., 0]
                joint_cam_wo_ra[..., -1] = joint_cam_wo_ra[..., -1] * smplx_joint_valid[..., 0]
                joint_img_aug[..., -1] = joint_img_aug[..., -1] * smplx_joint_valid[..., 0]

            # keep only persons whose body bbox is valid
            targets = {
                # keypoints2d: [0, img_w], [0, img_h] -> [0, 1] -> [0, output_hm_shape]
                'joint_img': joint_img_aug[body_bbox_valid > 0],
                # joint_cam: raw kps3d, without root alignment
                'joint_cam': joint_cam_wo_ra[body_bbox_valid > 0],
                # kps3d with body, face, hand root alignment
                'smplx_joint_cam': joint_cam_ra[body_bbox_valid > 0],
                'smplx_pose': smplx_pose[body_bbox_valid > 0],
                'smplx_shape': smplx_shape[body_bbox_valid > 0],
                'smplx_expr': smplx_expr[body_bbox_valid > 0],
                'lhand_bbox_center': lhand_bbox_center[body_bbox_valid > 0],
                'lhand_bbox_size': lhand_bbox_size[body_bbox_valid > 0],
                'rhand_bbox_center': rhand_bbox_center[body_bbox_valid > 0],
                'rhand_bbox_size': rhand_bbox_size[body_bbox_valid > 0],
                'face_bbox_center': face_bbox_center[body_bbox_valid > 0],
                'face_bbox_size': face_bbox_size[body_bbox_valid > 0],
                'body_bbox_center': body_bbox_center[body_bbox_valid > 0],
                'body_bbox_size': body_bbox_size[body_bbox_valid > 0],
                'body_bbox': body_bbox.reshape(-1, 4)[body_bbox_valid > 0],
                'lhand_bbox': lhand_bbox.reshape(-1, 4)[body_bbox_valid > 0],
                'rhand_bbox': rhand_bbox.reshape(-1, 4)[body_bbox_valid > 0],
                'face_bbox': face_bbox.reshape(-1, 4)[body_bbox_valid > 0],
                'gender': gender[body_bbox_valid > 0]}
            meta_info = {
                'joint_trunc': joint_trunc[body_bbox_valid > 0],
                'smplx_pose_valid': smplx_pose_valid[body_bbox_valid > 0],
                'smplx_shape_valid': smplx_shape_valid[body_bbox_valid > 0],
                'smplx_expr_valid': smplx_expr_valid[body_bbox_valid > 0],
                'is_3D': is_3D,
                'lhand_bbox_valid': lhand_bbox_valid[body_bbox_valid > 0],
                'rhand_bbox_valid': rhand_bbox_valid[body_bbox_valid > 0],
                'face_bbox_valid': face_bbox_valid[body_bbox_valid > 0],
                'body_bbox_valid': body_bbox_valid[body_bbox_valid > 0],
                'img_shape': np.array(img.shape[:2]),
                'ori_shape': data['img_shape'],
                'idx': idx
            }
            result = {**inputs, **targets, **meta_info}
            result = self.normalize(result)
            result = self.format(result)
            return result

        if self.data_split == 'test':
            self.cam_param = {}
            joint_cam = data['joint_cam']
            if joint_cam is not None:
                dummy_cord = False
                joint_cam[:, :, :3] = joint_cam[:, :, :3] - joint_cam[
                    :, self.joint_set['root_joint_idx'], None, :3]  # root-relative
            else:
                # dummy coordinates as joint_cam
                dummy_cord = True
                joint_cam = np.zeros(
                    (num_person, self.joint_set['joint_num'], 3),
                    dtype=np.float32)

            joint_img = data['joint_img']
            joint_img_aug, joint_cam_wo_ra, joint_cam_ra, joint_trunc = \
                process_db_coord_batch_no_valid(
                    joint_img, joint_cam, do_flip, img_shape,
                    self.joint_set['flip_pairs'], img2bb_trans, rot,
                    self.joint_set['joints_name'], smpl_x.joints_name,
                    cropped_img_shape)

            # smplx coordinates and parameters
            smplx_param = data['smplx_param']
            # TODO: remove this; separate smpl and smplx
            smplx_pose, smplx_shape, smplx_expr, smplx_pose_valid, \
                smplx_joint_valid, smplx_expr_valid, smplx_shape_valid = \
                process_human_model_output_batch_simplify(
                    smplx_param, do_flip, rot, as_smplx)
            # if cam is not provided, we take joint_img as the smplx 2D joints,
            # which is commonly the case for our processed humandata
            if self.use_betas_neutral:
                smplx_shape = smplx_param['betas_neutral'].reshape(
                    num_person, -1)
                smplx_shape[(np.abs(smplx_shape) > 3).any(axis=1)] = 0.
                smplx_shape = smplx_shape.reshape(num_person, -1)
            smplx_joint_valid = smplx_joint_valid[:, :, None]

            lhand_bbox_center_list = []
            lhand_bbox_valid_list = []
            lhand_bbox_size_list = []
            lhand_bbox_list = []
            face_bbox_center_list = []
            face_bbox_size_list = []
            face_bbox_valid_list = []
            face_bbox_list = []
            rhand_bbox_center_list = []
            rhand_bbox_valid_list = []
            rhand_bbox_size_list = []
            rhand_bbox_list = []
            body_bbox_center_list = []
            body_bbox_size_list = []
            body_bbox_valid_list = []
            body_bbox_list = []

            for i in range(num_person):
                lhand_bbox, lhand_bbox_valid = self.process_hand_face_bbox(
                    data['lhand_bbox'][i], do_flip, img_shape, img2bb_trans,
                    cropped_img_shape)
                rhand_bbox, rhand_bbox_valid = self.process_hand_face_bbox(
                    data['rhand_bbox'][i], do_flip, img_shape, img2bb_trans,
                    cropped_img_shape)
                face_bbox, face_bbox_valid = self.process_hand_face_bbox(
                    data['face_bbox'][i], do_flip, img_shape, img2bb_trans,
                    cropped_img_shape)
                body_bbox, body_bbox_valid = self.process_hand_face_bbox(
                    data['bbox'][i], do_flip, img_shape, img2bb_trans,
                    cropped_img_shape)

                if do_flip:
                    lhand_bbox, rhand_bbox = rhand_bbox, lhand_bbox
                    lhand_bbox_valid, rhand_bbox_valid = \
                        rhand_bbox_valid, lhand_bbox_valid

                body_bbox_list.append(body_bbox)
                lhand_bbox_list.append(lhand_bbox)
                rhand_bbox_list.append(rhand_bbox)
                face_bbox_list.append(face_bbox)

                lhand_bbox_center = (lhand_bbox[0] + lhand_bbox[1]) / 2.
                rhand_bbox_center = (rhand_bbox[0] + rhand_bbox[1]) / 2.
                face_bbox_center = (face_bbox[0] + face_bbox[1]) / 2.
                body_bbox_center = (body_bbox[0] + body_bbox[1]) / 2.
                lhand_bbox_size = lhand_bbox[1] - lhand_bbox[0]
                rhand_bbox_size = rhand_bbox[1] - rhand_bbox[0]
                face_bbox_size = face_bbox[1] - face_bbox[0]
                body_bbox_size = body_bbox[1] - body_bbox[0]

                lhand_bbox_center_list.append(lhand_bbox_center)
                lhand_bbox_valid_list.append(lhand_bbox_valid)
                lhand_bbox_size_list.append(lhand_bbox_size)
                face_bbox_center_list.append(face_bbox_center)
                face_bbox_size_list.append(face_bbox_size)
                face_bbox_valid_list.append(face_bbox_valid)
                rhand_bbox_center_list.append(rhand_bbox_center)
                rhand_bbox_valid_list.append(rhand_bbox_valid)
                rhand_bbox_size_list.append(rhand_bbox_size)
                body_bbox_center_list.append(body_bbox_center)
                body_bbox_size_list.append(body_bbox_size)
                body_bbox_valid_list.append(body_bbox_valid)

            body_bbox = np.stack(body_bbox_list, axis=0)
            lhand_bbox = np.stack(lhand_bbox_list, axis=0)
            rhand_bbox = np.stack(rhand_bbox_list, axis=0)
            face_bbox = np.stack(face_bbox_list, axis=0)
            lhand_bbox_center = np.stack(lhand_bbox_center_list, axis=0)
            lhand_bbox_valid = np.stack(lhand_bbox_valid_list, axis=0)
            lhand_bbox_size = np.stack(lhand_bbox_size_list, axis=0)
            face_bbox_center = np.stack(face_bbox_center_list, axis=0)
            face_bbox_size = np.stack(face_bbox_size_list, axis=0)
            face_bbox_valid = np.stack(face_bbox_valid_list, axis=0)
            body_bbox_center = np.stack(body_bbox_center_list, axis=0)
            body_bbox_size = np.stack(body_bbox_size_list, axis=0)
            body_bbox_valid = np.stack(body_bbox_valid_list, axis=0)
            rhand_bbox_center = np.stack(rhand_bbox_center_list, axis=0)
            rhand_bbox_valid = np.stack(rhand_bbox_valid_list, axis=0)
            rhand_bbox_size = np.stack(rhand_bbox_size_list, axis=0)

            inputs = {'img': img}
            targets = {
                # keypoints2d: [0, img_w], [0, img_h] -> [0, 1] -> [0, output_hm_shape]
                'joint_img': joint_img_aug,
                # projected smplx if valid cam_param, else same as keypoints2d
                # joint_cam: raw kps3d, without root alignment
                'joint_cam': joint_cam_wo_ra,
                'ann_idx': idx,
                # kps3d with body, face, hand root alignment
                'smplx_joint_cam': joint_cam_ra,
                'smplx_pose': smplx_pose,
                'smplx_shape': smplx_shape,
                'smplx_expr': smplx_expr,
                'lhand_bbox_center': lhand_bbox_center,
                'lhand_bbox_size': lhand_bbox_size,
                'rhand_bbox_center': rhand_bbox_center,
                'rhand_bbox_size': rhand_bbox_size,
                'face_bbox_center': face_bbox_center,
                'face_bbox_size': face_bbox_size,
                'body_bbox_center': body_bbox_center,
                'body_bbox_size': body_bbox_size,
                'body_bbox': body_bbox.reshape(-1, 4),
                'lhand_bbox': lhand_bbox.reshape(-1, 4),
                'rhand_bbox': rhand_bbox.reshape(-1, 4),
                'face_bbox': face_bbox.reshape(-1, 4),
                'gender': gender,
                'bb2img_trans': bb2img_trans,
            }

            if self.body_only:
                meta_info = {
                    'joint_trunc': joint_trunc,
                    'smplx_pose_valid': smplx_pose_valid,
                    'smplx_shape_valid': float(smplx_shape_valid),
                    'smplx_expr_valid': smplx_expr_valid,
                    'is_3D': float(False) if dummy_cord else float(True),
                    'lhand_bbox_valid': lhand_bbox_valid,
                    'rhand_bbox_valid': rhand_bbox_valid,
                    'face_bbox_valid': face_bbox_valid,
                    'body_bbox_valid': body_bbox_valid,
                    'img_shape': np.array(img.shape[:2]),
                    'ori_shape': data['img_shape'],
                    'idx': idx
                }
            else:
                meta_info = {
                    'joint_trunc': joint_trunc,
                    'smplx_pose_valid': smplx_pose_valid,
                    'smplx_shape_valid': smplx_shape_valid,
                    'smplx_expr_valid': smplx_expr_valid,
                    'is_3D': float(False) if dummy_cord else float(True),
                    'lhand_bbox_valid': lhand_bbox_valid,
                    'rhand_bbox_valid': rhand_bbox_valid,
                    'face_bbox_valid': face_bbox_valid,
                    'body_bbox_valid': body_bbox_valid,
                    'img_shape': np.array(img.shape[:2]),
                    'ori_shape': data['img_shape'],
                    'idx': idx
                }
            result = {**inputs, **targets, **meta_info}
            result = self.normalize(result)
            result = self.format(result)
            return result

    def process_hand_face_bbox(self, bbox, do_flip, img_shape, img2bb_trans,
                               input_img_shape):
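        """Transform one xyxy bbox into the cropped-image frame.

        Returns a (2, 2) array of normalized [0, 1] corner points (top-left,
        bottom-right) plus a float validity flag. A dummy unit box with
        valid = 0.0 is returned when bbox is None or the transformed box
        falls completely outside the crop.
        """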
        if bbox is None:
            bbox = np.array([0, 0, 1, 1],
                            dtype=np.float32).reshape(2, 2)  # dummy value
            bbox_valid = float(False)  # dummy value
        else:
            # reshape to top-left (x, y) and bottom-right (x, y)
            bbox = bbox.reshape(2, 2)

            # flip augmentation
            if do_flip:
                bbox[:, 0] = img_shape[1] - bbox[:, 0] - 1
                bbox[0, 0], bbox[1, 0] = bbox[1, 0].copy(), bbox[
                    0, 0].copy()  # xmin <-> xmax swap

            # make the four corner points of the bbox
            bbox = bbox.reshape(4).tolist()
            xmin, ymin, xmax, ymax = bbox
            bbox = np.array(
                [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]],
                dtype=np.float32).reshape(4, 2)

            # affine transformation (crop, rotation, scale)
            bbox_xy1 = np.concatenate((bbox, np.ones_like(bbox[:, :1])), 1)
            bbox = np.dot(img2bb_trans,
                          bbox_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
            bbox[:, 0] /= input_img_shape[1]
            bbox[:, 1] /= input_img_shape[0]

            # make the box an axis-aligned rectangle and clip it to the image
            if np.max(bbox[:, 0]) <= 0 or np.min(bbox[:, 0]) >= 1 or \
                    np.max(bbox[:, 1]) <= 0 or np.min(bbox[:, 1]) >= 1:
                bbox_valid = float(False)
                bbox = np.array([0, 0, 1, 1], dtype=np.float32)
            else:
                xmin = np.max([np.min(bbox[:, 0]), 0])
                xmax = np.min([np.max(bbox[:, 0]), 1])
                ymin = np.max([np.min(bbox[:, 1]), 0])
                ymax = np.min([np.max(bbox[:, 1]), 1])
                bbox = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
                bbox = np.clip(bbox, 0, 1)
                bbox_valid = float(True)

        bbox = bbox.reshape(2, 2)
        return bbox, bbox_valid

    def evaluate(self, outs, cur_sample_idx=None):
        annots = self.datalist
        sample_num = len(outs)
        eval_result = {
            'pa_mpvpe_all': [],
            'pa_mpvpe_l_hand': [],
            'pa_mpvpe_r_hand': [],
            'pa_mpvpe_hand': [],
            'pa_mpvpe_face': [],
            'mpvpe_all': [],
            'mpvpe_l_hand': [],
            'mpvpe_r_hand': [],
            'mpvpe_hand': [],
            'mpvpe_face': [],
            'pa_mpjpe_body': [],
            'pa_mpjpe_l_hand': [],
            'pa_mpjpe_r_hand': [],
            'pa_mpjpe_hand': []
        }
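        # Metric conventions used below: MPVPE/MPJPE are mean per-vertex /
        # per-joint position errors in millimeters (hence the * 1000 on
        # meter-scale meshes); the 'pa_' variants are computed after
        # Procrustes alignment of the prediction to the GT via rigid_align.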

        for n in range(sample_num):
            out = outs[n]
            ann_idx = out['gt_ann_idx']
            mesh_gt = out['smplx_mesh_cam_pseudo_gt']
            mesh_out = out['smplx_mesh_cam']
            cam_trans = out['cam_trans']
            img_path = []
            for ann_id in ann_idx:
                img_path.append(annots[ann_id]['img_path'])
            eval_result['img_path'] = img_path
            eval_result['ann_idx'] = ann_idx
            img = out['img']

            # MPVPE from all vertices (pelvis-aligned)
            mesh_out_align = mesh_out - np.dot(
                smpl_x.J_regressor,
                mesh_out)[smpl_x.J_regressor_idx['pelvis'], None, :] + np.dot(
                    smpl_x.J_regressor,
                    mesh_gt)[smpl_x.J_regressor_idx['pelvis'], None, :]
            eval_result['mpvpe_all'].append(
                np.sqrt(np.sum((mesh_out_align - mesh_gt)**2, 1)).mean() * 1000)
            mesh_out_align = rigid_align(mesh_out, mesh_gt)
            eval_result['pa_mpvpe_all'].append(
                np.sqrt(np.sum((mesh_out_align - mesh_gt)**2, 1)).mean() * 1000)

            # MPVPE from hand vertices (wrist-aligned)
            mesh_gt_lhand = mesh_gt[smpl_x.hand_vertex_idx['left_hand'], :]
            mesh_out_lhand = mesh_out[smpl_x.hand_vertex_idx['left_hand'], :]
            mesh_gt_rhand = mesh_gt[smpl_x.hand_vertex_idx['right_hand'], :]
            mesh_out_rhand = mesh_out[smpl_x.hand_vertex_idx['right_hand'], :]
            mesh_out_lhand_align = mesh_out_lhand - np.dot(
                smpl_x.J_regressor,
                mesh_out)[smpl_x.J_regressor_idx['lwrist'], None, :] + np.dot(
                    smpl_x.J_regressor,
                    mesh_gt)[smpl_x.J_regressor_idx['lwrist'], None, :]
            mesh_out_rhand_align = mesh_out_rhand - np.dot(
                smpl_x.J_regressor,
                mesh_out)[smpl_x.J_regressor_idx['rwrist'], None, :] + np.dot(
                    smpl_x.J_regressor,
                    mesh_gt)[smpl_x.J_regressor_idx['rwrist'], None, :]
            eval_result['mpvpe_l_hand'].append(
                np.sqrt(np.sum((mesh_out_lhand_align - mesh_gt_lhand)**2,
                               1)).mean() * 1000)
            eval_result['mpvpe_r_hand'].append(
                np.sqrt(np.sum((mesh_out_rhand_align - mesh_gt_rhand)**2,
                               1)).mean() * 1000)
            eval_result['mpvpe_hand'].append(
                (np.sqrt(np.sum((mesh_out_lhand_align - mesh_gt_lhand)**2,
                                1)).mean() * 1000 +
                 np.sqrt(np.sum((mesh_out_rhand_align - mesh_gt_rhand)**2,
                                1)).mean() * 1000) / 2.)
            mesh_out_lhand_align = rigid_align(mesh_out_lhand, mesh_gt_lhand)
            mesh_out_rhand_align = rigid_align(mesh_out_rhand, mesh_gt_rhand)
            eval_result['pa_mpvpe_l_hand'].append(
                np.sqrt(np.sum((mesh_out_lhand_align - mesh_gt_lhand)**2,
                               1)).mean() * 1000)
            eval_result['pa_mpvpe_r_hand'].append(
                np.sqrt(np.sum((mesh_out_rhand_align - mesh_gt_rhand)**2,
                               1)).mean() * 1000)
            eval_result['pa_mpvpe_hand'].append(
                (np.sqrt(np.sum((mesh_out_lhand_align - mesh_gt_lhand)**2,
                                1)).mean() * 1000 +
                 np.sqrt(np.sum((mesh_out_rhand_align - mesh_gt_rhand)**2,
                                1)).mean() * 1000) / 2.)

            if self.__class__.__name__ == 'UBody':
                # project GT joints/vertices to the original image and keep
                # only the ones that land inside it
                joint_gt_body_wo_trans = np.dot(smpl_x.j14_regressor, mesh_gt)
                img_wh = out['gt_img_shape'].flip(-1)
                joint_gt_body_proj = project_points_new(
                    points_3d=joint_gt_body_wo_trans,
                    pred_cam=cam_trans,
                    focal_length=5000,
                    camera_center=img_wh / 2)  # original image space
                joint_gt_lhand_wo_trans = np.dot(
                    smpl_x.orig_hand_regressor['left'], mesh_gt)
                joint_gt_lhand_proj = project_points_new(
                    points_3d=joint_gt_lhand_wo_trans,
                    pred_cam=cam_trans,
                    focal_length=5000,
                    camera_center=img_wh / 2)  # original image space
                joint_gt_rhand_wo_trans = np.dot(
                    smpl_x.orig_hand_regressor['right'], mesh_gt)
                joint_gt_rhand_proj = project_points_new(
                    points_3d=joint_gt_rhand_wo_trans,
                    pred_cam=cam_trans,
                    focal_length=5000,
                    camera_center=img_wh / 2)  # original image space
                mesh_gt_proj = project_points_new(
                    points_3d=mesh_gt,
                    pred_cam=cam_trans,
                    focal_length=5000,
                    camera_center=img_wh / 2)
                joint_gt_body_valid = self.validate_within_img(
                    img, joint_gt_body_proj)
                joint_gt_lhand_valid = self.validate_within_img(
                    img, joint_gt_lhand_proj)
                joint_gt_rhand_valid = self.validate_within_img(
                    img, joint_gt_rhand_proj)
                mesh_valid = self.validate_within_img(img, mesh_gt_proj)
                mesh_lhand_valid = mesh_valid[smpl_x.hand_vertex_idx['left_hand']]
                mesh_rhand_valid = mesh_valid[smpl_x.hand_vertex_idx['right_hand']]
                mesh_face_valid = mesh_valid[smpl_x.face_vertex_idx]

            # MPVPE from face vertices (neck-aligned)
            mesh_gt_face = mesh_gt[smpl_x.face_vertex_idx, :]
            mesh_out_face = mesh_out[smpl_x.face_vertex_idx, :]
            mesh_out_face_align = mesh_out_face - np.dot(
                smpl_x.J_regressor,
                mesh_out)[smpl_x.J_regressor_idx['neck'], None, :] + np.dot(
                    smpl_x.J_regressor,
                    mesh_gt)[smpl_x.J_regressor_idx['neck'], None, :]
            eval_result['mpvpe_face'].append(
                np.sqrt(np.sum((mesh_out_face_align - mesh_gt_face)**2,
                               1)).mean() * 1000)
            mesh_out_face_align = rigid_align(mesh_out_face, mesh_gt_face)
            eval_result['pa_mpvpe_face'].append(
                np.sqrt(np.sum((mesh_out_face_align - mesh_gt_face)**2,
                               1)).mean() * 1000)

            # MPJPE from body joints
            joint_gt_body = np.dot(smpl_x.j14_regressor, mesh_gt)
            joint_out_body = np.dot(smpl_x.j14_regressor, mesh_out)
            joint_out_body_align = rigid_align(joint_out_body, joint_gt_body)

            # MPJPE from hand joints
            joint_gt_lhand = np.dot(smpl_x.orig_hand_regressor['left'], mesh_gt)
            joint_out_lhand = np.dot(smpl_x.orig_hand_regressor['left'], mesh_out)
            joint_out_lhand_align = rigid_align(joint_out_lhand, joint_gt_lhand)
            joint_gt_rhand = np.dot(smpl_x.orig_hand_regressor['right'], mesh_gt)
            joint_out_rhand = np.dot(smpl_x.orig_hand_regressor['right'], mesh_out)
            joint_out_rhand_align = rigid_align(joint_out_rhand, joint_gt_rhand)

            if self.__class__.__name__ == 'UBody':
                # only average over joints whose GT projection is in-image
                eval_result['pa_mpjpe_body'].append(
                    np.sqrt(np.sum((joint_out_body_align - joint_gt_body)**2,
                                   1))[joint_gt_body_valid].mean() * 1000)
                pa_mpjpe_hand = []
                if sum(joint_gt_lhand_valid) != 0:
                    pa_mpjpe_lhand = np.sqrt(
                        np.sum((joint_out_lhand_align - joint_gt_lhand)**2,
                               1))[joint_gt_lhand_valid].mean() * 1000
                    pa_mpjpe_hand.append(pa_mpjpe_lhand)
                    eval_result['pa_mpjpe_l_hand'].append(pa_mpjpe_lhand)
                if sum(joint_gt_rhand_valid) != 0:
                    pa_mpjpe_rhand = np.sqrt(
                        np.sum((joint_out_rhand_align - joint_gt_rhand)**2,
                               1))[joint_gt_rhand_valid].mean() * 1000
                    pa_mpjpe_hand.append(pa_mpjpe_rhand)
                    eval_result['pa_mpjpe_r_hand'].append(pa_mpjpe_rhand)
                if len(pa_mpjpe_hand) > 0:
                    eval_result['pa_mpjpe_hand'].append(np.mean(pa_mpjpe_hand))
            else:
                eval_result['pa_mpjpe_body'].append(
                    np.sqrt(np.sum((joint_out_body_align - joint_gt_body)**2,
                                   1)).mean() * 1000)
                eval_result['pa_mpjpe_l_hand'].append(
                    np.sqrt(np.sum((joint_out_lhand_align - joint_gt_lhand)**2,
                                   1)).mean() * 1000)
                eval_result['pa_mpjpe_r_hand'].append(
                    np.sqrt(np.sum((joint_out_rhand_align - joint_gt_rhand)**2,
                                   1)).mean() * 1000)
                eval_result['pa_mpjpe_hand'].append(
                    (np.sqrt(np.sum((joint_out_lhand_align - joint_gt_lhand)**2,
                                    1)).mean() * 1000 +
                     np.sqrt(np.sum((joint_out_rhand_align - joint_gt_rhand)**2,
                                    1)).mean() * 1000) / 2.)

        return eval_result

    def print_eval_result(self, eval_result):
        print(f'======{cfg.testset}======')
        print('PA MPVPE (All): %.2f mm' % np.mean(eval_result['pa_mpvpe_all']))
        print('PA MPVPE (L-Hands): %.2f mm' %
              np.mean(eval_result['pa_mpvpe_l_hand']))
        print('PA MPVPE (R-Hands): %.2f mm' %
              np.mean(eval_result['pa_mpvpe_r_hand']))
        print('PA MPVPE (Hands): %.2f mm' %
              np.mean(eval_result['pa_mpvpe_hand']))
        print('PA MPVPE (Face): %.2f mm' %
              np.mean(eval_result['pa_mpvpe_face']))
        print()
        print('MPVPE (All): %.2f mm' % np.mean(eval_result['mpvpe_all']))
        print('MPVPE (L-Hands): %.2f mm' %
              np.mean(eval_result['mpvpe_l_hand']))
        print('MPVPE (R-Hands): %.2f mm' %
              np.mean(eval_result['mpvpe_r_hand']))
        print('MPVPE (Hands): %.2f mm' % np.mean(eval_result['mpvpe_hand']))
        print('MPVPE (Face): %.2f mm' % np.mean(eval_result['mpvpe_face']))
        print()
        print('PA MPJPE (Body): %.2f mm' %
              np.mean(eval_result['pa_mpjpe_body']))
        print('PA MPJPE (L-Hands): %.2f mm' %
              np.mean(eval_result['pa_mpjpe_l_hand']))
        print('PA MPJPE (R-Hands): %.2f mm' %
              np.mean(eval_result['pa_mpjpe_r_hand']))
        print('PA MPJPE (Hands): %.2f mm' %
              np.mean(eval_result['pa_mpjpe_hand']))

        # mirror the metrics to a result file, one line per metric
        with open(os.path.join(cfg.result_dir, 'result.txt'), 'w') as f:
            f.write(f'{cfg.testset} dataset\n')
            f.write('PA MPVPE (All): %.2f mm\n' %
                    np.mean(eval_result['pa_mpvpe_all']))
            f.write('PA MPVPE (L-Hands): %.2f mm\n' %
                    np.mean(eval_result['pa_mpvpe_l_hand']))
            f.write('PA MPVPE (R-Hands): %.2f mm\n' %
                    np.mean(eval_result['pa_mpvpe_r_hand']))
            f.write('PA MPVPE (Hands): %.2f mm\n' %
                    np.mean(eval_result['pa_mpvpe_hand']))
            f.write('PA MPVPE (Face): %.2f mm\n' %
                    np.mean(eval_result['pa_mpvpe_face']))
            f.write('MPVPE (All): %.2f mm\n' %
                    np.mean(eval_result['mpvpe_all']))
            f.write('MPVPE (L-Hands): %.2f mm\n' %
                    np.mean(eval_result['mpvpe_l_hand']))
            f.write('MPVPE (R-Hands): %.2f mm\n' %
                    np.mean(eval_result['mpvpe_r_hand']))
            f.write('MPVPE (Hands): %.2f mm\n' %
                    np.mean(eval_result['mpvpe_hand']))
            f.write('MPVPE (Face): %.2f mm\n' %
                    np.mean(eval_result['mpvpe_face']))
            f.write('PA MPJPE (Body): %.2f mm\n' %
                    np.mean(eval_result['pa_mpjpe_body']))
            f.write('PA MPJPE (L-Hands): %.2f mm\n' %
                    np.mean(eval_result['pa_mpjpe_l_hand']))
            f.write('PA MPJPE (R-Hands): %.2f mm\n' %
                    np.mean(eval_result['pa_mpjpe_r_hand']))
            f.write('PA MPJPE (Hands): %.2f mm\n' %
                    np.mean(eval_result['pa_mpjpe_hand']))

    def validate_within_img_batch(self, img_wh, points):
        """Check whether each point lies inside the image bounds.

        img_wh: (num_person, 2) image sizes as (w, h);
        points: (num_person, num_points, 2) pixel coordinates.
        """
        valid_mask = np.logical_and((points - img_wh[:, None]) < 0, points > 0)
        valid_mask = np.logical_and(valid_mask[:, :, 0], valid_mask[:, :, 1])
        return valid_mask
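
    # evaluate() above calls self.validate_within_img on per-sample points,
    # but only the batch variant is defined in this file; the following
    # single-sample version is a minimal sketch consistent with the batch one.
    def validate_within_img(self, img, points):
        # img: (h, w, c); points: (num_points, 2) in original-image pixels
        img_wh = np.array([img.shape[1], img.shape[0]])
        valid_mask = np.logical_and((points - img_wh[None]) < 0, points > 0)
        return np.logical_and(valid_mask[:, 0], valid_mask[:, 1])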

    def decompress_keypoints(self, humandata) -> dict:
        """For each key containing 'keypoints' whose f'{key}_mask' is present,
        re-insert zeros at the masked-out positions so that every keypoints
        array is restored to full size.

        Raises:
            KeyError:
                A key containing 'keypoints' has been found
                but its corresponding mask is missing.
        """
        assert bool(humandata['__keypoints_compressed__']) is True
        key_pairs = []
        for key in humandata.files:
            if key not in KPS2D_KEYS + KPS3D_KEYS:
                continue
            mask_key = f'{key}_mask'
            if mask_key in humandata.files:
                print(f'Decompress {key}...')
                key_pairs.append([key, mask_key])
        decompressed_dict = {}
        for kpt_key, mask_key in key_pairs:
            mask_array = np.asarray(humandata[mask_key])
            compressed_kpt = humandata[kpt_key]
            kpt_array = self.add_zero_pad(compressed_kpt, mask_array)
            decompressed_dict[kpt_key] = kpt_array
        del humandata
        return decompressed_dict

    def add_zero_pad(self, compressed_array: np.ndarray,
                     mask_array: np.ndarray) -> np.ndarray:
        """Pad zeros to a compressed keypoints array.

        Args:
            compressed_array (np.ndarray):
                A compressed keypoints array.
            mask_array (np.ndarray):
                The mask recording the compression relationship.

        Returns:
            np.ndarray:
                A keypoints array at full size.
        """
        assert mask_array.sum() == compressed_array.shape[1]
        data_len, _, dim = compressed_array.shape
        mask_len = mask_array.shape[0]
        ret_value = np.zeros(shape=[data_len, mask_len, dim],
                             dtype=compressed_array.dtype)
        valid_mask_index = np.where(mask_array == 1)[0]
        ret_value[:, valid_mask_index, :] = compressed_array
        return ret_value
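
    # For example, with a 137-entry mask containing 18 ones, a compressed
    # (N, 18, 3) keypoints array expands back to (N, 137, 3), with zeros at
    # the masked-out joints (the counts here are illustrative).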