import os
import os.path as osp
import pickle
import shutil
from glob import glob

import cv2
import mmcv
import numpy as np
import torch
import torch.distributed as dist
from mmcv.runner import get_dist_info

from detrsmpl.core.conventions.keypoints_mapping import convert_kps
from detrsmpl.core.visualization.visualize_smpl import render_smpl
from detrsmpl.data.datasets.pipelines.transforms import Normalize
from detrsmpl.models.body_models.builder import build_body_model
from detrsmpl.utils.demo_utils import xyxy2xywh
from detrsmpl.utils.ffmpeg_utils import video_to_images
from util.formatting import DefaultFormatBundle
from util.preprocessing import load_img, augmentation_keep_size
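

# Dataset for demo/inference inputs. It accepts either a directory of images
# or a single .mp4 video, stages all frames as sequentially numbered PNGs in
# a temporary directory, and serves normalized whole-image samples. The
# inference() method post-processes model outputs into per-person SMPL-X
# parameter files, bounding-box results, and rendered overlay images.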
class INFERENCE(torch.utils.data.Dataset):
    def __init__(self, img_dir=None, out_path=None):
        self.output_path = out_path
        self.img_dir = img_dir

        # Treat the input as a video if it is an .mp4 file, otherwise as a
        # directory of images.
        self.is_vid = False
        rank, _ = get_dist_info()
        if self.img_dir.endswith('.mp4'):
            self.is_vid = True
            img_name = self.img_dir.split('/')[-1][:-4]
        else:
            img_name = self.img_dir.split('/')[-1]
        self.img_name = img_name + '_out'

        self.output_path = os.path.join(self.output_path, self.img_name)
        os.makedirs(self.output_path, exist_ok=True)
        self.tmp_dir = os.path.join(self.output_path, 'temp_img')
        os.makedirs(self.tmp_dir, exist_ok=True)
        self.result_img_dir = os.path.join(self.output_path, 'res_img')
        os.makedirs(self.result_img_dir, exist_ok=True)

        # Stage the frames as sequentially numbered PNGs in tmp_dir. Only
        # rank 0 writes; all other ranks wait at the barrier.
        if not self.is_vid:
            if rank == 0:
                image_files = sorted(
                    glob(self.img_dir + '/*.jpg') + glob(self.img_dir + '/*.png'))
                for i, image_file in enumerate(image_files):
                    new_name = os.path.join(self.tmp_dir, '%06d.png' % i)
                    shutil.copy(image_file, new_name)
            dist.barrier()
        else:
            if rank == 0:
                video_to_images(self.img_dir, self.tmp_dir)
            dist.barrier()

        self.img_paths = sorted(glob(self.tmp_dir + '/*', recursive=True))
        self.score_threshold = 0.2
        self.resolution = [720, 1280]  # AGORA test setting
        # self.resolution = [1200, 1600]  # EHF setting
        self.format = DefaultFormatBundle()
        self.normalize = Normalize(
            mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img = load_img(self.img_paths[idx], 'BGR')
        # Use the whole image as the body bbox and let the test-time
        # augmentation produce the network input together with the
        # image<->bbox affine transforms.
        img_whole_bbox = np.array([0, 0, img.shape[1], img.shape[0]])
        img, img2bb_trans, bb2img_trans, _, _ = \
            augmentation_keep_size(img, img_whole_bbox, 'test')
        img = img.astype(np.float32)

        inputs = {'img': img}
        targets = {
            'body_bbox_center': np.array(img_whole_bbox[None]),
            'body_bbox_size': np.array(img_whole_bbox[None])}
        meta_info = {
            'ori_shape': np.array(self.resolution),
            'img_shape': np.array(img.shape[:2]),
            'img2bb_trans': img2bb_trans,
            'bb2img_trans': bb2img_trans,
            'ann_idx': idx}
        result = {**inputs, **targets, **meta_info}
        result = self.normalize(result)
        result = self.format(result)
        return result
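
    # inference() consumes the per-frame network outputs and: rescales the
    # 2D joint projections and boxes back to output coordinates, writes one
    # .pkl of SMPL-X parameters per detected person under
    # <output_path>/predictions, renders the predicted meshes into res_img,
    # and returns a {image_name: [{'bbox': ..., 'score': ...}]} mapping.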
    def inference(self, outs):
        img_paths = self.img_paths
        output = {}
        for out in outs:
            ann_idx = out['image_idx']
            # Denormalized copy of the network input (not used below).
            img_cropped = mmcv.imdenormalize(
                img=out['img'].cpu().numpy().transpose(1, 2, 0),
                mean=np.array([123.675, 116.28, 103.53]),
                std=np.array([58.395, 57.12, 57.375]),
                to_bgr=True).astype(np.uint8)
            scores = out['scores'].clone().cpu().numpy()
            # Network-input size as (w, h), rounded up to even values
            # (common video codecs require even dimensions).
            img_shape = out['img_shape'].cpu().numpy()[::-1]
            width, height = img_shape
            width += width % 2
            height += height % 2
            img_shape = np.array([width, height])
            img = cv2.imread(img_paths[ann_idx])  # (h, w, 3)

            joint_proj = out['smplx_joint_proj'].clone().cpu().numpy()
            # Second copy of the projected joints, rescaled below to the
            # staged frame size for visualization.
            joint_vis = out['smplx_joint_proj'].clone().cpu().numpy()
            joint_coco = out['keypoints_coco'].clone().cpu().numpy()
            smpl_kp3d_coco, _ = convert_kps(
                out['smpl_kp3d'].clone().cpu().numpy(),
                src='smplx', dst='coco', approximate=True)
            body_bbox = out['body_bbox'].clone().cpu().numpy()
            lhand_bbox = out['lhand_bbox'].clone().cpu().numpy()
            rhand_bbox = out['rhand_bbox'].clone().cpu().numpy()
            face_bbox = out['face_bbox'].clone().cpu().numpy()
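
            # Map outputs from network-input coordinates back to output
            # coordinates. The [720, 1280] branch follows the AGORA test
            # protocol (saved joints at the 3840x2160 originals, boxes at
            # the staged frame size); [1200, 1600] follows the EHF protocol.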
            if self.resolution == [720, 1280]:
                joint_proj[:, :, 0] = joint_proj[:, :, 0] / img_shape[0] * 3840
                joint_proj[:, :, 1] = joint_proj[:, :, 1] / img_shape[1] * 2160
                joint_vis[:, :, 0] = joint_vis[:, :, 0] / img_shape[0] * img.shape[1]
                joint_vis[:, :, 1] = joint_vis[:, :, 1] / img_shape[1] * img.shape[0]
                joint_coco[:, :, 0] = joint_coco[:, :, 0] / img_shape[0] * img.shape[1]
                joint_coco[:, :, 1] = joint_coco[:, :, 1] / img_shape[1] * img.shape[0]
                # The resize preserves aspect ratio, so the single x-scale
                # applies to all four xyxy coordinates.
                scale = np.array([
                    img.shape[1] / img_shape[0],
                    img.shape[1] / img_shape[0],
                    img.shape[1] / img_shape[0],
                    img.shape[1] / img_shape[0],
                ])
                body_bbox = body_bbox * scale
                lhand_bbox = lhand_bbox * scale
                rhand_bbox = rhand_bbox * scale
                face_bbox = face_bbox * scale
            elif self.resolution == [1200, 1600]:
                # EHF: fixed rescaling factors from the network input to the
                # 1600x1200 originals.
                joint_proj[:, :, 0] = joint_proj[:, :, 0] * (1200 / 800)
                joint_proj[:, :, 1] = joint_proj[:, :, 1] * (1600 / 1066)
                joint_vis[:, :, 0] = joint_vis[:, :, 0] * (1200 / 800)
                joint_vis[:, :, 1] = joint_vis[:, :, 1] * (1600 / 1066)
                scale = np.array(
                    [1600 / 1066, 1200 / 800, 1600 / 1066, 1200 / 800])[None]
                body_bbox = body_bbox * scale
                lhand_bbox = lhand_bbox * scale
                rhand_bbox = rhand_bbox * scale
                face_bbox = face_bbox * scale
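
            # Assumption: detections arrive sorted by confidence in
            # descending order, so the first sub-threshold score ends the
            # loop and `i` is reused below as the number of kept detections.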
            for i, score in enumerate(scores):
                if score < self.score_threshold:
                    break
                # Drop the 4-character extension and dataset-specific suffix.
                save_name = img_paths[ann_idx].split('/')[-1][:-4]
                if self.resolution == [2160, 3840]:
                    save_name = save_name.split('_ann_id')[0]
                else:
                    save_name = save_name.split('_1280x720')[0]

                save_dict = {
                    'params': {
                        'transl': out['cam_trans'][i].reshape(1, -1).cpu().numpy(),
                        'global_orient': out['smplx_root_pose'][i].reshape(1, -1).cpu().numpy(),
                        'body_pose': out['smplx_body_pose'][i].reshape(1, -1).cpu().numpy(),
                        'left_hand_pose': out['smplx_lhand_pose'][i].reshape(1, -1).cpu().numpy(),
                        'right_hand_pose': out['smplx_rhand_pose'][i].reshape(1, -1).cpu().numpy(),
                        'reye_pose': np.zeros((1, 3)),
                        'leye_pose': np.zeros((1, 3)),
                        'jaw_pose': out['smplx_jaw_pose'][i].reshape(1, -1).cpu().numpy(),
                        'expression': out['smplx_expr'][i].reshape(1, -1).cpu().numpy(),
                        'betas': out['smplx_shape'][i].reshape(1, -1).cpu().numpy()},
                    'joints': joint_proj[i].reshape(1, -1, 2)[0, :24]}

                # Append a running person index so multiple detections in
                # the same frame get distinct prediction files.
                exist_result_path = glob(
                    osp.join(self.output_path, 'predictions', save_name + '*'))
                if len(exist_result_path) == 0:
                    person_idx = 0
                else:
                    last_person_idx = max([
                        int(name.split('personId_')[1].split('.pkl')[0])
                        for name in exist_result_path
                    ])
                    person_idx = last_person_idx + 1
                save_name += '_personId_' + str(person_idx) + '.pkl'

                os.makedirs(osp.join(self.output_path, 'predictions'), exist_ok=True)
                with open(osp.join(self.output_path, 'predictions', save_name), 'wb') as f:
                    pickle.dump(save_dict, f)

            if i == 0:
                # No detection above the threshold: save the frame unchanged.
                cv2.imwrite(
                    os.path.join(self.result_img_dir,
                                 img_paths[ann_idx].split('/')[-1]), img)
            else:
                # Collect the kept body boxes as xywh for the returned dict.
                body_xywh = xyxy2xywh(body_bbox[:i])
                score = scores[:i]
                out_value = [{'bbox': b, 'score': s}
                             for b, s in zip(body_xywh, score)]
                out_key = img_paths[ann_idx].split('/')[-1]
                output.update({out_key: out_value})

                # Draw body/hand/face boxes on the frame.
                img = mmcv.imshow_bboxes(img, body_bbox[:i], show=False, colors='green')
                img = mmcv.imshow_bboxes(img, lhand_bbox[:i], show=False, colors='blue')
                img = mmcv.imshow_bboxes(img, rhand_bbox[:i], show=False, colors='yellow')
                img = mmcv.imshow_bboxes(img, face_bbox[:i], show=False, colors='red')
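
                # Render the predicted SMPL-X meshes over the frame: the
                # vertices are shifted by the per-person camera translation,
                # and a fixed 5000 px focal length with the principal point
                # at the image center defines the perspective camera.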
                verts = out['smpl_verts'][:i] + out['cam_trans'][:i][:, None]
                body_model_cfg = dict(
                    type='smplx',
                    keypoint_src='smplx',
                    num_expression_coeffs=10,
                    num_betas=10,
                    gender='neutral',
                    keypoint_dst='smplx_137',
                    model_path='data/body_models/smplx',
                    use_pca=False,
                    use_face_contour=True)
                body_model = build_body_model(body_model_cfg).to('cuda')
                render_smpl(
                    verts=verts[None],
                    body_model=body_model,
                    K=np.array(
                        [[5000, 0, img_shape[0] / 2],
                         [0, 5000, img_shape[1] / 2],
                         [0, 0, 1]]),
                    R=None,
                    T=None,
                    output_path=os.path.join(
                        self.result_img_dir, img_paths[ann_idx].split('/')[-1]),
                    image_array=cv2.resize(
                        img, (img_shape[0], img_shape[1]),
                        interpolation=cv2.INTER_CUBIC),
                    in_ndc=False,
                    alpha=0.9,
                    convention='opencv',
                    projection='perspective',
                    overwrite=True,
                    no_grad=True,
                    device='cuda',
                    resolution=[img_shape[0], img_shape[1]],
                    render_choice='hq',
                )
        return output
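

# Minimal standalone usage sketch (assumption: not part of the original
# pipeline; the input/output paths below are hypothetical). __init__ calls
# dist.barrier(), so even a single-process run needs a process group.
if __name__ == '__main__':
    os.environ.setdefault('MASTER_ADDR', 'localhost')
    os.environ.setdefault('MASTER_PORT', '29500')
    dist.init_process_group(backend='gloo', rank=0, world_size=1)
    dataset = INFERENCE(img_dir='demo/images', out_path='demo/output')
    print('%d frames staged in %s' % (len(dataset), dataset.tmp_dir))
    dist.destroy_process_group()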