Spaces:

tokenid
/

ID-Pose

Running on Zero

App Files Files Community

tokenid committed on May 13, 2024

Commit

0e18ab2

1 Parent(s): c12c627

more examples & fix scales

Browse files

Files changed (17) hide show

app.py +51 -43
data/gradio_demo/arc_0.png +0 -0
data/gradio_demo/arc_1.png +0 -0
data/gradio_demo/christ_0.png +0 -0
data/gradio_demo/christ_1.png +0 -0
data/gradio_demo/cybertruck_0.png +0 -0
data/gradio_demo/cybertruck_1.png +0 -0
data/gradio_demo/elon_0.png +0 -0
data/gradio_demo/elon_1.png +0 -0
data/gradio_demo/ferrari_0.png +0 -0
data/gradio_demo/ferrari_1.png +0 -0
data/gradio_demo/husky_0.png +0 -0
data/gradio_demo/husky_1.png +0 -0
data/gradio_demo/plane_0.png +0 -0
data/gradio_demo/plane_1.png +0 -0
data/gradio_demo/ride_horse_0.png +0 -0
data/gradio_demo/ride_horse_1.png +0 -0

app.py CHANGED Viewed

@@ -69,13 +69,13 @@ def group_recenter(images, ratio=1.5, mask_thres=127, bkg_color=[255, 255, 255,
         y0, y1 = yy.min(), yy.max()
         x0, x1 = xx.min(), xx.max()
-        ws.append(x1 - x0)
-        hs.append(y1 - y0)
     sz_w = np.max(ws)
     sz_h = np.max(hs)
-    sz = int( max(ratio*sz_w, ratio*sz_h) )
     out_rgbs = []
@@ -93,16 +93,14 @@ def group_recenter(images, ratio=1.5, mask_thres=127, bkg_color=[255, 255, 255,
         cy = (y0 + y1) // 2
         cx = (x0 + x1) // 2
-        y0 = cy - int(np.floor(sz / 2))
-        y1 = cy + int(np.ceil(sz / 2))
-        x0 = cx - int(np.floor(sz / 2))
-        x1 = cx + int(np.ceil(sz / 2))
         out = rgba[ max(y0, 0) : min(y1, height) , max(x0, 0) : min(x1, width), : ].copy()
         pads = [(max(0-y0, 0), max(y1-height, 0)), (max(0-x0, 0), max(x1-width, 0)), (0, 0)]
         out = np.pad(out, pads, mode='constant', constant_values=0)
-        assert(out.shape[:2] == (sz, sz))
         out[:, :, :3] = out[:, :, :3] * (out[..., 3:]/255.) + np.array(bkg_color)[None, None, :3] * (1-out[..., 3:]/255.)
         out[:, :, -1] = bkg_color[-1]
@@ -148,7 +146,7 @@ def run_pose_exploration(cam_vis, image1, image2, probe_bsz, adj_bsz, adj_iters,
     cam_vis.set_images([np.asarray(image1, dtype=np.uint8), np.asarray(image2, dtype=np.uint8)])
     image1 = image_to_tensor(image1).to(_device_)
-    image2 = image_to_tensor(image2).to(_device_)
     images = [image1, image2]
@@ -158,29 +156,33 @@ def run_pose_exploration(cam_vis, image1, image2, probe_bsz, adj_bsz, adj_iters,
         matcher_ckpt_path=_matcher_ckpt_path_
     )
-    noise = np.random.randn(probe_bsz, 4, 32, 32)
-    result_poses, aux_data = estimate_poses(
-        _model_, images,
-        seed_cand_num=8,
-        explore_type='triangular',
-        refine_type='triangular',
-        probe_ts_range=[0.2, 0.21],
-        ts_range=[0.2, 0.21],
-        probe_bsz=probe_bsz,
-        adjust_factor=10.0,
-        adjust_iters=adj_iters,
-        adjust_bsz=adj_bsz,
-        refine_factor=1.0,
-        refine_iters=0,
-        refine_bsz=4,
-        noise=noise,
-        elevs=elevs,
-        elev_ranges=elev_ranges
-    )
-    theta, azimuth, radius = result_poses[0]
-    anchor_polar = aux_data['elev'][0]
     if anchor_polar is None:
         anchor_polar = np.pi/2
@@ -245,8 +247,8 @@ _HEADER_ = '''
 '''
 _FOOTER_ = '''
-- Project Page: [https://xt4d.github.io/id-pose-web/](https://xt4d.github.io/id-pose-web/)
-- Github: [https://github.com/xt4d/id-pose](https://github.com/xt4d/id-pose)
 '''
 _CITE_ = r"""
@@ -321,37 +323,43 @@ def run_demo():
                                 ['data/gradio_demo/duck_0.png', 'data/gradio_demo/duck_1.png'],
                                 ['data/gradio_demo/chair_0.png', 'data/gradio_demo/chair_1.png'],
                                 ['data/gradio_demo/foosball_0.png', 'data/gradio_demo/foosball_1.png'],
                             ],
                             inputs=[input_image1, input_image2],
-                            label='Examples (Self-captured)',
                             cache_examples=False,
-                            examples_per_page=3
                         )
                     with gr.Column(min_width=200):
                         gr.Examples(
                             examples = [
-                                ['data/gradio_demo/bunny_0.png', 'data/gradio_demo/bunny_1.png'],
-                                ['data/gradio_demo/bus_0.png', 'data/gradio_demo/bus_1.png'],
-                                ['data/gradio_demo/circo_0.png', 'data/gradio_demo/circo_1.png'],
                             ],
                             inputs=[input_image1, input_image2],
-                            label='Examples (Images from NAVI)',
                             cache_examples=False,
-                            examples_per_page=3
                         )
                     with gr.Column(min_width=200):
                         gr.Examples(
                             examples = [
                                 ['data/gradio_demo/status_0.png', 'data/gradio_demo/status_1.png'],
-                                ['data/gradio_demo/bag_0.png', 'data/gradio_demo/bag_1.png'],
                                 ['data/gradio_demo/cat_0.png', 'data/gradio_demo/cat_1.png'],
                             ],
                             inputs=[input_image1, input_image2],
                             label='Examples (Generated)',
                             cache_examples=False,
-                            examples_per_page=3
                         )
         cam_vis = CameraVisualizer([np.eye(4), np.eye(4)], ['Image 1', 'Image 2'], ['red', 'blue'])

         y0, y1 = yy.min(), yy.max()
         x0, x1 = xx.min(), xx.max()
+        ws.append(float(x1 - x0) / img.shape[0])
+        hs.append(float(y1 - y0) / img.shape[1])
     sz_w = np.max(ws)
     sz_h = np.max(hs)
+    sz = max(ratio*sz_w, ratio*sz_h)
     out_rgbs = []
         cy = (y0 + y1) // 2
         cx = (x0 + x1) // 2
+        y0 = cy - int(np.floor(sz * rgba.shape[0] / 2))
+        y1 = cy + int(np.ceil(sz * rgba.shape[0] / 2))
+        x0 = cx - int(np.floor(sz * rgba.shape[1] / 2))
+        x1 = cx + int(np.ceil(sz * rgba.shape[1] / 2))
         out = rgba[ max(y0, 0) : min(y1, height) , max(x0, 0) : min(x1, width), : ].copy()
         pads = [(max(0-y0, 0), max(y1-height, 0)), (max(0-x0, 0), max(x1-width, 0)), (0, 0)]
         out = np.pad(out, pads, mode='constant', constant_values=0)
         out[:, :, :3] = out[:, :, :3] * (out[..., 3:]/255.) + np.array(bkg_color)[None, None, :3] * (1-out[..., 3:]/255.)
         out[:, :, -1] = bkg_color[-1]
     cam_vis.set_images([np.asarray(image1, dtype=np.uint8), np.asarray(image2, dtype=np.uint8)])
     image1 = image_to_tensor(image1).to(_device_)
+    image2 = image_to_tensor(image2).to(_device_)
     images = [image1, image2]
         matcher_ckpt_path=_matcher_ckpt_path_
     )
+    anchor_polar = elevs[0]
+    if torch.mean(torch.abs(image1 - image2)) < 0.005:
+        theta = azimuth = radius = 0
+        print('Identical images found!')
+    else:
+        noise = np.random.randn(probe_bsz, 4, 32, 32)
+        result_poses, aux_data = estimate_poses(
+            _model_, images,
+            seed_cand_num=8,
+            explore_type='triangular',
+            refine_type='triangular',
+            probe_ts_range=[0.2, 0.21],
+            ts_range=[0.2, 0.21],
+            probe_bsz=probe_bsz,
+            adjust_factor=10.0,
+            adjust_iters=adj_iters,
+            adjust_bsz=adj_bsz,
+            refine_factor=1.0,
+            refine_iters=0,
+            refine_bsz=4,
+            noise=noise,
+            elevs=elevs,
+            elev_ranges=elev_ranges
+        )
+        theta, azimuth, radius = result_poses[0]
     if anchor_polar is None:
         anchor_polar = np.pi/2
 '''
 _FOOTER_ = '''
+[Project Page](https://xt4d.github.io/id-pose-web/) | ⭐ [Github](https://github.com/xt4d/id-pose) ⭐ [![GitHub Stars](https://img.shields.io/github/stars/xt4d/id-pose?style=social)](https://github.com/xt4d/id-pose)
+---
 '''
 _CITE_ = r"""
                                 ['data/gradio_demo/duck_0.png', 'data/gradio_demo/duck_1.png'],
                                 ['data/gradio_demo/chair_0.png', 'data/gradio_demo/chair_1.png'],
                                 ['data/gradio_demo/foosball_0.png', 'data/gradio_demo/foosball_1.png'],
+                                ['data/gradio_demo/bunny_0.png', 'data/gradio_demo/bunny_1.png'],
+                                ['data/gradio_demo/circo_0.png', 'data/gradio_demo/circo_1.png'],
                             ],
                             inputs=[input_image1, input_image2],
+                            label='Examples (Captured)',
                             cache_examples=False,
+                            examples_per_page=5
                         )
                     with gr.Column(min_width=200):
                         gr.Examples(
                             examples = [
+                                ['data/gradio_demo/arc_0.png', 'data/gradio_demo/arc_1.png'],
+                                ['data/gradio_demo/husky_0.png', 'data/gradio_demo/husky_1.png'],
+                                ['data/gradio_demo/cybertruck_0.png', 'data/gradio_demo/cybertruck_1.png'],
+                                ['data/gradio_demo/plane_0.png', 'data/gradio_demo/plane_1.png'],
+                                ['data/gradio_demo/christ_0.png', 'data/gradio_demo/christ_1.png'],
                             ],
                             inputs=[input_image1, input_image2],
+                            label='Examples (Internet)',
                             cache_examples=False,
+                            examples_per_page=5
                         )
                     with gr.Column(min_width=200):
                         gr.Examples(
                             examples = [
                                 ['data/gradio_demo/status_0.png', 'data/gradio_demo/status_1.png'],
                                 ['data/gradio_demo/cat_0.png', 'data/gradio_demo/cat_1.png'],
+                                ['data/gradio_demo/ferrari_0.png', 'data/gradio_demo/ferrari_1.png'],
+                                ['data/gradio_demo/elon_0.png', 'data/gradio_demo/elon_1.png'],
+                                ['data/gradio_demo/ride_horse_0.png', 'data/gradio_demo/ride_horse_1.png'],
                             ],
                             inputs=[input_image1, input_image2],
                             label='Examples (Generated)',
                             cache_examples=False,
+                            examples_per_page=5
                         )
         cam_vis = CameraVisualizer([np.eye(4), np.eye(4)], ['Image 1', 'Image 2'], ['red', 'blue'])