Spaces:

atnikos
/

motionfix-demo

Running

App Files Files Community

atnikos committed on Jul 3, 2024

Commit

38c4910

1 Parent(s): 7495dca

first TMR retrieval effort

Browse files

Files changed (2) hide show

app.py +102 -23
gen_utils.py +10 -0

app.py CHANGED Viewed

@@ -17,6 +17,7 @@ zero = torch.Tensor([0]).cuda()
 print(zero.device) # <-- 'cuda:0' 🤗
 DEFAULT_TEXT = "A person is "
 from aitviewer.models.smpl import SMPLLayer
 def get_smpl_models():
     REPO_ID = 'athn-nik/smpl_models'
@@ -35,7 +36,6 @@ def get_renderer():
     return HeadlessRenderer()
 WEBSITE = ("""<div class="embed_hidden" style="text-align: center;">
     <h1>MotionFix: Text-Driven 3D Human Motion Editing</h1>
     <h3>
@@ -65,8 +65,8 @@ WEB_source =  ("""<div class="embed_hidden" style="text-align: center;">
     <h1>Pick a motion to edit!</h1>
     <h3>
         Here you should pick a source motion
     </h3>
-    <hr class="double">
 </div>
 """)
@@ -74,6 +74,7 @@ WEB_target =  ("""<div class="embed_hidden" style="text-align: center;">
     <h1>Now type the text to edit that motion!</h1>
     <h3>
         Here you should get the generated motion!
     </h3>
 </div>
 """)
@@ -152,13 +153,6 @@ def show_video(input_text):
                           smpl_layer=SMPL_LAYER)
     return fname
-def retrieve_video(retrieve_text):
-    tmr_text_encoder = get_tmr_model(download_tmr())
-    text_encoded = tmr_text_encoder([retrieve_text])
-    motion_embeds = None
-    retrieved_motion = tmr_text_encoder.compute_scores(text_encoded, motion_embeds)
-    return
 from huggingface_hub import hf_hub_download, hf_hub_url, cached_download
 def download_models():
@@ -171,15 +165,70 @@ def download_tmr():
     from huggingface_hub import snapshot_download
     return snapshot_download(repo_id=REPO_ID, allow_patterns="tmr*",
                              token=access_token_smpl)
 import gradio as gr
 def clear():
     return ""
-def random_number():
-    return "Random text"
-with gr.Blocks() as demo:
     gr.Markdown(WEBSITE)
     gr.Markdown(WEB_source)
     # TODO load TMR text-encoder
@@ -188,18 +237,44 @@ with gr.Blocks() as demo:
     # edit that motion!
     with gr.Row():
         with gr.Column(scale=10):
-            retrieve_text = gr.Textbox(placeholder="Type the text for the motion you want to Retrieve:",
                                        show_label=True, label="Retrieval Text", value=DEFAULT_TEXT)
-            xxx = 'https://motion-editing.s3.eu-central-1.amazonaws.com/collection_wo_walks_runs/rendered_pairs/011327_120_240-002682_120_240.mp4'
         with gr.Column(scale=8):
             retrieved_video_output = gr.Video(label="Retrieved Motion",
-                                            value=xxx,
-                                            height=360, width=480)
-    with gr.Row():
-        clear_button_retrieval = gr.Button("Clear Retrieval Text")
-        retrieve_button = gr.Button("TMRetrieve")
-        random_button = gr.Button("Random")
     gr.Markdown(WEB_target)
     with gr.Row():
@@ -223,14 +298,18 @@ with gr.Blocks() as demo:
         return fname
     from retrieval_loader import get_tmr_model
     # load the dataset and splits
     edit_button.click(process_and_show_video, inputs=input_text, outputs=video_output)
-    retrieve_button.click(process_and_retrieve_video, inputs=retrieve_text, outputs=retrieved_video_output)
     # import ipdb;ipdb.set_trace()
     clear_button_edit.click(clear, outputs=input_text)
     clear_button_retrieval.click(clear, outputs=retrieve_text)
-    random_button.click(random_number, outputs=input_text)
 demo.launch()

 print(zero.device) # <-- 'cuda:0' 🤗
 DEFAULT_TEXT = "A person is "
 from aitviewer.models.smpl import SMPLLayer
 def get_smpl_models():
     REPO_ID = 'athn-nik/smpl_models'
     return HeadlessRenderer()
 WEBSITE = ("""<div class="embed_hidden" style="text-align: center;">
     <h1>MotionFix: Text-Driven 3D Human Motion Editing</h1>
     <h3>
     <h1>Pick a motion to edit!</h1>
     <h3>
         Here you should pick a source motion
+        <hr class="double">
     </h3>
 </div>
 """)
     <h1>Now type the text to edit that motion!</h1>
     <h3>
         Here you should get the generated motion!
+        <hr class="double">
     </h3>
 </div>
 """)
                           smpl_layer=SMPL_LAYER)
     return fname
 from huggingface_hub import hf_hub_download, hf_hub_url, cached_download
 def download_models():
     from huggingface_hub import snapshot_download
     return snapshot_download(repo_id=REPO_ID, allow_patterns="tmr*",
                              token=access_token_smpl)
+def download_motionfix():
+    REPO_ID = 'athn-nik/example-model'
+    # return hf_hub_download(REPO_ID, filename="min_checkpoint.ckpt")
+    from huggingface_hub import snapshot_download
+    return snapshot_download(repo_id=REPO_ID, allow_patterns="motionfix*",
+                             token=access_token_smpl)
+def download_embeddings():
+    REPO_ID = 'athn-nik/example-model'
+    # return hf_hub_download(REPO_ID, filename="min_checkpoint.ckpt")
+    from huggingface_hub import snapshot_download
+    return snapshot_download(repo_id=REPO_ID, allow_patterns="embeddings*",
+                             token=access_token_smpl)
+MFIX_p = download_motionfix() + '/motionfix'
+SOURCE_MOTS_p = download_embeddings() + '/embeddings'
 import gradio as gr
 def clear():
     """Return an empty string; wired to the clear buttons to reset a textbox."""
     return ""
+def random_source_motion(set_to_pick):
+    # import ipdb;ipdb.set_trace()
+    mfix_train, mfix_test = load_motionfix(MFIX_p)
+    if set_to_pick == 'all':
+        current_set = mfix_test | mfix_train
+    elif set_to_pick == 'train':
+        current_set = mfix_train
+    elif set_to_pick == 'test':
+        current_set = mfix_test
+    import random
+    random_key = random.choice(list(current_set.keys()))
+    curvid = current_set[random_key]['motion_a']
+    text_annot = current_set[random_key]['annotation']
+    return curvid, text_annot
+def retrieve_video(retrieve_text):
+    tmr_text_encoder = get_tmr_model(download_tmr())
+    # import ipdb;ipdb.set_trace()
+    # text_encoded = tmr_text_encoder([retrieve_text])
+    motion_embeds = None
+    from gen_utils import read_json
+    import numpy as np
+    motion_embeds = torch.load(SOURCE_MOTS_p+'/source_motions_embeddings.pt')
+    motion_keyids =np.array(read_json(SOURCE_MOTS_p+'/keyids_embeddings.json'))
+    mfix_train, mfix_test = load_motionfix(MFIX_p)
+    all_mots = mfix_test | mfix_train
+    scores = tmr_text_encoder.compute_scores(retrieve_text, embs=motion_embeds)
+    sorted_idxs = np.argsort(-scores)
+    best_keyids = motion_keyids[sorted_idxs]
+    # best_scores = scores[sorted_idxs]
+    top_mot = best_keyids[0]
+    curvid = all_mots[top_mot]['motion_b']
+    text_annot = all_mots[top_mot]['annotation']
+    return curvid, text_annot
+with gr.Blocks(css="style.css") as demo:
     gr.Markdown(WEBSITE)
     gr.Markdown(WEB_source)
     # TODO load TMR text-encoder
     # edit that motion!
     with gr.Row():
         with gr.Column(scale=10):
+            with gr.Column(scale=5):
+                retrieve_text = gr.Textbox(placeholder="Type the text for the motion you want to Retrieve:",
                                        show_label=True, label="Retrieval Text", value=DEFAULT_TEXT)
+                suggested_edit_text = gr.Textbox(placeholder="Texts likely to edit the motion:",
+                                                 show_label=True, label="Suggested Edit Text",
+                                                 value='')
+                xxx = 'https://motion-editing.s3.eu-central-1.amazonaws.com/collection_wo_walks_runs/rendered_pairs/011327_120_240-002682_120_240.mp4'
+            with gr.Column(scale=5):
+                set_to_pick = gr.Radio(['all', 'train', 'test'],
+                                       value='all',
+                                        label="Set to pick from",
+                                        info="Motion will be picked from whole dataset or test or train data.")
+            with gr.Row():
+                with gr.Column(scale=10):
+                    retrieve_button = gr.Button("TMRetrieve")
+                    random_button = gr.Button("Random")
+                with gr.Column(scale=10):
+                    how_many_videos = gr.Radio([1, 3, 5, 7],
+                                                value=3,
+                                               label="# Videos",
+                                               info="# Videos to be retrieved in each case."),
+                    # temp_slider = gr.Slider(minimum=1,
+                    #                         maximum=5,
+                    #                         value=1,
+                    #                         step=2,
+                    #                         interactive=True,
+                    #                         label="Slide me")
+                with gr.Column(scale=10,elem_id="center-column"):
+                    clear_button_retrieval = gr.Button("Clear Retrieval Text")
         with gr.Column(scale=8):
             retrieved_video_output = gr.Video(label="Retrieved Motion",
+                                              value=xxx,
+                                             height=360, width=480)
     gr.Markdown(WEB_target)
     with gr.Row():
         return fname
     from retrieval_loader import get_tmr_model
+    from dataset_utils import load_motionfix
     # load the dataset and splits
+    # import ipdb;ipdb.set_trace()
     edit_button.click(process_and_show_video, inputs=input_text, outputs=video_output)
+    retrieve_button.click(process_and_retrieve_video, inputs=retrieve_text, outputs=[retrieved_video_output, suggested_edit_text])
+    random_button.click(random_source_motion, inputs=set_to_pick, outputs=[retrieved_video_output, suggested_edit_text])
+    # import ipdb;ipdb.set_trace()
     # import ipdb;ipdb.set_trace()
     clear_button_edit.click(clear, outputs=input_text)
     clear_button_retrieval.click(clear, outputs=retrieve_text)
 demo.launch()

gen_utils.py CHANGED Viewed

@@ -61,6 +61,16 @@ def save_config(cfg: DictConfig) -> str:
         f.write(string)
     return path
 def read_config(run_dir: str, return_json=False) -> DictConfig:
     path = os.path.join(run_dir, "config.json")

         f.write(string)
     return path
+def write_json(data, p):
+    import json
+    with open(p, 'w') as fp:
+        json.dump(data, fp, indent=2)
+def read_json(p):
+    import json
+    with open(p, 'r') as fp:
+        json_contents = json.load(fp)
+    return json_contents
 def read_config(run_dir: str, return_json=False) -> DictConfig:
     path = os.path.join(run_dir, "config.json")