Robotics · LeRobot · Safetensors · smolvla

Commit dcefd19 (verified) · committed by godnpeter · 1 parent: 7d639c5

Upload policy weights, train config and readme

Files changed (4):
  1. README.md +63 -0
  2. config.json +94 -0
  3. model.safetensors +3 -0
  4. train_config.json +361 -0
README.md ADDED
@@ -0,0 +1,63 @@
+ ---
+ base_model: lerobot/smolvla_base
+ datasets: godnpeter/aopoli-lv-libero_combined_no_noops_lerobot_v21
+ library_name: lerobot
+ license: apache-2.0
+ model_name: smolvla
+ pipeline_tag: robotics
+ tags:
+ - robotics
+ - lerobot
+ - smolvla
+ ---
+
+ # Model Card for smolvla
+
+ <!-- Provide a quick summary of what the model is/does. -->
+
+
+ [SmolVLA](https://huggingface.co/papers/2506.01844) is a compact, efficient vision-language-action model that achieves competitive performance at reduced computational costs and can be deployed on consumer-grade hardware.
+
+
+ This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
+ See the full documentation at [LeRobot Docs](https://huggingface.co/docs/lerobot/index).
+
+ ---
+
+ ## How to Get Started with the Model
+
+ For a complete walkthrough, see the [training guide](https://huggingface.co/docs/lerobot/il_robots#train-a-policy).
+ Below is a short version of how to train the policy and run inference/evaluation:
+
+ ### Train from scratch
+
+ ```bash
+ lerobot-train \
+ --dataset.repo_id=${HF_USER}/<dataset> \
+ --policy.type=smolvla \
+ --output_dir=outputs/train/<desired_policy_repo_id> \
+ --job_name=lerobot_training \
+ --policy.device=cuda \
+ --policy.repo_id=${HF_USER}/<desired_policy_repo_id> \
+ --wandb.enable=true
+ ```
+
+ _Writes checkpoints to `outputs/train/<desired_policy_repo_id>/checkpoints/`._
+
+ ### Evaluate the policy / run inference
+
+ ```bash
+ lerobot-record \
+ --robot.type=so100_follower \
+ --dataset.repo_id=<hf_user>/eval_<dataset> \
+ --policy.path=<hf_user>/<desired_policy_repo_id> \
+ --episodes=10
+ ```
+
+ Prefix the dataset repo id with **eval\_** and point `--policy.path` to a local or Hub checkpoint.
+
+ ---
+
+ ## Model Details
+
+ - **License:** apache-2.0
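
In addition to the CLI commands above, the checkpoint can be loaded directly in Python. The snippet below is a minimal sketch, assuming a recent LeRobot install where `SmolVLAPolicy` is importable from `lerobot.policies.smolvla.modeling_smolvla` (the import path has changed between releases), that images are passed as channel-first float tensors in [0, 1], and using the placeholder repo id from the examples above.

```python
# Minimal inference sketch (assumptions: recent LeRobot, CUDA available,
# channel-first float images in [0, 1]; "<hf_user>/<desired_policy_repo_id>"
# is the placeholder from the README, not a real repo id).
import torch

from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy

policy = SmolVLAPolicy.from_pretrained("<hf_user>/<desired_policy_repo_id>")
policy.to("cuda").eval()

# Dummy batch matching config.json: two 256x256 RGB cameras, an 8-dim state,
# and a language instruction (one of the LIBERO task prompts).
batch = {
    "observation.images.image": torch.rand(1, 3, 256, 256, device="cuda"),
    "observation.images.wrist_image": torch.rand(1, 3, 256, 256, device="cuda"),
    "observation.state": torch.rand(1, 8, device="cuda"),
    "task": ["pick up the black bowl and place it on the plate"],
}

with torch.inference_mode():
    action = policy.select_action(batch)  # expected shape: (1, 7)
print(action.shape)
```

Each `select_action` call should return one step drawn from the 50-step action chunk configured in `config.json` (`chunk_size` / `n_action_steps` = 50).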
config.json ADDED
@@ -0,0 +1,94 @@
+ {
+ "type": "smolvla",
+ "n_obs_steps": 1,
+ "normalization_mapping": {
+ "VISUAL": "IDENTITY",
+ "STATE": "MEAN_STD",
+ "ACTION": "MEAN_STD"
+ },
+ "input_features": {
+ "observation.images.wrist_image": {
+ "type": "VISUAL",
+ "shape": [
+ 256,
+ 256,
+ 3
+ ]
+ },
+ "observation.images.image": {
+ "type": "VISUAL",
+ "shape": [
+ 256,
+ 256,
+ 3
+ ]
+ },
+ "observation.state": {
+ "type": "STATE",
+ "shape": [
+ 8
+ ]
+ }
+ },
+ "output_features": {
+ "action": {
+ "type": "ACTION",
+ "shape": [
+ 7
+ ]
+ }
+ },
+ "device": "cuda",
+ "use_amp": false,
+ "use_peft": false,
+ "push_to_hub": true,
+ "repo_id": "combined_frozen_chunk50_noproprio_unified_text_prompt_fullvlm_1010",
+ "private": null,
+ "tags": null,
+ "license": null,
+ "use_proprio": false,
+ "chunk_size": 50,
+ "n_action_steps": 50,
+ "normalize_visual": "identity",
+ "normalize_state": "mean_std",
+ "normalize_action": "mean_std",
+ "max_state_dim": 32,
+ "max_action_dim": 32,
+ "resize_imgs_with_padding": [
+ 512,
+ 512
+ ],
+ "empty_cameras": 0,
+ "adapt_to_pi_aloha": false,
+ "use_delta_joint_actions_aloha": false,
+ "tokenizer_max_length": 48,
+ "num_steps": 10,
+ "use_cache": true,
+ "freeze_vision_encoder": true,
+ "train_expert_only": true,
+ "train_state_proj": true,
+ "optimizer_lr": 0.0001,
+ "optimizer_betas": [
+ 0.9,
+ 0.95
+ ],
+ "optimizer_eps": 1e-08,
+ "optimizer_weight_decay": 1e-10,
+ "optimizer_grad_clip_norm": 10,
+ "scheduler_warmup_steps": 1000,
+ "scheduler_decay_steps": 30000,
+ "scheduler_decay_lr": 2.5e-06,
+ "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct",
+ "load_vlm_weights": true,
+ "vlm_model_dtype": "bfloat16",
+ "add_image_special_tokens": false,
+ "attention_mode": "cross_attn",
+ "prefix_length": -1,
+ "pad_language_to": "longest",
+ "num_expert_layers": -1,
+ "num_vlm_layers": -1,
+ "self_attn_every_n_layers": 2,
+ "expert_width_multiplier": 0.75,
+ "min_period": 0.004,
+ "max_period": 4.0
+ }
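
The configuration above fully specifies the SmolVLA policy: two 256×256 RGB cameras, an 8-dim state, 7-dim actions, and a frozen SmolVLM2-500M backbone with a 0.75× action expert. A minimal sketch for fetching and inspecting it programmatically, assuming the `huggingface_hub` package and the placeholder repo id from the README:

```python
# Sketch: download config.json from the Hub and print a few key fields
# ("<hf_user>/<desired_policy_repo_id>" is a placeholder, not a real repo id).
import json

from huggingface_hub import hf_hub_download

path = hf_hub_download("<hf_user>/<desired_policy_repo_id>", "config.json")
with open(path) as f:
    cfg = json.load(f)

# e.g. policy type, action chunking, and the VLM backbone
print(cfg["type"], cfg["chunk_size"], cfg["n_action_steps"], cfg["vlm_model_name"])
```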
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:32c202a408aba1283f05a752c6811aa83a792e30aa72067f4dce4d95b10ff5fd
+ size 1421156816
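
`model.safetensors` is stored as a Git LFS pointer; the actual weights file is about 1.4 GB. A minimal sketch for downloading it and listing tensor names without loading the full weights, assuming the `safetensors` and `huggingface_hub` packages and the same placeholder repo id:

```python
# Sketch: download model.safetensors and inspect its tensor names
# (placeholder repo id; reads only metadata, not the full tensors).
from huggingface_hub import hf_hub_download
from safetensors import safe_open

path = hf_hub_download("<hf_user>/<desired_policy_repo_id>", "model.safetensors")
with safe_open(path, framework="pt") as f:
    names = list(f.keys())

print(f"{len(names)} tensors, e.g. {names[:3]}")
```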
train_config.json ADDED
@@ -0,0 +1,361 @@
+ {
+ "dataset": {
+ "repo_id": "godnpeter/aopoli-lv-libero_combined_no_noops_lerobot_v21",
+ "use_all_local_repos": false,
+ "root": null,
+ "episodes": null,
+ "image_transforms": {
+ "enable": false,
+ "max_num_transforms": 3,
+ "random_order": false,
+ "tfs": {
+ "brightness": {
+ "weight": 1.0,
+ "type": "ColorJitter",
+ "kwargs": {
+ "brightness": [
+ 0.8,
+ 1.2
+ ]
+ }
+ },
+ "contrast": {
+ "weight": 1.0,
+ "type": "ColorJitter",
+ "kwargs": {
+ "contrast": [
+ 0.8,
+ 1.2
+ ]
+ }
+ },
+ "saturation": {
+ "weight": 1.0,
+ "type": "ColorJitter",
+ "kwargs": {
+ "saturation": [
+ 0.5,
+ 1.5
+ ]
+ }
+ },
+ "hue": {
+ "weight": 1.0,
+ "type": "ColorJitter",
+ "kwargs": {
+ "hue": [
+ -0.05,
+ 0.05
+ ]
+ }
+ },
+ "sharpness": {
+ "weight": 1.0,
+ "type": "SharpnessJitter",
+ "kwargs": {
+ "sharpness": [
+ 0.5,
+ 1.5
+ ]
+ }
+ },
+ "shift": {
+ "weight": 0.0,
+ "type": "RandomShift",
+ "kwargs": {
+ "max_shift": 8,
+ "padding_mode": "edge"
+ }
+ }
+ }
+ },
+ "text_transform": {
+ "enable": true,
+ "rewrite_map_path": "/fsx/dongyoonhwang/lerobot_raw/modified_libero_prompts/unify_text_prompt.yaml",
+ "case_insensitive": true,
+ "random_choice": true,
+ "rewrite_map": {
+ "pick up the black bowl between the plate and the ramekin and place it on the plate": [
+ "pick up the black bowl between the plate and the ramekin and place it on the plate"
+ ],
+ "pick up the black bowl next to the ramekin and place it on the plate": [
+ "pick up the black bowl next to the ramekin and place it on the plate"
+ ],
+ "pick up the black bowl from table center and place it on the plate": [
+ "pick up the black bowl from the table center and place it on the plate"
+ ],
+ "pick up the black bowl on the cookie box and place it on the plate": [
+ "pick up the black bowl on the cookie box and place it on the plate"
+ ],
+ "pick up the black bowl in the top drawer of the wooden cabinet and place it on the plate": [
+ "pick up the black bowl in the top drawer of the wooden cabinet and place it on the plate"
+ ],
+ "pick up the black bowl on the ramekin and place it on the plate": [
+ "pick up the black bowl on the ramekin and place it on the plate"
+ ],
+ "pick up the black bowl next to the cookie box and place it on the plate": [
+ "pick up the black bowl next to the cookie box and place it on the plate"
+ ],
+ "pick up the black bowl on the stove and place it on the plate": [
+ "pick up the black bowl on the stove and place it on the plate"
+ ],
+ "pick up the black bowl next to the plate and place it on the plate": [
+ "pick up the black bowl next to the plate and place it on the plate"
+ ],
+ "pick up the black bowl on the wooden cabinet and place it on the plate": [
+ "pick up the black bowl on the wooden cabinet and place it on the plate"
+ ],
+ "pick up the alphabet soup and place it in the basket": [
+ "pick up the alphabet soup can and place it in the basket"
+ ],
+ "pick up the cream cheese and place it in the basket": [
+ "pick up the cream cheese box and place it in the basket"
+ ],
+ "pick up the salad dressing and place it in the basket": [
+ "pick up the salad dressing bottle and place it in the basket"
+ ],
+ "pick up the bbq sauce and place it in the basket": [
+ "pick up the bbq sauce bottle and place it in the basket"
+ ],
+ "pick up the ketchup and place it in the basket": [
+ "pick up the ketchup bottle and place it in the basket"
+ ],
+ "pick up the tomato sauce and place it in the basket": [
+ "pick up the tomato sauce can and place it in the basket"
+ ],
+ "pick up the butter and place it in the basket": [
+ "pick up the butter stick and place it in the basket"
+ ],
+ "pick up the milk and place it in the basket": [
+ "pick up the milk carton and place it in the basket"
+ ],
+ "pick up the chocolate pudding and place it in the basket": [
+ "pick up the chocolate pudding cup and place it in the basket"
+ ],
+ "pick up the orange juice and place it in the basket": [
+ "pick up the orange juice carton and place it in the basket"
+ ],
+ "open the middle drawer of the cabinet": [
+ "open the middle drawer of the cabinet"
+ ],
+ "put the bowl on the stove": [
+ "pick up the black bowl and place it on the stove"
+ ],
+ "put the wine bottle on top of the cabinet": [
+ "pick up the wine bottle and place it on top of the cabinet"
+ ],
+ "open the top drawer and put the bowl inside": [
+ "open the top drawer then pick up the black bowl and place it inside"
+ ],
+ "put the bowl on top of the cabinet": [
+ "pick up the black bowl and place it on top of the cabinet"
+ ],
+ "push the plate to the front of the stove": [
+ "push the plate to the front of the stove"
+ ],
+ "put the cream cheese in the bowl": [
+ "pick up the cream cheese box and place it in the black bowl"
+ ],
+ "turn on the stove": [
+ "turn on the stove"
+ ],
+ "put the bowl on the plate": [
+ "pick up the black bowl and place it on the plate"
+ ],
+ "put the wine bottle on the rack": [
+ "pick up the wine bottle and place it on the rack"
+ ],
+ "put both the alphabet soup and the tomato sauce in the basket": [
+ "pick up the alphabet soup can and place it in the basket then pick up the tomato sauce can and place it in the basket"
+ ],
+ "put both the cream cheese box and the butter in the basket": [
+ "pick up the cream cheese box and place it in the basket then pick up the butter stick and place it in the basket"
+ ],
+ "turn on the stove and put the moka pot on it": [
+ "turn on the stove then pick up the moka pot and place it on the stove"
+ ],
+ "put the black bowl in the bottom drawer of the cabinet and close it": [
+ "pick up the black bowl and place it in the bottom drawer of the cabinet then close the drawer"
+ ],
+ "put the white mug on the left plate and put the yellow and white mug on the right plate": [
+ "pick up the white mug and place it on the left plate then pick up the yellow-and-white mug and place it on the right plate"
+ ],
+ "pick up the book and place it in the back compartment of the caddy": [
+ "pick up the book and place it in the back compartment of the caddy"
+ ],
+ "put the white mug on the plate and put the chocolate pudding to the right of the plate": [
+ "pick up the white mug and place it on the plate then pick up the chocolate pudding cup and place it to the right of the plate"
+ ],
+ "put both the alphabet soup and the cream cheese box in the basket": [
+ "pick up the alphabet soup can and place it in the basket then pick up the cream cheese box and place it in the basket"
+ ],
+ "put both moka pots on the stove": [
+ "pick up one moka pot and place it on the stove then pick up the other moka pot and place it on the stove"
+ ],
+ "put the yellow and white mug in the microwave and close it": [
+ "pick up the yellow-and-white mug and place it in the microwave then close the microwave"
+ ]
+ }
+ },
+ "revision": null,
+ "use_imagenet_stats": true,
+ "video_backend": "torchcodec",
+ "only_robot_type": "so100",
+ "exclude_tasks": null,
+ "report_task_stats": true
+ },
+ "env": null,
+ "policy": {
+ "type": "smolvla",
+ "n_obs_steps": 1,
+ "normalization_mapping": {
+ "VISUAL": "IDENTITY",
+ "STATE": "MEAN_STD",
+ "ACTION": "MEAN_STD"
+ },
+ "input_features": {
+ "observation.images.wrist_image": {
+ "type": "VISUAL",
+ "shape": [
+ 256,
+ 256,
+ 3
+ ]
+ },
+ "observation.images.image": {
+ "type": "VISUAL",
+ "shape": [
+ 256,
+ 256,
+ 3
+ ]
+ },
+ "observation.state": {
+ "type": "STATE",
+ "shape": [
+ 8
+ ]
+ }
+ },
+ "output_features": {
+ "action": {
+ "type": "ACTION",
+ "shape": [
+ 7
+ ]
+ }
+ },
+ "device": "cuda",
+ "use_amp": false,
+ "use_peft": false,
+ "push_to_hub": true,
+ "repo_id": "combined_frozen_chunk50_noproprio_unified_text_prompt_fullvlm_1010",
+ "private": null,
+ "tags": null,
+ "license": null,
+ "use_proprio": false,
+ "chunk_size": 50,
+ "n_action_steps": 50,
+ "normalize_visual": "identity",
+ "normalize_state": "mean_std",
+ "normalize_action": "mean_std",
+ "max_state_dim": 32,
+ "max_action_dim": 32,
+ "resize_imgs_with_padding": [
+ 512,
+ 512
+ ],
+ "empty_cameras": 0,
+ "adapt_to_pi_aloha": false,
+ "use_delta_joint_actions_aloha": false,
+ "tokenizer_max_length": 48,
+ "num_steps": 10,
+ "use_cache": true,
+ "freeze_vision_encoder": true,
+ "train_expert_only": true,
+ "train_state_proj": true,
+ "optimizer_lr": 0.0001,
+ "optimizer_betas": [
+ 0.9,
+ 0.95
+ ],
+ "optimizer_eps": 1e-08,
+ "optimizer_weight_decay": 1e-10,
+ "optimizer_grad_clip_norm": 10,
+ "scheduler_warmup_steps": 1000,
+ "scheduler_decay_steps": 30000,
+ "scheduler_decay_lr": 2.5e-06,
+ "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct",
+ "load_vlm_weights": true,
+ "vlm_model_dtype": "bfloat16",
+ "add_image_special_tokens": false,
+ "attention_mode": "cross_attn",
+ "prefix_length": -1,
+ "pad_language_to": "longest",
+ "num_expert_layers": -1,
+ "num_vlm_layers": -1,
+ "self_attn_every_n_layers": 2,
+ "expert_width_multiplier": 0.75,
+ "min_period": 0.004,
+ "max_period": 4.0
+ },
+ "output_dir": "outputs/combined_frozen_unified_text_prompt_fullvlm_1010/combined_frozen_chunk50_noproprio_unified_text_prompt_fullvlm_1010/2025-10-11/12-19-59",
+ "exp_name": "combined_frozen_chunk50_noproprio_unified_text_prompt_fullvlm_1010/2025-10-11/12-19-59",
+ "group_name": "combined_frozen_unified_text_prompt_fullvlm_1010",
+ "resume": false,
+ "seed": 1000,
+ "num_workers": 8,
+ "batch_size": 64,
+ "update_steps": 50000,
+ "eval_freq": 20000,
+ "log_freq": 200,
+ "save_checkpoint": true,
+ "save_freq": 10000,
+ "use_policy_training_preset": true,
+ "optimizer": {
+ "type": "adamw",
+ "lr": 0.0001,
+ "weight_decay": 1e-10,
+ "grad_clip_norm": 10,
+ "betas": [
+ 0.9,
+ 0.95
+ ],
+ "eps": 1e-08
+ },
+ "scheduler": {
+ "type": "cosine_decay_with_warmup",
+ "num_warmup_steps": 1000,
+ "num_decay_steps": 30000,
+ "peak_lr": 0.0001,
+ "decay_lr": 2.5e-06
+ },
+ "eval": {
+ "n_episodes": 50,
+ "batch_size": 50,
+ "use_async_envs": false
+ },
+ "log_with": "wandb",
+ "wandb": {
+ "enable": false,
+ "disable_artifact": false,
+ "project": "lerobot",
+ "entity": null,
+ "notes": null,
+ "run_id": "qlamyc4i",
+ "mode": null
+ },
+ "gradient_accumulation_steps": 1,
+ "use_peft": false,
+ "autocast_adapter_dtype": true,
+ "peft": {
+ "target_modules": null,
+ "modules_to_save": null,
+ "method_type": "LORA",
+ "init_type": null,
+ "r": 64,
+ "lora_alpha": 128,
+ "fullfinetune_vlm_patch_embeddings": false,
+ "fullfinetune_vlm_vision_model": false
+ }
+ }
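
The training configuration above records every hyperparameter of the run (LIBERO dataset with unified text prompts, batch size 64, 50,000 update steps, AdamW with cosine decay after a 1,000-step warmup, seed 1000). Below is a minimal sketch for fetching it and printing the key settings, under the same assumptions as the earlier snippets; on recent LeRobot versions a file like this can also be passed back to `lerobot-train` via `--config_path` to reproduce or resume the run.

```python
# Sketch: fetch train_config.json and print the main training hyperparameters
# (placeholder repo id; keys follow the file shown above).
import json

from huggingface_hub import hf_hub_download

path = hf_hub_download("<hf_user>/<desired_policy_repo_id>", "train_config.json")
with open(path) as f:
    train_cfg = json.load(f)

print("dataset:", train_cfg["dataset"]["repo_id"])
print("batch_size:", train_cfg["batch_size"], "update_steps:", train_cfg["update_steps"])
print("optimizer:", train_cfg["optimizer"]["type"], "lr:", train_cfg["optimizer"]["lr"])
print("scheduler:", train_cfg["scheduler"]["type"])
```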