Delete libero-4_MolmoE-7B-10131629-5000_openai_seq368_flow_matching-qwen2_two_images_proprio-8_ft_ah_fullyft_llm_bs704/step13500-unsharded

Browse files

Files changed (2) hide show

libero-4_MolmoE-7B-10131629-5000_openai_seq368_flow_matching-qwen2_two_images_proprio-8_ft_ah_fullyft_llm_bs704/step13500-unsharded/config.yaml +0 -330
libero-4_MolmoE-7B-10131629-5000_openai_seq368_flow_matching-qwen2_two_images_proprio-8_ft_ah_fullyft_llm_bs704/step13500-unsharded/model.pt +0 -3

libero-4_MolmoE-7B-10131629-5000_openai_seq368_flow_matching-qwen2_two_images_proprio-8_ft_ah_fullyft_llm_bs704/step13500-unsharded/config.yaml DELETED Viewed

@@ -1,330 +0,0 @@
-run_name: libero-4_MolmoE-7B-10131629-5000_openai_seq368_flow_matching-qwen2_two_images_proprio-8_ft_ah_fullyft_llm_bs704_20251027_163658
-seed: 6198
-epoch: null
-dry_run: false
-model:
-  d_model: 3584
-  n_heads: 28
-  n_kv_heads: 4
-  qkv_bias: true
-  clip_qkv: null
-  n_layers: 28
-  mlp_ratio: 4
-  mlp_hidden_size: 37888
-  activation_type: swiglu
-  block_type: sequential
-  block_group_size: 1
-  rope: true
-  rope_full_precision: true
-  rope_theta: 1000000.0
-  vision_backbone:
-    image_model_type: openai
-    image_default_input_size:
-    - 336
-    - 336
-    image_patch_size: 14
-    image_pos_patch_size: 14
-    image_emb_dim: 1024
-    image_num_heads: 16
-    image_num_key_value_heads: 16
-    image_num_layers: 23
-    image_head_dim: 64
-    image_mlp_dim: 4096
-    image_mlp_activations: quick_gelu
-    image_dropout_rate: 0.0
-    image_num_pos: 577
-    image_norm_eps: 1.0e-05
-    attention_dropout: 0.0
-    residual_dropout: 0.0
-    initializer_range: 0.02
-    fsdp_wrap: false
-    resize_mode: default
-  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
-  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
-  low_cpu_fsdp: false
-  attention_type: sdpa
-  float32_attention: true
-  attention_dropout: 0.0
-  attention_layer_norm: false
-  residual_dropout: 0.1
-  response_residual_dropout: 0.0
-  embedding_dropout: 0.0
-  layer_norm_type: rms
-  layer_norm_with_affine: true
-  layer_norm_eps: 1.0e-06
-  attention_layer_norm_with_affine: true
-  max_sequence_length: 4096
-  max_position_embeddings: null
-  include_bias: false
-  bias_for_layer_norm: null
-  scale_logits: false
-  vocab_size: 152064
-  embedding_size: 152064
-  ff_out_size: 152192
-  additional_vocab_size: 128
-  new_embedding_init_range: 0.02
-  weight_tying: false
-  init_device: cpu
-  init_fn: normal
-  init_std: 0.02
-  init_cutoff_factor: null
-  norm_after: false
-  precision: amp_bf16
-  max_crops: 12
-  crop_mode: resize
-  use_col_tokens: true
-  prompt_type: uber_model
-  system_prompt_kind: demo_or_style
-  message_formatting: role
-  always_start_with_space: true
-  multi_annotation_weighting: root_subsegments
-  default_inference_len: 128
-  overlap_margins:
-  - 4
-  - 4
-  pad_value: 0.0
-  image_padding_embed: pad_and_partial_pad
-  fix_image_padding: true
-  vit_layers:
-  - -2
-  - -9
-  image_pooling_h: 2
-  image_pooling_w: 2
-  image_pooling_2d: attention_meanq
-  image_projector: mlp
-  image_feature_dropout: 0.0
-  initializer_range: 0.02
-  normalize_input_embeds: false
-  use_position_ids: true
-  head_dim: null
-  action_tokenizer:
-    identifier: physical-intelligence/fast
-    tokenizer_dir: null
-  tokenizer:
-    identifier: Qwen/Qwen2-7B
-    tokenizer_dir: null
-  pad_tokenizer: true
-  moe_num_experts: 8
-  moe_top_k: 2
-  moe_mlp_impl: sparse
-  moe_log_expert_assignment: false
-  moe_shared_expert: false
-  moe_lbl_in_fp32: false
-  moe_interleave: false
-  moe_loss_weight: 0.1
-  moe_zloss_weight: null
-  moe_dropless: true
-  moe_capacity_factor: 1.25
-  action_head: flow_matching
-  action_dim: 7
-  right_end_effector_dim: 7
-  left_end_effector_dim: 7
-  mobile_base_dim: 3
-  num_actions_chunk: 8
-  proprio_dim: 8
-  num_diffusion_steps: 1000
-  num_diffusion_inference_steps: 30
-  use_proprio: true
-  action_head_dit_hidden_size: 1152
-  action_head_dit_depth: 28
-  action_head_dit_num_heads: 16
-  action_head_flow_matching_dim: 1024
-  action_head_flow_matching_layers: 28
-  action_head_flow_matching_heads: 8
-  action_head_flow_matching_intermediate_size: 2048
-  llm_causal_attention: false
-  action_use_left_eef: false
-  action_use_mobile_base: false
-allow_resume: true
-ft_llm: false
-ft_vit: false
-ft_connector: false
-ft_embedding: lm_head
-lora: false
-use_lora: false
-lora_rank: 8
-lora_llm: false
-lora_vit: false
-lora_connector: false
-early_exit: false
-train_exit_random_layer: false
-optimizer:
-  name: adamw
-  learning_rate: 0.0001
-  weight_decay: 0.01
-  betas:
-  - 0.9
-  - 0.95
-  eps: 1.0e-05
-  connector_learning_rate: 0.0002
-  vit_learning_rate: 6.0e-06
-  llm_learning_rate: 5.0e-05
-  connector_weight_decay: 0.0
-  vit_weight_decay: 0.0
-  llm_weight_decay: 0.0
-  connector_betas:
-  - 0.9
-  - 0.95
-  vit_betas:
-  - 0.9
-  - 0.95
-  llm_betas:
-  - 0.9
-  - 0.95
-  connector_eps: 1.0e-06
-  vit_eps: 1.0e-06
-  llm_eps: 1.0e-06
-  metrics_log_interval: 20
-scheduler:
-  name: multimodal
-  units: steps
-  t_warmup: 100
-  t_max: null
-  alpha_f: 0.1
-  connector_t_warmup: 200
-  vit_t_warmup: 2000
-  llm_t_warmup: 2000
-  grad_clip_warmup_steps: null
-  grad_clip_warmup_factor: null
-  warmup_min_lr: 0.0
-data:
-  dataset: vla_dataset_simulation
-  mixture: null
-  root_size_mixture: null
-  split: train
-  seed: 95818
-  shuffle_messages: false
-  pad: to_max
-  sequence_length: 368
-  shuffle: true
-  for_inference: false
-  multi_modal: torch
-  num_workers: 0
-  drop_last: true
-  pin_memory: true
-  prefetch_factor: null
-  persistent_workers: false
-  timeout: 0
-  rlds_dataset_name: libero_4_task_suites_no_noops
-  rlds_data_root_dir: /vast/users/xiaodan/zhangjian/datasets/modified_libero_rlds
-  use_wrist_image: true
-  use_proprio: true
-  rlds_shuffle_buffer_size: 100000
-  rlds_traj_threads: 8
-  rlds_read_threads: 8
-  lerobot_episode_index_start: null
-  lerobot_episode_index_end: null
-restore_dataloader: true
-fast_forward_batches: null
-evaluators:
-- label: val
-  data:
-    dataset: vla_dataset_simulation
-    mixture: null
-    root_size_mixture: null
-    split: validation
-    seed: null
-    shuffle_messages: false
-    pad: to_max
-    sequence_length: 368
-    shuffle: false
-    for_inference: false
-    multi_modal: torch
-    num_workers: 0
-    drop_last: true
-    pin_memory: true
-    prefetch_factor: null
-    persistent_workers: true
-    timeout: 0
-    rlds_dataset_name: libero_4_task_suites_no_noops
-    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
-    use_wrist_image: true
-    use_proprio: true
-    rlds_shuffle_buffer_size: 256000
-    rlds_traj_threads: 8
-    rlds_read_threads: 8
-    lerobot_episode_index_start: 353
-    lerobot_episode_index_end: 765
-  device_eval_batch_size: null
-  subset_num_batches: 64
-  max_examples: null
-  max_new_tokens: 448
-  mm_evaluator: null
-  save_dir: null
-  save_to_checkpoint_dir: false
-  eval_name: null
-  skip_if_metrics_cached: true
-eval_interval: 0
-inf_eval_interval: -1
-inf_evaluators: []
-save_folder: /vast/users/xiaodan/zhangjian/checkpoints/libero-4_MolmoE-7B-10131629-5000_openai_seq368_flow_matching-qwen2_two_images_proprio-8_ft_ah_fullyft_llm_bs704
-remote_save_folder: null
-canceled_check_interval: 50
-save_interval: 500
-save_interval_unsharded: 500
-save_interval_ephemeral: null
-save_interval_action_head: 500
-save_num_checkpoints_to_keep: 1
-save_num_unsharded_checkpoints_to_keep: 1
-save_num_action_head_checkpoints_to_keep: 2
-save_overwrite: true
-force_save_unsharded: false
-no_pre_train_checkpoint: true
-initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-10131629-5000
-load_model_config: null
-checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-10131629-5000
-load_path: /vast/users/xiaodan/zhangjian/checkpoints/libero-4_MolmoE-7B-10131629-5000_openai_seq368_flow_matching-qwen2_two_images_proprio-8_ft_ah_fullyft_llm_bs704/step9500
-load_path_sharded_checkpointer: null
-reset_optimizer_state: false
-reset_trainer_state: false
-save_dataloader_state: false
-reset_dataloader_state: false
-keep_lr_on_load: true
-sharded_checkpointer: torch_legacy
-max_duration: 500000
-global_train_batch_size: 704
-device_train_batch_size: 88
-device_train_microbatch_size: 88
-device_eval_batch_size: 4
-eval_subset_num_batches: -1
-eval_on_load: false
-device_inf_eval_batch_size: 16
-inf_eval_subset_num_batches: -1
-device_train_grad_accum: 1
-max_grad_norm: 1.0
-multi_component_grad_norm: true
-batch_divisor: global_batch
-max_grad_norm_ratio: null
-precision: amp_bf16
-wandb:
-  project: a1-vla-xiaodan
-  entity: demo0
-  group: null
-  name: libero-4_MolmoE-7B-10131629-5000_openai_seq368_flow_matching-qwen2_two_images_proprio-8_ft_ah_fullyft_llm_bs704_20251027_163658
-  tags:
-  - watching
-  log_artifacts: false
-  rank_zero_only: true
-  log_interval: 1
-speed_monitor:
-  window_size: 20
-  gpu_flops_available: null
-console_log_interval: 1
-gen1_gc_interval: 1
-compile: null
-fsdp:
-  use_orig_params: true
-  sharding_strategy: FULL_SHARD
-  wrapping_strategy: by_block_and_size
-  precision: float
-  hybrid_sharding_num_model_replicas: null
-softmax_auxiliary_loss: true
-softmax_auxiliary_loss_scale: 0.0001
-time_limit: null
-extra_steps_after_cancel: 10
-python_profiling: false
-torch_profiling: false
-stop_at: 500000
-stop_after: null
-activation_checkpointing: whole_layer
-fused_loss: null

libero-4_MolmoE-7B-10131629-5000_openai_seq368_flow_matching-qwen2_two_images_proprio-8_ft_ah_fullyft_llm_bs704/step13500-unsharded/model.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:59e59fe196464083a46023beb18f044405d789a7e14ff071fd29c33d96d2a681
-size 33843010215