# Training configuration: a flat set of module-level assignments.
#
# Scalars and paths are defined once in the first section and referenced by
# name inside the nested dicts below, so a setting cannot end up with two
# different values in two places. Every top-level name of the original file
# is kept and evaluates to the same value.

# --------------------------------------------------------------------------
# Paths
# --------------------------------------------------------------------------
pretrained_model_name_or_path = '/cpfs01/shared/alillm_hs/zouyicheng/rm_pretrain/rm/RM_PT_internlm2_5_7b_DATA_510m_single_mix_Node_57_LR_1_45e_5_STEP_223684_hf'
data_path = '/cpfs01/shared/alillm_hs/zouyicheng/rm_pretrain/data/preference/single_source_prompt_sft/mixed/HH_puyu'
work_dir = './work_dirs/RM_SFT_reward_pt_7b_223684_DATA_HH_puyu_mixed_Node_2_LR_2e-5'

# --------------------------------------------------------------------------
# Scalar settings
# --------------------------------------------------------------------------
# Model / loss
loss_type = 'ranking'
penalty_type = 'none'
reward_token_id = 92527
use_varlen_attn = True
sequence_parallel_size = 1

# Data
data_num = 150221  # not referenced by any other entry in this file
max_length = 16384
max_packed_length = 32768
max_response_length = 4096
batch_size = 1
accumulative_counts = 2
dataloader_num_workers = 0
sampler = 'mmengine.dataset.DefaultSampler'

# Optimisation
optim_type = 'torch.optim.AdamW'
lr = 2e-05
betas = (
    0.9,
    0.95,
)
weight_decay = 0
max_norm = 1
warmup_ratio = 0.03
max_epochs = 1

# Checkpointing
save_steps = 1000
save_total_limit = -1

# Runtime
launcher = 'pytorch'
load_from = None
resume = False
log_level = 'INFO'
runner_type = 'FlexibleRunner'

# --------------------------------------------------------------------------
# Tokenizer and model
# --------------------------------------------------------------------------
tokenizer = dict(
    padding_side='left',
    pretrained_model_name_or_path=pretrained_model_name_or_path,
    trust_remote_code=True,
    type='transformers.AutoTokenizer.from_pretrained')

model = dict(
    llm=dict(
        pretrained_model_name_or_path=pretrained_model_name_or_path,
        trust_remote_code=True,
        type='transformers.AutoModel.from_pretrained'),
    loss_type=loss_type,
    penalty_type=penalty_type,
    type='xtuner.model.reward.RewardModel',
    use_varlen_attn=use_varlen_attn)

# --------------------------------------------------------------------------
# Dataset and dataloader
# --------------------------------------------------------------------------
train_dataset = dict(
    dataset=dict(path=data_path, type='datasets.load_dataset'),
    dataset_map_fn=None,
    is_dpo=False,
    is_reward=True,
    max_length=max_length,
    max_packed_length=max_packed_length,
    max_response_length=max_response_length,
    num_proc=32,
    reward_token_id=reward_token_id,
    shuffle_before_pack=True,
    tokenizer=tokenizer,
    type='xtuner.dataset.preference_dataset.build_preference_dataset',
    use_varlen_attn=use_varlen_attn)

# The dataloader reuses the ``train_dataset`` dict defined above instead of
# carrying a second, hand-copied version of it.
train_dataloader = dict(
    batch_size=batch_size,
    collate_fn=dict(
        type='xtuner.dataset.collate_fns.preference_collate_fn.preference_collate_fn',
        use_varlen_attn=use_varlen_attn),
    dataset=train_dataset,
    num_workers=dataloader_num_workers,
    sampler=dict(shuffle=True, type=sampler))

# --------------------------------------------------------------------------
# Optimizer and learning-rate schedule
# --------------------------------------------------------------------------
optim_wrapper = dict(
    optimizer=dict(
        betas=betas,
        lr=lr,
        type=optim_type,
        weight_decay=weight_decay),
    type='DeepSpeedOptimWrapper')

# Two phases that meet at ``warmup_ratio * max_epochs`` (0.03 * 1 == 0.03
# with the current values); the second phase ends at ``max_epochs``.
# ``start_factor`` and ``eta_min`` are kept as the literal floats from the
# original file.
# NOTE(review): numerically they look like lr * 0.1 -- confirm before
# replacing the literals with an expression.
param_scheduler = [
    dict(
        begin=0,
        by_epoch=True,
        convert_to_iter_based=True,
        end=warmup_ratio * max_epochs,
        start_factor=2.0000000000000003e-06,
        type='mmengine.optim.LinearLR'),
    dict(
        begin=warmup_ratio * max_epochs,
        by_epoch=True,
        convert_to_iter_based=True,
        end=max_epochs,
        eta_min=2.0000000000000003e-06,
        type='mmengine.optim.CosineAnnealingLR'),
]

train_cfg = dict(max_epochs=max_epochs, type='xtuner.engine.runner.TrainLoop')

# --------------------------------------------------------------------------
# DeepSpeed strategy
# --------------------------------------------------------------------------
# The inner ``config`` dict leaves the batch / accumulation / clipping
# entries as the string 'auto'; the concrete numbers are given by the
# outer keyword arguments, which point at the scalars defined above.
strategy = dict(
    config=dict(
        bf16=dict(enabled=True),
        fp16=dict(enabled=False, initial_scale_power=16),
        gradient_accumulation_steps='auto',
        gradient_clipping='auto',
        train_micro_batch_size_per_gpu='auto',
        zero_allow_untested_optimizer=True,
        zero_force_ds_cpu_optimizer=False,
        zero_optimization=dict(overlap_comm=True, stage=1)),
    exclude_frozen_parameters=True,
    gradient_accumulation_steps=accumulative_counts,
    gradient_clipping=max_norm,
    sequence_parallel_size=sequence_parallel_size,
    train_micro_batch_size_per_gpu=batch_size,
    type='xtuner.engine.DeepSpeedStrategy')

# --------------------------------------------------------------------------
# Hooks
# --------------------------------------------------------------------------
custom_hooks = [
    dict(type='xtuner.engine.hooks.VarlenAttnArgsToMessageHubHook'),
]

default_hooks = dict(
    checkpoint=dict(
        by_epoch=False,
        interval=save_steps,
        max_keep_ckpts=save_total_limit,
        type='mmengine.hooks.CheckpointHook'),
    logger=dict(
        interval=10,
        log_metric_by_epoch=False,
        type='mmengine.hooks.LoggerHook'),
    param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'),
    sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'),
    timer=dict(type='mmengine.hooks.IterTimerHook'))

# --------------------------------------------------------------------------
# Environment, logging and visualisation
# --------------------------------------------------------------------------
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))

log_processor = dict(by_epoch=False)

randomness = dict(deterministic=False, seed=None)

visualizer = dict(
    type='mmengine.visualization.Visualizer',
    vis_backends=[
        dict(type='mmengine.visualization.TensorboardVisBackend'),
    ])