{
  "optim": {
    "type": "AdamW",
    "lr": 0.0005,
    "betas": [
      0.9,
      0.98
    ],
    "eps": 1e-12,
    "weight_decay": 0.01,
    "amsgrad": false,
    "fused": null
  },
  "optim_mod": {
    "name": "none"
  },
  "name": "bert-base",
  "limited_decay_keys": [
    "bias",
    "LayerNorm.bias",
    "LayerNorm.weight",
    "norm"
  ],
  "warmup_steps": 30000,
  "cooldown_steps": 0,
  "steps": 900000,
  "scheduler": "budget-cosine-decay",
  "batch_size": 1536,
  "batch_size_ramp": 0,
  "gradient_clipping": null,
  "pretrain_in_train_mode": false,
  "objective": {
    "name": "masked-lm",
    "mlm_probability": 0.15,
    "use_80_20_rule": true,
    "disable_mlm": false,
    "token_drop": 0.0
  },
  "reverse_dataset_order": false,
  "budget": 24
}