Upload config
Browse files
- config.json +0 -5
- configuration_mamba.py +0 -10
config.json
CHANGED
|
@@ -10,15 +10,10 @@
|
|
| 10 |
"d_state": 16,
|
| 11 |
"dt_rank": 48,
|
| 12 |
"expand": 2,
|
| 13 |
-
"fused_add_norm": true,
|
| 14 |
"initializer_range": 0.02,
|
| 15 |
"model_type": "mamba",
|
| 16 |
"n_layer": 24,
|
| 17 |
-
"norm_epsilon": 1e-05,
|
| 18 |
"pad_vocab_size_multiple": 8,
|
| 19 |
-
"residual_in_fp32": true,
|
| 20 |
-
"rms_norm": true,
|
| 21 |
-
"ssm_cfg": {},
|
| 22 |
"transformers_version": "4.37.2",
|
| 23 |
"vocab_size": 50280
|
| 24 |
}
|
|
|
|
| 10 |
"d_state": 16,
|
| 11 |
"dt_rank": 48,
|
| 12 |
"expand": 2,
|
|
|
|
| 13 |
"initializer_range": 0.02,
|
| 14 |
"model_type": "mamba",
|
| 15 |
"n_layer": 24,
|
|
|
|
| 16 |
"pad_vocab_size_multiple": 8,
|
|
|
|
|
|
|
|
|
|
| 17 |
"transformers_version": "4.37.2",
|
| 18 |
"vocab_size": 50280
|
| 19 |
}
|
configuration_mamba.py
CHANGED
|
@@ -17,14 +17,9 @@ class MambaConfig(PretrainedConfig):
|
|
| 17 |
conv_bias=True,
|
| 18 |
bias=False,
|
| 19 |
n_layer=64,
|
| 20 |
-
norm_epsilon=1e-5,
|
| 21 |
dt_rank: Union[int, str] = "auto",
|
| 22 |
pad_vocab_size_multiple=8,
|
| 23 |
initializer_range=0.02,
|
| 24 |
-
rms_norm: bool = True,
|
| 25 |
-
fused_add_norm: bool = True,
|
| 26 |
-
ssm_cfg={},
|
| 27 |
-
residual_in_fp32: bool = True,
|
| 28 |
**kwargs,
|
| 29 |
):
|
| 30 |
self.vocab_size = vocab_size
|
|
@@ -39,11 +34,6 @@ class MambaConfig(PretrainedConfig):
|
|
| 39 |
self.dt_rank = dt_rank
|
| 40 |
self.initializer_range = initializer_range
|
| 41 |
self.bias = bias
|
| 42 |
-
self.ssm_cfg = ssm_cfg
|
| 43 |
-
self.norm_epsilon = norm_epsilon
|
| 44 |
-
self.rms_norm = rms_norm
|
| 45 |
-
self.residual_in_fp32 = residual_in_fp32
|
| 46 |
-
self.fused_add_norm = fused_add_norm
|
| 47 |
|
| 48 |
if self.dt_rank == "auto":
|
| 49 |
self.dt_rank = math.ceil(self.d_model / 16)
|
|
|
|
| 17 |
conv_bias=True,
|
| 18 |
bias=False,
|
| 19 |
n_layer=64,
|
|
|
|
| 20 |
dt_rank: Union[int, str] = "auto",
|
| 21 |
pad_vocab_size_multiple=8,
|
| 22 |
initializer_range=0.02,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
**kwargs,
|
| 24 |
):
|
| 25 |
self.vocab_size = vocab_size
|
|
|
|
| 34 |
self.dt_rank = dt_rank
|
| 35 |
self.initializer_range = initializer_range
|
| 36 |
self.bias = bias
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
if self.dt_rank == "auto":
|
| 39 |
self.dt_rank = math.ceil(self.d_model / 16)
|