Update configuration_ernie4_5_vl.py
Browse files
- configuration_ernie4_5_vl.py +13 -13
configuration_ernie4_5_vl.py
CHANGED
|
@@ -539,19 +539,19 @@ class Ernie4_5_VLMoEConfig(Ernie4_5_MoEConfig):
|
|
| 539 |
"activation_function": "hidden_act",
|
| 540 |
}
|
| 541 |
base_model_tp_plan = {
|
| 542 |
-
"
|
| 543 |
-
"
|
| 544 |
-
"
|
| 545 |
-
"
|
| 546 |
-
"
|
| 547 |
-
"
|
| 548 |
-
"
|
| 549 |
-
"
|
| 550 |
-
"
|
| 551 |
-
"
|
| 552 |
-
"
|
| 553 |
-
"
|
| 554 |
-
"
|
| 555 |
}
|
| 556 |
|
| 557 |
def __init__(
|
|
|
|
| 539 |
"activation_function": "hidden_act",
|
| 540 |
}
|
| 541 |
base_model_tp_plan = {
|
| 542 |
+
"model.layers.*.self_attn.q_proj": "colwise_rep",
|
| 543 |
+
"model.layers.*.self_attn.k_proj": "colwise_rep",
|
| 544 |
+
"model.layers.*.self_attn.v_proj": "colwise_rep",
|
| 545 |
+
"model.layers.*.self_attn.o_proj": "rowwise_rep",
|
| 546 |
+
"model.layers.*.mlp.experts.*.gate_proj": "colwise",
|
| 547 |
+
"model.layers.*.mlp.experts.*.up_proj": "colwise",
|
| 548 |
+
"model.layers.*.mlp.experts.*.down_proj": "rowwise",
|
| 549 |
+
"model.layers.*.mlp_text.experts.*.gate_proj": "colwise",
|
| 550 |
+
"model.layers.*.mlp_text.experts.*.up_proj": "colwise",
|
| 551 |
+
"model.layers.*.mlp_text.experts.*.down_proj": "rowwise",
|
| 552 |
+
"model.layers.*.mlp.gate_proj": "colwise",
|
| 553 |
+
"model.layers.*.mlp.up_proj": "colwise",
|
| 554 |
+
"model.layers.*.mlp.down_proj": "rowwise"
|
| 555 |
}
|
| 556 |
|
| 557 |
def __init__(
|