Upload folder using huggingface_hub

Files changed:
- config.json (+244 -243)
- generation_config.json (+2 -8)
- model-00001-of-00002.safetensors (+2 -2)
- model.safetensors.index.json (+0 -0)
- tokenizer_config.json (+2 -1)
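
A commit like this is what `huggingface_hub`'s `upload_folder` helper produces. A minimal sketch of the call that could generate it, assuming a local folder laid out like the repo (the folder path is a placeholder; the repo id is taken from `_name_or_path` in the updated config):

```python
from huggingface_hub import HfApi

api = HfApi()  # picks up the token saved by `huggingface-cli login`

# Upload the folder's contents as a single commit on the model repo.
api.upload_folder(
    folder_path="./Meta-Llama-3.1-8B-Instruct-v8-k65536-4096-woft",  # placeholder
    repo_id="VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-4096-woft",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```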
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "
|
| 3 |
"architectures": [
|
| 4 |
"LlamaForCausalLM"
|
| 5 |
],
|
|
@@ -11,6 +11,7 @@
|
|
| 11 |
128008,
|
| 12 |
128009
|
| 13 |
],
|
|
|
|
| 14 |
"hidden_act": "silu",
|
| 15 |
"hidden_size": 4096,
|
| 16 |
"initializer_range": 0.02,
|
|
@@ -22,27 +23,12 @@
|
|
| 22 |
"num_hidden_layers": 32,
|
| 23 |
"num_key_value_heads": 8,
|
| 24 |
"pretraining_tp": 1,
|
| 25 |
-
"rms_norm_eps": 1e-05,
|
| 26 |
-
"rope_scaling": {
|
| 27 |
-
"factor": 8.0,
|
| 28 |
-
"high_freq_factor": 4.0,
|
| 29 |
-
"low_freq_factor": 1.0,
|
| 30 |
-
"original_max_position_embeddings": 8192,
|
| 31 |
-
"rope_type": "llama3"
|
| 32 |
-
},
|
| 33 |
-
"rope_theta": 500000.0,
|
| 34 |
-
"tie_word_embeddings": false,
|
| 35 |
-
"torch_dtype": "bfloat16",
|
| 36 |
-
"transformers_version": "4.45.1",
|
| 37 |
-
"use_cache": true,
|
| 38 |
-
"vocab_size": 128256,
|
| 39 |
"quantization_config": {
|
| 40 |
-
"quant_method": "vptq",
|
| 41 |
"config_for_layers": {
|
| 42 |
"model.layers.0.mlp.down_proj": {
|
| 43 |
"bias": false,
|
| 44 |
"enable_norm": true,
|
| 45 |
-
"enable_perm":
|
| 46 |
"group_num": 1,
|
| 47 |
"group_size": 14336,
|
| 48 |
"in_features": 14336,
|
|
@@ -66,7 +52,7 @@
|
|
| 66 |
"model.layers.0.mlp.gate_proj": {
|
| 67 |
"bias": false,
|
| 68 |
"enable_norm": true,
|
| 69 |
-
"enable_perm":
|
| 70 |
"group_num": 1,
|
| 71 |
"group_size": 4096,
|
| 72 |
"in_features": 4096,
|
|
@@ -90,7 +76,7 @@
|
|
| 90 |
"model.layers.0.mlp.up_proj": {
|
| 91 |
"bias": false,
|
| 92 |
"enable_norm": true,
|
| 93 |
-
"enable_perm":
|
| 94 |
"group_num": 1,
|
| 95 |
"group_size": 4096,
|
| 96 |
"in_features": 4096,
|
|
@@ -114,7 +100,7 @@
|
|
| 114 |
"model.layers.0.self_attn.k_proj": {
|
| 115 |
"bias": false,
|
| 116 |
"enable_norm": true,
|
| 117 |
-
"enable_perm":
|
| 118 |
"group_num": 1,
|
| 119 |
"group_size": 4096,
|
| 120 |
"in_features": 4096,
|
|
@@ -138,7 +124,7 @@
|
|
| 138 |
"model.layers.0.self_attn.o_proj": {
|
| 139 |
"bias": false,
|
| 140 |
"enable_norm": true,
|
| 141 |
-
"enable_perm":
|
| 142 |
"group_num": 1,
|
| 143 |
"group_size": 4096,
|
| 144 |
"in_features": 4096,
|
|
@@ -162,7 +148,7 @@
|
|
| 162 |
"model.layers.0.self_attn.q_proj": {
|
| 163 |
"bias": false,
|
| 164 |
"enable_norm": true,
|
| 165 |
-
"enable_perm":
|
| 166 |
"group_num": 1,
|
| 167 |
"group_size": 4096,
|
| 168 |
"in_features": 4096,
|
|
@@ -186,7 +172,7 @@
|
|
| 186 |
"model.layers.0.self_attn.v_proj": {
|
| 187 |
"bias": false,
|
| 188 |
"enable_norm": true,
|
| 189 |
-
"enable_perm":
|
| 190 |
"group_num": 1,
|
| 191 |
"group_size": 4096,
|
| 192 |
"in_features": 4096,
|
|
@@ -210,7 +196,7 @@
|
|
| 210 |
"model.layers.1.mlp.down_proj": {
|
| 211 |
"bias": false,
|
| 212 |
"enable_norm": true,
|
| 213 |
-
"enable_perm":
|
| 214 |
"group_num": 1,
|
| 215 |
"group_size": 14336,
|
| 216 |
"in_features": 14336,
|
|
@@ -234,7 +220,7 @@
|
|
| 234 |
"model.layers.1.mlp.gate_proj": {
|
| 235 |
"bias": false,
|
| 236 |
"enable_norm": true,
|
| 237 |
-
"enable_perm":
|
| 238 |
"group_num": 1,
|
| 239 |
"group_size": 4096,
|
| 240 |
"in_features": 4096,
|
|
@@ -258,7 +244,7 @@
|
|
| 258 |
"model.layers.1.mlp.up_proj": {
|
| 259 |
"bias": false,
|
| 260 |
"enable_norm": true,
|
| 261 |
-
"enable_perm":
|
| 262 |
"group_num": 1,
|
| 263 |
"group_size": 4096,
|
| 264 |
"in_features": 4096,
|
|
@@ -282,7 +268,7 @@
|
|
| 282 |
"model.layers.1.self_attn.k_proj": {
|
| 283 |
"bias": false,
|
| 284 |
"enable_norm": true,
|
| 285 |
-
"enable_perm":
|
| 286 |
"group_num": 1,
|
| 287 |
"group_size": 4096,
|
| 288 |
"in_features": 4096,
|
|
@@ -306,7 +292,7 @@
|
|
| 306 |
"model.layers.1.self_attn.o_proj": {
|
| 307 |
"bias": false,
|
| 308 |
"enable_norm": true,
|
| 309 |
-
"enable_perm":
|
| 310 |
"group_num": 1,
|
| 311 |
"group_size": 4096,
|
| 312 |
"in_features": 4096,
|
|
@@ -330,7 +316,7 @@
|
|
| 330 |
"model.layers.1.self_attn.q_proj": {
|
| 331 |
"bias": false,
|
| 332 |
"enable_norm": true,
|
| 333 |
-
"enable_perm":
|
| 334 |
"group_num": 1,
|
| 335 |
"group_size": 4096,
|
| 336 |
"in_features": 4096,
|
|
@@ -354,7 +340,7 @@
|
|
| 354 |
"model.layers.1.self_attn.v_proj": {
|
| 355 |
"bias": false,
|
| 356 |
"enable_norm": true,
|
| 357 |
-
"enable_perm":
|
| 358 |
"group_num": 1,
|
| 359 |
"group_size": 4096,
|
| 360 |
"in_features": 4096,
|
|
@@ -378,7 +364,7 @@
|
|
| 378 |
"model.layers.10.mlp.down_proj": {
|
| 379 |
"bias": false,
|
| 380 |
"enable_norm": true,
|
| 381 |
-
"enable_perm":
|
| 382 |
"group_num": 1,
|
| 383 |
"group_size": 14336,
|
| 384 |
"in_features": 14336,
|
|
@@ -402,7 +388,7 @@
|
|
| 402 |
"model.layers.10.mlp.gate_proj": {
|
| 403 |
"bias": false,
|
| 404 |
"enable_norm": true,
|
| 405 |
-
"enable_perm":
|
| 406 |
"group_num": 1,
|
| 407 |
"group_size": 4096,
|
| 408 |
"in_features": 4096,
|
|
@@ -426,7 +412,7 @@
|
|
| 426 |
"model.layers.10.mlp.up_proj": {
|
| 427 |
"bias": false,
|
| 428 |
"enable_norm": true,
|
| 429 |
-
"enable_perm":
|
| 430 |
"group_num": 1,
|
| 431 |
"group_size": 4096,
|
| 432 |
"in_features": 4096,
|
|
@@ -450,7 +436,7 @@
|
|
| 450 |
"model.layers.10.self_attn.k_proj": {
|
| 451 |
"bias": false,
|
| 452 |
"enable_norm": true,
|
| 453 |
-
"enable_perm":
|
| 454 |
"group_num": 1,
|
| 455 |
"group_size": 4096,
|
| 456 |
"in_features": 4096,
|
|
@@ -474,7 +460,7 @@
|
|
| 474 |
"model.layers.10.self_attn.o_proj": {
|
| 475 |
"bias": false,
|
| 476 |
"enable_norm": true,
|
| 477 |
-
"enable_perm":
|
| 478 |
"group_num": 1,
|
| 479 |
"group_size": 4096,
|
| 480 |
"in_features": 4096,
|
|
@@ -498,7 +484,7 @@
|
|
| 498 |
"model.layers.10.self_attn.q_proj": {
|
| 499 |
"bias": false,
|
| 500 |
"enable_norm": true,
|
| 501 |
-
"enable_perm":
|
| 502 |
"group_num": 1,
|
| 503 |
"group_size": 4096,
|
| 504 |
"in_features": 4096,
|
|
@@ -522,7 +508,7 @@
|
|
| 522 |
"model.layers.10.self_attn.v_proj": {
|
| 523 |
"bias": false,
|
| 524 |
"enable_norm": true,
|
| 525 |
-
"enable_perm":
|
| 526 |
"group_num": 1,
|
| 527 |
"group_size": 4096,
|
| 528 |
"in_features": 4096,
|
|
@@ -546,7 +532,7 @@
|
|
| 546 |
"model.layers.11.mlp.down_proj": {
|
| 547 |
"bias": false,
|
| 548 |
"enable_norm": true,
|
| 549 |
-
"enable_perm":
|
| 550 |
"group_num": 1,
|
| 551 |
"group_size": 14336,
|
| 552 |
"in_features": 14336,
|
|
@@ -570,7 +556,7 @@
|
|
| 570 |
"model.layers.11.mlp.gate_proj": {
|
| 571 |
"bias": false,
|
| 572 |
"enable_norm": true,
|
| 573 |
-
"enable_perm":
|
| 574 |
"group_num": 1,
|
| 575 |
"group_size": 4096,
|
| 576 |
"in_features": 4096,
|
|
@@ -594,7 +580,7 @@
|
|
| 594 |
"model.layers.11.mlp.up_proj": {
|
| 595 |
"bias": false,
|
| 596 |
"enable_norm": true,
|
| 597 |
-
"enable_perm":
|
| 598 |
"group_num": 1,
|
| 599 |
"group_size": 4096,
|
| 600 |
"in_features": 4096,
|
|
@@ -618,7 +604,7 @@
|
|
| 618 |
"model.layers.11.self_attn.k_proj": {
|
| 619 |
"bias": false,
|
| 620 |
"enable_norm": true,
|
| 621 |
-
"enable_perm":
|
| 622 |
"group_num": 1,
|
| 623 |
"group_size": 4096,
|
| 624 |
"in_features": 4096,
|
|
@@ -642,7 +628,7 @@
|
|
| 642 |
"model.layers.11.self_attn.o_proj": {
|
| 643 |
"bias": false,
|
| 644 |
"enable_norm": true,
|
| 645 |
-
"enable_perm":
|
| 646 |
"group_num": 1,
|
| 647 |
"group_size": 4096,
|
| 648 |
"in_features": 4096,
|
|
@@ -666,7 +652,7 @@
|
|
| 666 |
"model.layers.11.self_attn.q_proj": {
|
| 667 |
"bias": false,
|
| 668 |
"enable_norm": true,
|
| 669 |
-
"enable_perm":
|
| 670 |
"group_num": 1,
|
| 671 |
"group_size": 4096,
|
| 672 |
"in_features": 4096,
|
|
@@ -690,7 +676,7 @@
|
|
| 690 |
"model.layers.11.self_attn.v_proj": {
|
| 691 |
"bias": false,
|
| 692 |
"enable_norm": true,
|
| 693 |
-
"enable_perm":
|
| 694 |
"group_num": 1,
|
| 695 |
"group_size": 4096,
|
| 696 |
"in_features": 4096,
|
|
@@ -714,7 +700,7 @@
|
|
| 714 |
"model.layers.12.mlp.down_proj": {
|
| 715 |
"bias": false,
|
| 716 |
"enable_norm": true,
|
| 717 |
-
"enable_perm":
|
| 718 |
"group_num": 1,
|
| 719 |
"group_size": 14336,
|
| 720 |
"in_features": 14336,
|
|
@@ -738,7 +724,7 @@
|
|
| 738 |
"model.layers.12.mlp.gate_proj": {
|
| 739 |
"bias": false,
|
| 740 |
"enable_norm": true,
|
| 741 |
-
"enable_perm":
|
| 742 |
"group_num": 1,
|
| 743 |
"group_size": 4096,
|
| 744 |
"in_features": 4096,
|
|
@@ -762,7 +748,7 @@
|
|
| 762 |
"model.layers.12.mlp.up_proj": {
|
| 763 |
"bias": false,
|
| 764 |
"enable_norm": true,
|
| 765 |
-
"enable_perm":
|
| 766 |
"group_num": 1,
|
| 767 |
"group_size": 4096,
|
| 768 |
"in_features": 4096,
|
|
@@ -786,7 +772,7 @@
|
|
| 786 |
"model.layers.12.self_attn.k_proj": {
|
| 787 |
"bias": false,
|
| 788 |
"enable_norm": true,
|
| 789 |
-
"enable_perm":
|
| 790 |
"group_num": 1,
|
| 791 |
"group_size": 4096,
|
| 792 |
"in_features": 4096,
|
|
@@ -810,7 +796,7 @@
|
|
| 810 |
"model.layers.12.self_attn.o_proj": {
|
| 811 |
"bias": false,
|
| 812 |
"enable_norm": true,
|
| 813 |
-
"enable_perm":
|
| 814 |
"group_num": 1,
|
| 815 |
"group_size": 4096,
|
| 816 |
"in_features": 4096,
|
|
@@ -834,7 +820,7 @@
|
|
| 834 |
"model.layers.12.self_attn.q_proj": {
|
| 835 |
"bias": false,
|
| 836 |
"enable_norm": true,
|
| 837 |
-
"enable_perm":
|
| 838 |
"group_num": 1,
|
| 839 |
"group_size": 4096,
|
| 840 |
"in_features": 4096,
|
|
@@ -858,7 +844,7 @@
|
|
| 858 |
"model.layers.12.self_attn.v_proj": {
|
| 859 |
"bias": false,
|
| 860 |
"enable_norm": true,
|
| 861 |
-
"enable_perm":
|
| 862 |
"group_num": 1,
|
| 863 |
"group_size": 4096,
|
| 864 |
"in_features": 4096,
|
|
@@ -882,7 +868,7 @@
|
|
| 882 |
"model.layers.13.mlp.down_proj": {
|
| 883 |
"bias": false,
|
| 884 |
"enable_norm": true,
|
| 885 |
-
"enable_perm":
|
| 886 |
"group_num": 1,
|
| 887 |
"group_size": 14336,
|
| 888 |
"in_features": 14336,
|
|
@@ -906,7 +892,7 @@
|
|
| 906 |
"model.layers.13.mlp.gate_proj": {
|
| 907 |
"bias": false,
|
| 908 |
"enable_norm": true,
|
| 909 |
-
"enable_perm":
|
| 910 |
"group_num": 1,
|
| 911 |
"group_size": 4096,
|
| 912 |
"in_features": 4096,
|
|
@@ -930,7 +916,7 @@
|
|
| 930 |
"model.layers.13.mlp.up_proj": {
|
| 931 |
"bias": false,
|
| 932 |
"enable_norm": true,
|
| 933 |
-
"enable_perm":
|
| 934 |
"group_num": 1,
|
| 935 |
"group_size": 4096,
|
| 936 |
"in_features": 4096,
|
|
@@ -954,7 +940,7 @@
|
|
| 954 |
"model.layers.13.self_attn.k_proj": {
|
| 955 |
"bias": false,
|
| 956 |
"enable_norm": true,
|
| 957 |
-
"enable_perm":
|
| 958 |
"group_num": 1,
|
| 959 |
"group_size": 4096,
|
| 960 |
"in_features": 4096,
|
|
@@ -978,7 +964,7 @@
|
|
| 978 |
"model.layers.13.self_attn.o_proj": {
|
| 979 |
"bias": false,
|
| 980 |
"enable_norm": true,
|
| 981 |
-
"enable_perm":
|
| 982 |
"group_num": 1,
|
| 983 |
"group_size": 4096,
|
| 984 |
"in_features": 4096,
|
|
@@ -1002,7 +988,7 @@
|
|
| 1002 |
"model.layers.13.self_attn.q_proj": {
|
| 1003 |
"bias": false,
|
| 1004 |
"enable_norm": true,
|
| 1005 |
-
"enable_perm":
|
| 1006 |
"group_num": 1,
|
| 1007 |
"group_size": 4096,
|
| 1008 |
"in_features": 4096,
|
|
@@ -1026,7 +1012,7 @@
|
|
| 1026 |
"model.layers.13.self_attn.v_proj": {
|
| 1027 |
"bias": false,
|
| 1028 |
"enable_norm": true,
|
| 1029 |
-
"enable_perm":
|
| 1030 |
"group_num": 1,
|
| 1031 |
"group_size": 4096,
|
| 1032 |
"in_features": 4096,
|
|
@@ -1050,7 +1036,7 @@
|
|
| 1050 |
"model.layers.14.mlp.down_proj": {
|
| 1051 |
"bias": false,
|
| 1052 |
"enable_norm": true,
|
| 1053 |
-
"enable_perm":
|
| 1054 |
"group_num": 1,
|
| 1055 |
"group_size": 14336,
|
| 1056 |
"in_features": 14336,
|
|
@@ -1074,7 +1060,7 @@
|
|
| 1074 |
"model.layers.14.mlp.gate_proj": {
|
| 1075 |
"bias": false,
|
| 1076 |
"enable_norm": true,
|
| 1077 |
-
"enable_perm":
|
| 1078 |
"group_num": 1,
|
| 1079 |
"group_size": 4096,
|
| 1080 |
"in_features": 4096,
|
|
@@ -1098,7 +1084,7 @@
|
|
| 1098 |
"model.layers.14.mlp.up_proj": {
|
| 1099 |
"bias": false,
|
| 1100 |
"enable_norm": true,
|
| 1101 |
-
"enable_perm":
|
| 1102 |
"group_num": 1,
|
| 1103 |
"group_size": 4096,
|
| 1104 |
"in_features": 4096,
|
|
@@ -1122,7 +1108,7 @@
|
|
| 1122 |
"model.layers.14.self_attn.k_proj": {
|
| 1123 |
"bias": false,
|
| 1124 |
"enable_norm": true,
|
| 1125 |
-
"enable_perm":
|
| 1126 |
"group_num": 1,
|
| 1127 |
"group_size": 4096,
|
| 1128 |
"in_features": 4096,
|
|
@@ -1146,7 +1132,7 @@
|
|
| 1146 |
"model.layers.14.self_attn.o_proj": {
|
| 1147 |
"bias": false,
|
| 1148 |
"enable_norm": true,
|
| 1149 |
-
"enable_perm":
|
| 1150 |
"group_num": 1,
|
| 1151 |
"group_size": 4096,
|
| 1152 |
"in_features": 4096,
|
|
@@ -1170,7 +1156,7 @@
|
|
| 1170 |
"model.layers.14.self_attn.q_proj": {
|
| 1171 |
"bias": false,
|
| 1172 |
"enable_norm": true,
|
| 1173 |
-
"enable_perm":
|
| 1174 |
"group_num": 1,
|
| 1175 |
"group_size": 4096,
|
| 1176 |
"in_features": 4096,
|
|
@@ -1194,7 +1180,7 @@
|
|
| 1194 |
"model.layers.14.self_attn.v_proj": {
|
| 1195 |
"bias": false,
|
| 1196 |
"enable_norm": true,
|
| 1197 |
-
"enable_perm":
|
| 1198 |
"group_num": 1,
|
| 1199 |
"group_size": 4096,
|
| 1200 |
"in_features": 4096,
|
|
@@ -1218,7 +1204,7 @@
|
|
| 1218 |
"model.layers.15.mlp.down_proj": {
|
| 1219 |
"bias": false,
|
| 1220 |
"enable_norm": true,
|
| 1221 |
-
"enable_perm":
|
| 1222 |
"group_num": 1,
|
| 1223 |
"group_size": 14336,
|
| 1224 |
"in_features": 14336,
|
|
@@ -1242,7 +1228,7 @@
|
|
| 1242 |
"model.layers.15.mlp.gate_proj": {
|
| 1243 |
"bias": false,
|
| 1244 |
"enable_norm": true,
|
| 1245 |
-
"enable_perm":
|
| 1246 |
"group_num": 1,
|
| 1247 |
"group_size": 4096,
|
| 1248 |
"in_features": 4096,
|
|
@@ -1266,7 +1252,7 @@
|
|
| 1266 |
"model.layers.15.mlp.up_proj": {
|
| 1267 |
"bias": false,
|
| 1268 |
"enable_norm": true,
|
| 1269 |
-
"enable_perm":
|
| 1270 |
"group_num": 1,
|
| 1271 |
"group_size": 4096,
|
| 1272 |
"in_features": 4096,
|
|
@@ -1290,7 +1276,7 @@
|
|
| 1290 |
"model.layers.15.self_attn.k_proj": {
|
| 1291 |
"bias": false,
|
| 1292 |
"enable_norm": true,
|
| 1293 |
-
"enable_perm":
|
| 1294 |
"group_num": 1,
|
| 1295 |
"group_size": 4096,
|
| 1296 |
"in_features": 4096,
|
|
@@ -1314,7 +1300,7 @@
|
|
| 1314 |
"model.layers.15.self_attn.o_proj": {
|
| 1315 |
"bias": false,
|
| 1316 |
"enable_norm": true,
|
| 1317 |
-
"enable_perm":
|
| 1318 |
"group_num": 1,
|
| 1319 |
"group_size": 4096,
|
| 1320 |
"in_features": 4096,
|
|
@@ -1338,7 +1324,7 @@
|
|
| 1338 |
"model.layers.15.self_attn.q_proj": {
|
| 1339 |
"bias": false,
|
| 1340 |
"enable_norm": true,
|
| 1341 |
-
"enable_perm":
|
| 1342 |
"group_num": 1,
|
| 1343 |
"group_size": 4096,
|
| 1344 |
"in_features": 4096,
|
|
@@ -1362,7 +1348,7 @@
|
|
| 1362 |
"model.layers.15.self_attn.v_proj": {
|
| 1363 |
"bias": false,
|
| 1364 |
"enable_norm": true,
|
| 1365 |
-
"enable_perm":
|
| 1366 |
"group_num": 1,
|
| 1367 |
"group_size": 4096,
|
| 1368 |
"in_features": 4096,
|
|
@@ -1386,7 +1372,7 @@
|
|
| 1386 |
"model.layers.16.mlp.down_proj": {
|
| 1387 |
"bias": false,
|
| 1388 |
"enable_norm": true,
|
| 1389 |
-
"enable_perm":
|
| 1390 |
"group_num": 1,
|
| 1391 |
"group_size": 14336,
|
| 1392 |
"in_features": 14336,
|
|
@@ -1410,7 +1396,7 @@
|
|
| 1410 |
"model.layers.16.mlp.gate_proj": {
|
| 1411 |
"bias": false,
|
| 1412 |
"enable_norm": true,
|
| 1413 |
-
"enable_perm":
|
| 1414 |
"group_num": 1,
|
| 1415 |
"group_size": 4096,
|
| 1416 |
"in_features": 4096,
|
|
@@ -1434,7 +1420,7 @@
|
|
| 1434 |
"model.layers.16.mlp.up_proj": {
|
| 1435 |
"bias": false,
|
| 1436 |
"enable_norm": true,
|
| 1437 |
-
"enable_perm":
|
| 1438 |
"group_num": 1,
|
| 1439 |
"group_size": 4096,
|
| 1440 |
"in_features": 4096,
|
|
@@ -1458,7 +1444,7 @@
|
|
| 1458 |
"model.layers.16.self_attn.k_proj": {
|
| 1459 |
"bias": false,
|
| 1460 |
"enable_norm": true,
|
| 1461 |
-
"enable_perm":
|
| 1462 |
"group_num": 1,
|
| 1463 |
"group_size": 4096,
|
| 1464 |
"in_features": 4096,
|
|
@@ -1482,7 +1468,7 @@
|
|
| 1482 |
"model.layers.16.self_attn.o_proj": {
|
| 1483 |
"bias": false,
|
| 1484 |
"enable_norm": true,
|
| 1485 |
-
"enable_perm":
|
| 1486 |
"group_num": 1,
|
| 1487 |
"group_size": 4096,
|
| 1488 |
"in_features": 4096,
|
|
@@ -1506,7 +1492,7 @@
|
|
| 1506 |
"model.layers.16.self_attn.q_proj": {
|
| 1507 |
"bias": false,
|
| 1508 |
"enable_norm": true,
|
| 1509 |
-
"enable_perm":
|
| 1510 |
"group_num": 1,
|
| 1511 |
"group_size": 4096,
|
| 1512 |
"in_features": 4096,
|
|
@@ -1530,7 +1516,7 @@
|
|
| 1530 |
"model.layers.16.self_attn.v_proj": {
|
| 1531 |
"bias": false,
|
| 1532 |
"enable_norm": true,
|
| 1533 |
-
"enable_perm":
|
| 1534 |
"group_num": 1,
|
| 1535 |
"group_size": 4096,
|
| 1536 |
"in_features": 4096,
|
|
@@ -1554,7 +1540,7 @@
|
|
| 1554 |
"model.layers.17.mlp.down_proj": {
|
| 1555 |
"bias": false,
|
| 1556 |
"enable_norm": true,
|
| 1557 |
-
"enable_perm":
|
| 1558 |
"group_num": 1,
|
| 1559 |
"group_size": 14336,
|
| 1560 |
"in_features": 14336,
|
|
@@ -1578,7 +1564,7 @@
|
|
| 1578 |
"model.layers.17.mlp.gate_proj": {
|
| 1579 |
"bias": false,
|
| 1580 |
"enable_norm": true,
|
| 1581 |
-
"enable_perm":
|
| 1582 |
"group_num": 1,
|
| 1583 |
"group_size": 4096,
|
| 1584 |
"in_features": 4096,
|
|
@@ -1602,7 +1588,7 @@
|
|
| 1602 |
"model.layers.17.mlp.up_proj": {
|
| 1603 |
"bias": false,
|
| 1604 |
"enable_norm": true,
|
| 1605 |
-
"enable_perm":
|
| 1606 |
"group_num": 1,
|
| 1607 |
"group_size": 4096,
|
| 1608 |
"in_features": 4096,
|
|
@@ -1626,7 +1612,7 @@
|
|
| 1626 |
"model.layers.17.self_attn.k_proj": {
|
| 1627 |
"bias": false,
|
| 1628 |
"enable_norm": true,
|
| 1629 |
-
"enable_perm":
|
| 1630 |
"group_num": 1,
|
| 1631 |
"group_size": 4096,
|
| 1632 |
"in_features": 4096,
|
|
@@ -1650,7 +1636,7 @@
|
|
| 1650 |
"model.layers.17.self_attn.o_proj": {
|
| 1651 |
"bias": false,
|
| 1652 |
"enable_norm": true,
|
| 1653 |
-
"enable_perm":
|
| 1654 |
"group_num": 1,
|
| 1655 |
"group_size": 4096,
|
| 1656 |
"in_features": 4096,
|
|
@@ -1674,7 +1660,7 @@
|
|
| 1674 |
"model.layers.17.self_attn.q_proj": {
|
| 1675 |
"bias": false,
|
| 1676 |
"enable_norm": true,
|
| 1677 |
-
"enable_perm":
|
| 1678 |
"group_num": 1,
|
| 1679 |
"group_size": 4096,
|
| 1680 |
"in_features": 4096,
|
|
@@ -1698,7 +1684,7 @@
|
|
| 1698 |
"model.layers.17.self_attn.v_proj": {
|
| 1699 |
"bias": false,
|
| 1700 |
"enable_norm": true,
|
| 1701 |
-
"enable_perm":
|
| 1702 |
"group_num": 1,
|
| 1703 |
"group_size": 4096,
|
| 1704 |
"in_features": 4096,
|
|
@@ -1722,7 +1708,7 @@
|
|
| 1722 |
"model.layers.18.mlp.down_proj": {
|
| 1723 |
"bias": false,
|
| 1724 |
"enable_norm": true,
|
| 1725 |
-
"enable_perm":
|
| 1726 |
"group_num": 1,
|
| 1727 |
"group_size": 14336,
|
| 1728 |
"in_features": 14336,
|
|
@@ -1746,7 +1732,7 @@
|
|
| 1746 |
"model.layers.18.mlp.gate_proj": {
|
| 1747 |
"bias": false,
|
| 1748 |
"enable_norm": true,
|
| 1749 |
-
"enable_perm":
|
| 1750 |
"group_num": 1,
|
| 1751 |
"group_size": 4096,
|
| 1752 |
"in_features": 4096,
|
|
@@ -1770,7 +1756,7 @@
|
|
| 1770 |
"model.layers.18.mlp.up_proj": {
|
| 1771 |
"bias": false,
|
| 1772 |
"enable_norm": true,
|
| 1773 |
-
"enable_perm":
|
| 1774 |
"group_num": 1,
|
| 1775 |
"group_size": 4096,
|
| 1776 |
"in_features": 4096,
|
|
@@ -1794,7 +1780,7 @@
|
|
| 1794 |
"model.layers.18.self_attn.k_proj": {
|
| 1795 |
"bias": false,
|
| 1796 |
"enable_norm": true,
|
| 1797 |
-
"enable_perm":
|
| 1798 |
"group_num": 1,
|
| 1799 |
"group_size": 4096,
|
| 1800 |
"in_features": 4096,
|
|
@@ -1818,7 +1804,7 @@
|
|
| 1818 |
"model.layers.18.self_attn.o_proj": {
|
| 1819 |
"bias": false,
|
| 1820 |
"enable_norm": true,
|
| 1821 |
-
"enable_perm":
|
| 1822 |
"group_num": 1,
|
| 1823 |
"group_size": 4096,
|
| 1824 |
"in_features": 4096,
|
|
@@ -1842,7 +1828,7 @@
|
|
| 1842 |
"model.layers.18.self_attn.q_proj": {
|
| 1843 |
"bias": false,
|
| 1844 |
"enable_norm": true,
|
| 1845 |
-
"enable_perm":
|
| 1846 |
"group_num": 1,
|
| 1847 |
"group_size": 4096,
|
| 1848 |
"in_features": 4096,
|
|
@@ -1866,7 +1852,7 @@
|
|
| 1866 |
"model.layers.18.self_attn.v_proj": {
|
| 1867 |
"bias": false,
|
| 1868 |
"enable_norm": true,
|
| 1869 |
-
"enable_perm":
|
| 1870 |
"group_num": 1,
|
| 1871 |
"group_size": 4096,
|
| 1872 |
"in_features": 4096,
|
|
@@ -1890,7 +1876,7 @@
|
|
| 1890 |
"model.layers.19.mlp.down_proj": {
|
| 1891 |
"bias": false,
|
| 1892 |
"enable_norm": true,
|
| 1893 |
-
"enable_perm":
|
| 1894 |
"group_num": 1,
|
| 1895 |
"group_size": 14336,
|
| 1896 |
"in_features": 14336,
|
|
@@ -1914,7 +1900,7 @@
|
|
| 1914 |
"model.layers.19.mlp.gate_proj": {
|
| 1915 |
"bias": false,
|
| 1916 |
"enable_norm": true,
|
| 1917 |
-
"enable_perm":
|
| 1918 |
"group_num": 1,
|
| 1919 |
"group_size": 4096,
|
| 1920 |
"in_features": 4096,
|
|
@@ -1938,7 +1924,7 @@
|
|
| 1938 |
"model.layers.19.mlp.up_proj": {
|
| 1939 |
"bias": false,
|
| 1940 |
"enable_norm": true,
|
| 1941 |
-
"enable_perm":
|
| 1942 |
"group_num": 1,
|
| 1943 |
"group_size": 4096,
|
| 1944 |
"in_features": 4096,
|
|
@@ -1962,7 +1948,7 @@
|
|
| 1962 |
"model.layers.19.self_attn.k_proj": {
|
| 1963 |
"bias": false,
|
| 1964 |
"enable_norm": true,
|
| 1965 |
-
"enable_perm":
|
| 1966 |
"group_num": 1,
|
| 1967 |
"group_size": 4096,
|
| 1968 |
"in_features": 4096,
|
|
@@ -1986,7 +1972,7 @@
|
|
| 1986 |
"model.layers.19.self_attn.o_proj": {
|
| 1987 |
"bias": false,
|
| 1988 |
"enable_norm": true,
|
| 1989 |
-
"enable_perm":
|
| 1990 |
"group_num": 1,
|
| 1991 |
"group_size": 4096,
|
| 1992 |
"in_features": 4096,
|
|
@@ -2010,7 +1996,7 @@
|
|
| 2010 |
"model.layers.19.self_attn.q_proj": {
|
| 2011 |
"bias": false,
|
| 2012 |
"enable_norm": true,
|
| 2013 |
-
"enable_perm":
|
| 2014 |
"group_num": 1,
|
| 2015 |
"group_size": 4096,
|
| 2016 |
"in_features": 4096,
|
|
@@ -2034,7 +2020,7 @@
|
|
| 2034 |
"model.layers.19.self_attn.v_proj": {
|
| 2035 |
"bias": false,
|
| 2036 |
"enable_norm": true,
|
| 2037 |
-
"enable_perm":
|
| 2038 |
"group_num": 1,
|
| 2039 |
"group_size": 4096,
|
| 2040 |
"in_features": 4096,
|
|
@@ -2058,7 +2044,7 @@
|
|
| 2058 |
"model.layers.2.mlp.down_proj": {
|
| 2059 |
"bias": false,
|
| 2060 |
"enable_norm": true,
|
| 2061 |
-
"enable_perm":
|
| 2062 |
"group_num": 1,
|
| 2063 |
"group_size": 14336,
|
| 2064 |
"in_features": 14336,
|
|
@@ -2082,7 +2068,7 @@
|
|
| 2082 |
"model.layers.2.mlp.gate_proj": {
|
| 2083 |
"bias": false,
|
| 2084 |
"enable_norm": true,
|
| 2085 |
-
"enable_perm":
|
| 2086 |
"group_num": 1,
|
| 2087 |
"group_size": 4096,
|
| 2088 |
"in_features": 4096,
|
|
@@ -2106,7 +2092,7 @@
|
|
| 2106 |
"model.layers.2.mlp.up_proj": {
|
| 2107 |
"bias": false,
|
| 2108 |
"enable_norm": true,
|
| 2109 |
-
"enable_perm":
|
| 2110 |
"group_num": 1,
|
| 2111 |
"group_size": 4096,
|
| 2112 |
"in_features": 4096,
|
|
@@ -2130,7 +2116,7 @@
|
|
| 2130 |
"model.layers.2.self_attn.k_proj": {
|
| 2131 |
"bias": false,
|
| 2132 |
"enable_norm": true,
|
| 2133 |
-
"enable_perm":
|
| 2134 |
"group_num": 1,
|
| 2135 |
"group_size": 4096,
|
| 2136 |
"in_features": 4096,
|
|
@@ -2154,7 +2140,7 @@
|
|
| 2154 |
"model.layers.2.self_attn.o_proj": {
|
| 2155 |
"bias": false,
|
| 2156 |
"enable_norm": true,
|
| 2157 |
-
"enable_perm":
|
| 2158 |
"group_num": 1,
|
| 2159 |
"group_size": 4096,
|
| 2160 |
"in_features": 4096,
|
|
@@ -2178,7 +2164,7 @@
|
|
| 2178 |
"model.layers.2.self_attn.q_proj": {
|
| 2179 |
"bias": false,
|
| 2180 |
"enable_norm": true,
|
| 2181 |
-
"enable_perm":
|
| 2182 |
"group_num": 1,
|
| 2183 |
"group_size": 4096,
|
| 2184 |
"in_features": 4096,
|
|
@@ -2202,7 +2188,7 @@
|
|
| 2202 |
"model.layers.2.self_attn.v_proj": {
|
| 2203 |
"bias": false,
|
| 2204 |
"enable_norm": true,
|
| 2205 |
-
"enable_perm":
|
| 2206 |
"group_num": 1,
|
| 2207 |
"group_size": 4096,
|
| 2208 |
"in_features": 4096,
|
|
@@ -2226,7 +2212,7 @@
|
|
| 2226 |
"model.layers.20.mlp.down_proj": {
|
| 2227 |
"bias": false,
|
| 2228 |
"enable_norm": true,
|
| 2229 |
-
"enable_perm":
|
| 2230 |
"group_num": 1,
|
| 2231 |
"group_size": 14336,
|
| 2232 |
"in_features": 14336,
|
|
@@ -2250,7 +2236,7 @@
|
|
| 2250 |
"model.layers.20.mlp.gate_proj": {
|
| 2251 |
"bias": false,
|
| 2252 |
"enable_norm": true,
|
| 2253 |
-
"enable_perm":
|
| 2254 |
"group_num": 1,
|
| 2255 |
"group_size": 4096,
|
| 2256 |
"in_features": 4096,
|
|
@@ -2274,7 +2260,7 @@
|
|
| 2274 |
"model.layers.20.mlp.up_proj": {
|
| 2275 |
"bias": false,
|
| 2276 |
"enable_norm": true,
|
| 2277 |
-
"enable_perm":
|
| 2278 |
"group_num": 1,
|
| 2279 |
"group_size": 4096,
|
| 2280 |
"in_features": 4096,
|
|
@@ -2298,7 +2284,7 @@
|
|
| 2298 |
"model.layers.20.self_attn.k_proj": {
|
| 2299 |
"bias": false,
|
| 2300 |
"enable_norm": true,
|
| 2301 |
-
"enable_perm":
|
| 2302 |
"group_num": 1,
|
| 2303 |
"group_size": 4096,
|
| 2304 |
"in_features": 4096,
|
|
@@ -2322,7 +2308,7 @@
|
|
| 2322 |
"model.layers.20.self_attn.o_proj": {
|
| 2323 |
"bias": false,
|
| 2324 |
"enable_norm": true,
|
| 2325 |
-
"enable_perm":
|
| 2326 |
"group_num": 1,
|
| 2327 |
"group_size": 4096,
|
| 2328 |
"in_features": 4096,
|
|
@@ -2346,7 +2332,7 @@
|
|
| 2346 |
"model.layers.20.self_attn.q_proj": {
|
| 2347 |
"bias": false,
|
| 2348 |
"enable_norm": true,
|
| 2349 |
-
"enable_perm":
|
| 2350 |
"group_num": 1,
|
| 2351 |
"group_size": 4096,
|
| 2352 |
"in_features": 4096,
|
|
@@ -2370,7 +2356,7 @@
|
|
| 2370 |
"model.layers.20.self_attn.v_proj": {
|
| 2371 |
"bias": false,
|
| 2372 |
"enable_norm": true,
|
| 2373 |
-
"enable_perm":
|
| 2374 |
"group_num": 1,
|
| 2375 |
"group_size": 4096,
|
| 2376 |
"in_features": 4096,
|
|
@@ -2394,7 +2380,7 @@
|
|
| 2394 |
"model.layers.21.mlp.down_proj": {
|
| 2395 |
"bias": false,
|
| 2396 |
"enable_norm": true,
|
| 2397 |
-
"enable_perm":
|
| 2398 |
"group_num": 1,
|
| 2399 |
"group_size": 14336,
|
| 2400 |
"in_features": 14336,
|
|
@@ -2418,7 +2404,7 @@
|
|
| 2418 |
"model.layers.21.mlp.gate_proj": {
|
| 2419 |
"bias": false,
|
| 2420 |
"enable_norm": true,
|
| 2421 |
-
"enable_perm":
|
| 2422 |
"group_num": 1,
|
| 2423 |
"group_size": 4096,
|
| 2424 |
"in_features": 4096,
|
|
@@ -2442,7 +2428,7 @@
|
|
| 2442 |
"model.layers.21.mlp.up_proj": {
|
| 2443 |
"bias": false,
|
| 2444 |
"enable_norm": true,
|
| 2445 |
-
"enable_perm":
|
| 2446 |
"group_num": 1,
|
| 2447 |
"group_size": 4096,
|
| 2448 |
"in_features": 4096,
|
|
@@ -2466,7 +2452,7 @@
|
|
| 2466 |
"model.layers.21.self_attn.k_proj": {
|
| 2467 |
"bias": false,
|
| 2468 |
"enable_norm": true,
|
| 2469 |
-
"enable_perm":
|
| 2470 |
"group_num": 1,
|
| 2471 |
"group_size": 4096,
|
| 2472 |
"in_features": 4096,
|
|
@@ -2490,7 +2476,7 @@
|
|
| 2490 |
"model.layers.21.self_attn.o_proj": {
|
| 2491 |
"bias": false,
|
| 2492 |
"enable_norm": true,
|
| 2493 |
-
"enable_perm":
|
| 2494 |
"group_num": 1,
|
| 2495 |
"group_size": 4096,
|
| 2496 |
"in_features": 4096,
|
|
@@ -2514,7 +2500,7 @@
|
|
| 2514 |
"model.layers.21.self_attn.q_proj": {
|
| 2515 |
"bias": false,
|
| 2516 |
"enable_norm": true,
|
| 2517 |
-
"enable_perm":
|
| 2518 |
"group_num": 1,
|
| 2519 |
"group_size": 4096,
|
| 2520 |
"in_features": 4096,
|
|
@@ -2538,7 +2524,7 @@
|
|
| 2538 |
"model.layers.21.self_attn.v_proj": {
|
| 2539 |
"bias": false,
|
| 2540 |
"enable_norm": true,
|
| 2541 |
-
"enable_perm":
|
| 2542 |
"group_num": 1,
|
| 2543 |
"group_size": 4096,
|
| 2544 |
"in_features": 4096,
|
|
@@ -2562,7 +2548,7 @@
|
|
| 2562 |
"model.layers.22.mlp.down_proj": {
|
| 2563 |
"bias": false,
|
| 2564 |
"enable_norm": true,
|
| 2565 |
-
"enable_perm":
|
| 2566 |
"group_num": 1,
|
| 2567 |
"group_size": 14336,
|
| 2568 |
"in_features": 14336,
|
|
@@ -2586,7 +2572,7 @@
|
|
| 2586 |
"model.layers.22.mlp.gate_proj": {
|
| 2587 |
"bias": false,
|
| 2588 |
"enable_norm": true,
|
| 2589 |
-
"enable_perm":
|
| 2590 |
"group_num": 1,
|
| 2591 |
"group_size": 4096,
|
| 2592 |
"in_features": 4096,
|
|
@@ -2610,7 +2596,7 @@
|
|
| 2610 |
"model.layers.22.mlp.up_proj": {
|
| 2611 |
"bias": false,
|
| 2612 |
"enable_norm": true,
|
| 2613 |
-
"enable_perm":
|
| 2614 |
"group_num": 1,
|
| 2615 |
"group_size": 4096,
|
| 2616 |
"in_features": 4096,
|
|
@@ -2634,7 +2620,7 @@
|
|
| 2634 |
"model.layers.22.self_attn.k_proj": {
|
| 2635 |
"bias": false,
|
| 2636 |
"enable_norm": true,
|
| 2637 |
-
"enable_perm":
|
| 2638 |
"group_num": 1,
|
| 2639 |
"group_size": 4096,
|
| 2640 |
"in_features": 4096,
|
|
@@ -2658,7 +2644,7 @@
|
|
| 2658 |
"model.layers.22.self_attn.o_proj": {
|
| 2659 |
"bias": false,
|
| 2660 |
"enable_norm": true,
|
| 2661 |
-
"enable_perm":
|
| 2662 |
"group_num": 1,
|
| 2663 |
"group_size": 4096,
|
| 2664 |
"in_features": 4096,
|
|
@@ -2682,7 +2668,7 @@
|
|
| 2682 |
"model.layers.22.self_attn.q_proj": {
|
| 2683 |
"bias": false,
|
| 2684 |
"enable_norm": true,
|
| 2685 |
-
"enable_perm":
|
| 2686 |
"group_num": 1,
|
| 2687 |
"group_size": 4096,
|
| 2688 |
"in_features": 4096,
|
|
@@ -2706,7 +2692,7 @@
|
|
| 2706 |
"model.layers.22.self_attn.v_proj": {
|
| 2707 |
"bias": false,
|
| 2708 |
"enable_norm": true,
|
| 2709 |
-
"enable_perm":
|
| 2710 |
"group_num": 1,
|
| 2711 |
"group_size": 4096,
|
| 2712 |
"in_features": 4096,
|
|
@@ -2730,7 +2716,7 @@
|
|
| 2730 |
"model.layers.23.mlp.down_proj": {
|
| 2731 |
"bias": false,
|
| 2732 |
"enable_norm": true,
|
| 2733 |
-
"enable_perm":
|
| 2734 |
"group_num": 1,
|
| 2735 |
"group_size": 14336,
|
| 2736 |
"in_features": 14336,
|
|
@@ -2754,7 +2740,7 @@
|
|
| 2754 |
"model.layers.23.mlp.gate_proj": {
|
| 2755 |
"bias": false,
|
| 2756 |
"enable_norm": true,
|
| 2757 |
-
"enable_perm":
|
| 2758 |
"group_num": 1,
|
| 2759 |
"group_size": 4096,
|
| 2760 |
"in_features": 4096,
|
|
@@ -2778,7 +2764,7 @@
|
|
| 2778 |
"model.layers.23.mlp.up_proj": {
|
| 2779 |
"bias": false,
|
| 2780 |
"enable_norm": true,
|
| 2781 |
-
"enable_perm":
|
| 2782 |
"group_num": 1,
|
| 2783 |
"group_size": 4096,
|
| 2784 |
"in_features": 4096,
|
|
@@ -2802,7 +2788,7 @@
|
|
| 2802 |
"model.layers.23.self_attn.k_proj": {
|
| 2803 |
"bias": false,
|
| 2804 |
"enable_norm": true,
|
| 2805 |
-
"enable_perm":
|
| 2806 |
"group_num": 1,
|
| 2807 |
"group_size": 4096,
|
| 2808 |
"in_features": 4096,
|
|
@@ -2826,7 +2812,7 @@
|
|
| 2826 |
"model.layers.23.self_attn.o_proj": {
|
| 2827 |
"bias": false,
|
| 2828 |
"enable_norm": true,
|
| 2829 |
-
"enable_perm":
|
| 2830 |
"group_num": 1,
|
| 2831 |
"group_size": 4096,
|
| 2832 |
"in_features": 4096,
|
|
@@ -2850,7 +2836,7 @@
|
|
| 2850 |
"model.layers.23.self_attn.q_proj": {
|
| 2851 |
"bias": false,
|
| 2852 |
"enable_norm": true,
|
| 2853 |
-
"enable_perm":
|
| 2854 |
"group_num": 1,
|
| 2855 |
"group_size": 4096,
|
| 2856 |
"in_features": 4096,
|
|
@@ -2874,7 +2860,7 @@
|
|
| 2874 |
"model.layers.23.self_attn.v_proj": {
|
| 2875 |
"bias": false,
|
| 2876 |
"enable_norm": true,
|
| 2877 |
-
"enable_perm":
|
| 2878 |
"group_num": 1,
|
| 2879 |
"group_size": 4096,
|
| 2880 |
"in_features": 4096,
|
|
@@ -2898,7 +2884,7 @@
|
|
| 2898 |
"model.layers.24.mlp.down_proj": {
|
| 2899 |
"bias": false,
|
| 2900 |
"enable_norm": true,
|
| 2901 |
-
"enable_perm":
|
| 2902 |
"group_num": 1,
|
| 2903 |
"group_size": 14336,
|
| 2904 |
"in_features": 14336,
|
|
@@ -2922,7 +2908,7 @@
|
|
| 2922 |
"model.layers.24.mlp.gate_proj": {
|
| 2923 |
"bias": false,
|
| 2924 |
"enable_norm": true,
|
| 2925 |
-
"enable_perm":
|
| 2926 |
"group_num": 1,
|
| 2927 |
"group_size": 4096,
|
| 2928 |
"in_features": 4096,
|
|
@@ -2946,7 +2932,7 @@
|
|
| 2946 |
"model.layers.24.mlp.up_proj": {
|
| 2947 |
"bias": false,
|
| 2948 |
"enable_norm": true,
|
| 2949 |
-
"enable_perm":
|
| 2950 |
"group_num": 1,
|
| 2951 |
"group_size": 4096,
|
| 2952 |
"in_features": 4096,
|
|
@@ -2970,7 +2956,7 @@
|
|
| 2970 |
"model.layers.24.self_attn.k_proj": {
|
| 2971 |
"bias": false,
|
| 2972 |
"enable_norm": true,
|
| 2973 |
-
"enable_perm":
|
| 2974 |
"group_num": 1,
|
| 2975 |
"group_size": 4096,
|
| 2976 |
"in_features": 4096,
|
|
@@ -2994,7 +2980,7 @@
|
|
| 2994 |
"model.layers.24.self_attn.o_proj": {
|
| 2995 |
"bias": false,
|
| 2996 |
"enable_norm": true,
|
| 2997 |
-
"enable_perm":
|
| 2998 |
"group_num": 1,
|
| 2999 |
"group_size": 4096,
|
| 3000 |
"in_features": 4096,
|
|
@@ -3018,7 +3004,7 @@
|
|
| 3018 |
"model.layers.24.self_attn.q_proj": {
|
| 3019 |
"bias": false,
|
| 3020 |
"enable_norm": true,
|
| 3021 |
-
"enable_perm":
|
| 3022 |
"group_num": 1,
|
| 3023 |
"group_size": 4096,
|
| 3024 |
"in_features": 4096,
|
|
@@ -3042,7 +3028,7 @@
|
|
| 3042 |
"model.layers.24.self_attn.v_proj": {
|
| 3043 |
"bias": false,
|
| 3044 |
"enable_norm": true,
|
| 3045 |
-
"enable_perm":
|
| 3046 |
"group_num": 1,
|
| 3047 |
"group_size": 4096,
|
| 3048 |
"in_features": 4096,
|
|
@@ -3066,7 +3052,7 @@
|
|
| 3066 |
"model.layers.25.mlp.down_proj": {
|
| 3067 |
"bias": false,
|
| 3068 |
"enable_norm": true,
|
| 3069 |
-
"enable_perm":
|
| 3070 |
"group_num": 1,
|
| 3071 |
"group_size": 14336,
|
| 3072 |
"in_features": 14336,
|
|
@@ -3090,7 +3076,7 @@
|
|
| 3090 |
"model.layers.25.mlp.gate_proj": {
|
| 3091 |
"bias": false,
|
| 3092 |
"enable_norm": true,
|
| 3093 |
-
"enable_perm":
|
| 3094 |
"group_num": 1,
|
| 3095 |
"group_size": 4096,
|
| 3096 |
"in_features": 4096,
|
|
@@ -3114,7 +3100,7 @@
|
|
| 3114 |
"model.layers.25.mlp.up_proj": {
|
| 3115 |
"bias": false,
|
| 3116 |
"enable_norm": true,
|
| 3117 |
-
"enable_perm":
|
| 3118 |
"group_num": 1,
|
| 3119 |
"group_size": 4096,
|
| 3120 |
"in_features": 4096,
|
|
@@ -3138,7 +3124,7 @@
|
|
| 3138 |
"model.layers.25.self_attn.k_proj": {
|
| 3139 |
"bias": false,
|
| 3140 |
"enable_norm": true,
|
| 3141 |
-
"enable_perm":
|
| 3142 |
"group_num": 1,
|
| 3143 |
"group_size": 4096,
|
| 3144 |
"in_features": 4096,
|
|
@@ -3162,7 +3148,7 @@
|
|
| 3162 |
"model.layers.25.self_attn.o_proj": {
|
| 3163 |
"bias": false,
|
| 3164 |
"enable_norm": true,
|
| 3165 |
-
"enable_perm":
|
| 3166 |
"group_num": 1,
|
| 3167 |
"group_size": 4096,
|
| 3168 |
"in_features": 4096,
|
|
@@ -3186,7 +3172,7 @@
|
|
| 3186 |
"model.layers.25.self_attn.q_proj": {
|
| 3187 |
"bias": false,
|
| 3188 |
"enable_norm": true,
|
| 3189 |
-
"enable_perm":
|
| 3190 |
"group_num": 1,
|
| 3191 |
"group_size": 4096,
|
| 3192 |
"in_features": 4096,
|
|
@@ -3210,7 +3196,7 @@
|
|
| 3210 |
"model.layers.25.self_attn.v_proj": {
|
| 3211 |
"bias": false,
|
| 3212 |
"enable_norm": true,
|
| 3213 |
-
"enable_perm":
|
| 3214 |
"group_num": 1,
|
| 3215 |
"group_size": 4096,
|
| 3216 |
"in_features": 4096,
|
|
@@ -3234,7 +3220,7 @@
|
|
| 3234 |
"model.layers.26.mlp.down_proj": {
|
| 3235 |
"bias": false,
|
| 3236 |
"enable_norm": true,
|
| 3237 |
-
"enable_perm":
|
| 3238 |
"group_num": 1,
|
| 3239 |
"group_size": 14336,
|
| 3240 |
"in_features": 14336,
|
|
@@ -3258,7 +3244,7 @@
|
|
| 3258 |
"model.layers.26.mlp.gate_proj": {
|
| 3259 |
"bias": false,
|
| 3260 |
"enable_norm": true,
|
| 3261 |
-
"enable_perm":
|
| 3262 |
"group_num": 1,
|
| 3263 |
"group_size": 4096,
|
| 3264 |
"in_features": 4096,
|
|
@@ -3282,7 +3268,7 @@
|
|
| 3282 |
"model.layers.26.mlp.up_proj": {
|
| 3283 |
"bias": false,
|
| 3284 |
"enable_norm": true,
|
| 3285 |
-
"enable_perm":
|
| 3286 |
"group_num": 1,
|
| 3287 |
"group_size": 4096,
|
| 3288 |
"in_features": 4096,
|
|
@@ -3306,7 +3292,7 @@
|
|
| 3306 |
"model.layers.26.self_attn.k_proj": {
|
| 3307 |
"bias": false,
|
| 3308 |
"enable_norm": true,
|
| 3309 |
-
"enable_perm":
|
| 3310 |
"group_num": 1,
|
| 3311 |
"group_size": 4096,
|
| 3312 |
"in_features": 4096,
|
|
@@ -3330,7 +3316,7 @@
|
|
| 3330 |
"model.layers.26.self_attn.o_proj": {
|
| 3331 |
"bias": false,
|
| 3332 |
"enable_norm": true,
|
| 3333 |
-
"enable_perm":
|
| 3334 |
"group_num": 1,
|
| 3335 |
"group_size": 4096,
|
| 3336 |
"in_features": 4096,
|
|
@@ -3354,7 +3340,7 @@
|
|
| 3354 |
"model.layers.26.self_attn.q_proj": {
|
| 3355 |
"bias": false,
|
| 3356 |
"enable_norm": true,
|
| 3357 |
-
"enable_perm":
|
| 3358 |
"group_num": 1,
|
| 3359 |
"group_size": 4096,
|
| 3360 |
"in_features": 4096,
|
|
@@ -3378,7 +3364,7 @@
|
|
| 3378 |
"model.layers.26.self_attn.v_proj": {
|
| 3379 |
"bias": false,
|
| 3380 |
"enable_norm": true,
|
| 3381 |
-
"enable_perm":
|
| 3382 |
"group_num": 1,
|
| 3383 |
"group_size": 4096,
|
| 3384 |
"in_features": 4096,
|
|
@@ -3402,7 +3388,7 @@
|
|
| 3402 |
"model.layers.27.mlp.down_proj": {
|
| 3403 |
"bias": false,
|
| 3404 |
"enable_norm": true,
|
| 3405 |
-
"enable_perm":
|
| 3406 |
"group_num": 1,
|
| 3407 |
"group_size": 14336,
|
| 3408 |
"in_features": 14336,
|
|
@@ -3426,7 +3412,7 @@
|
|
| 3426 |
"model.layers.27.mlp.gate_proj": {
|
| 3427 |
"bias": false,
|
| 3428 |
"enable_norm": true,
|
| 3429 |
-
"enable_perm":
|
| 3430 |
"group_num": 1,
|
| 3431 |
"group_size": 4096,
|
| 3432 |
"in_features": 4096,
|
|
@@ -3450,7 +3436,7 @@
|
|
| 3450 |
"model.layers.27.mlp.up_proj": {
|
| 3451 |
"bias": false,
|
| 3452 |
"enable_norm": true,
|
| 3453 |
-
"enable_perm":
|
| 3454 |
"group_num": 1,
|
| 3455 |
"group_size": 4096,
|
| 3456 |
"in_features": 4096,
|
|
@@ -3474,7 +3460,7 @@
|
|
| 3474 |
"model.layers.27.self_attn.k_proj": {
|
| 3475 |
"bias": false,
|
| 3476 |
"enable_norm": true,
|
| 3477 |
-
"enable_perm":
|
| 3478 |
"group_num": 1,
|
| 3479 |
"group_size": 4096,
|
| 3480 |
"in_features": 4096,
|
|
@@ -3498,7 +3484,7 @@
|
|
| 3498 |
"model.layers.27.self_attn.o_proj": {
|
| 3499 |
"bias": false,
|
| 3500 |
"enable_norm": true,
|
| 3501 |
-
"enable_perm":
|
| 3502 |
"group_num": 1,
|
| 3503 |
"group_size": 4096,
|
| 3504 |
"in_features": 4096,
|
|
@@ -3522,7 +3508,7 @@
|
|
| 3522 |
"model.layers.27.self_attn.q_proj": {
|
| 3523 |
"bias": false,
|
| 3524 |
"enable_norm": true,
|
| 3525 |
-
"enable_perm":
|
| 3526 |
"group_num": 1,
|
| 3527 |
"group_size": 4096,
|
| 3528 |
"in_features": 4096,
|
|
@@ -3546,7 +3532,7 @@
|
|
| 3546 |
"model.layers.27.self_attn.v_proj": {
|
| 3547 |
"bias": false,
|
| 3548 |
"enable_norm": true,
|
| 3549 |
-
"enable_perm":
|
| 3550 |
"group_num": 1,
|
| 3551 |
"group_size": 4096,
|
| 3552 |
"in_features": 4096,
|
|
@@ -3570,7 +3556,7 @@
|
|
| 3570 |
"model.layers.28.mlp.down_proj": {
|
| 3571 |
"bias": false,
|
| 3572 |
"enable_norm": true,
|
| 3573 |
-
"enable_perm":
|
| 3574 |
"group_num": 1,
|
| 3575 |
"group_size": 14336,
|
| 3576 |
"in_features": 14336,
|
|
@@ -3594,7 +3580,7 @@
|
|
| 3594 |
"model.layers.28.mlp.gate_proj": {
|
| 3595 |
"bias": false,
|
| 3596 |
"enable_norm": true,
|
| 3597 |
-
"enable_perm":
|
| 3598 |
"group_num": 1,
|
| 3599 |
"group_size": 4096,
|
| 3600 |
"in_features": 4096,
|
|
@@ -3618,7 +3604,7 @@
|
|
| 3618 |
"model.layers.28.mlp.up_proj": {
|
| 3619 |
"bias": false,
|
| 3620 |
"enable_norm": true,
|
| 3621 |
-
"enable_perm":
|
| 3622 |
"group_num": 1,
|
| 3623 |
"group_size": 4096,
|
| 3624 |
"in_features": 4096,
|
|
@@ -3642,7 +3628,7 @@
|
|
| 3642 |
"model.layers.28.self_attn.k_proj": {
|
| 3643 |
"bias": false,
|
| 3644 |
"enable_norm": true,
|
| 3645 |
-
"enable_perm":
|
| 3646 |
"group_num": 1,
|
| 3647 |
"group_size": 4096,
|
| 3648 |
"in_features": 4096,
|
|
@@ -3666,7 +3652,7 @@
|
|
| 3666 |
"model.layers.28.self_attn.o_proj": {
|
| 3667 |
"bias": false,
|
| 3668 |
"enable_norm": true,
|
| 3669 |
-
"enable_perm":
|
| 3670 |
"group_num": 1,
|
| 3671 |
"group_size": 4096,
|
| 3672 |
"in_features": 4096,
|
|
@@ -3690,7 +3676,7 @@
|
|
| 3690 |
"model.layers.28.self_attn.q_proj": {
|
| 3691 |
"bias": false,
|
| 3692 |
"enable_norm": true,
|
| 3693 |
-
"enable_perm":
|
| 3694 |
"group_num": 1,
|
| 3695 |
"group_size": 4096,
|
| 3696 |
"in_features": 4096,
|
|
@@ -3714,7 +3700,7 @@
|
|
| 3714 |
"model.layers.28.self_attn.v_proj": {
|
| 3715 |
"bias": false,
|
| 3716 |
"enable_norm": true,
|
| 3717 |
-
"enable_perm":
|
| 3718 |
"group_num": 1,
|
| 3719 |
"group_size": 4096,
|
| 3720 |
"in_features": 4096,
|
|
@@ -3738,7 +3724,7 @@
|
|
| 3738 |
"model.layers.29.mlp.down_proj": {
|
| 3739 |
"bias": false,
|
| 3740 |
"enable_norm": true,
|
| 3741 |
-
"enable_perm":
|
| 3742 |
"group_num": 1,
|
| 3743 |
"group_size": 14336,
|
| 3744 |
"in_features": 14336,
|
|
@@ -3762,7 +3748,7 @@
|
|
| 3762 |
"model.layers.29.mlp.gate_proj": {
|
| 3763 |
"bias": false,
|
| 3764 |
"enable_norm": true,
|
| 3765 |
-
"enable_perm":
|
| 3766 |
"group_num": 1,
|
| 3767 |
"group_size": 4096,
|
| 3768 |
"in_features": 4096,
|
|
@@ -3786,7 +3772,7 @@
|
|
| 3786 |
"model.layers.29.mlp.up_proj": {
|
| 3787 |
"bias": false,
|
| 3788 |
"enable_norm": true,
|
| 3789 |
-
"enable_perm":
|
| 3790 |
"group_num": 1,
|
| 3791 |
"group_size": 4096,
|
| 3792 |
"in_features": 4096,
|
|
@@ -3810,7 +3796,7 @@
|
|
| 3810 |
"model.layers.29.self_attn.k_proj": {
|
| 3811 |
"bias": false,
|
| 3812 |
"enable_norm": true,
|
| 3813 |
-
"enable_perm":
|
| 3814 |
"group_num": 1,
|
| 3815 |
"group_size": 4096,
|
| 3816 |
"in_features": 4096,
|
|
@@ -3834,7 +3820,7 @@
|
|
| 3834 |
"model.layers.29.self_attn.o_proj": {
|
| 3835 |
"bias": false,
|
| 3836 |
"enable_norm": true,
|
| 3837 |
-
"enable_perm":
|
| 3838 |
"group_num": 1,
|
| 3839 |
"group_size": 4096,
|
| 3840 |
"in_features": 4096,
|
|
@@ -3858,7 +3844,7 @@
|
|
| 3858 |
"model.layers.29.self_attn.q_proj": {
|
| 3859 |
"bias": false,
|
| 3860 |
"enable_norm": true,
|
| 3861 |
-
"enable_perm":
|
| 3862 |
"group_num": 1,
|
| 3863 |
"group_size": 4096,
|
| 3864 |
"in_features": 4096,
|
|
@@ -3882,7 +3868,7 @@
|
|
| 3882 |
"model.layers.29.self_attn.v_proj": {
|
| 3883 |
"bias": false,
|
| 3884 |
"enable_norm": true,
|
| 3885 |
-
"enable_perm":
|
| 3886 |
"group_num": 1,
|
| 3887 |
"group_size": 4096,
|
| 3888 |
"in_features": 4096,
|
|
@@ -3906,7 +3892,7 @@
|
|
| 3906 |
"model.layers.3.mlp.down_proj": {
|
| 3907 |
"bias": false,
|
| 3908 |
"enable_norm": true,
|
| 3909 |
-
"enable_perm":
|
| 3910 |
"group_num": 1,
|
| 3911 |
"group_size": 14336,
|
| 3912 |
"in_features": 14336,
|
|
@@ -3930,7 +3916,7 @@
|
|
| 3930 |
"model.layers.3.mlp.gate_proj": {
|
| 3931 |
"bias": false,
|
| 3932 |
"enable_norm": true,
|
| 3933 |
-
"enable_perm":
|
| 3934 |
"group_num": 1,
|
| 3935 |
"group_size": 4096,
|
| 3936 |
"in_features": 4096,
|
|
@@ -3954,7 +3940,7 @@
|
|
| 3954 |
"model.layers.3.mlp.up_proj": {
|
| 3955 |
"bias": false,
|
| 3956 |
"enable_norm": true,
|
| 3957 |
-
"enable_perm":
|
| 3958 |
"group_num": 1,
|
| 3959 |
"group_size": 4096,
|
| 3960 |
"in_features": 4096,
|
|
@@ -3978,7 +3964,7 @@
|
|
| 3978 |
"model.layers.3.self_attn.k_proj": {
|
| 3979 |
"bias": false,
|
| 3980 |
"enable_norm": true,
|
| 3981 |
-
"enable_perm":
|
| 3982 |
"group_num": 1,
|
| 3983 |
"group_size": 4096,
|
| 3984 |
"in_features": 4096,
|
|
@@ -4002,7 +3988,7 @@
|
|
| 4002 |
"model.layers.3.self_attn.o_proj": {
|
| 4003 |
"bias": false,
|
| 4004 |
"enable_norm": true,
|
| 4005 |
-
"enable_perm":
|
| 4006 |
"group_num": 1,
|
| 4007 |
"group_size": 4096,
|
| 4008 |
"in_features": 4096,
|
|
@@ -4026,7 +4012,7 @@
|
|
| 4026 |
"model.layers.3.self_attn.q_proj": {
|
| 4027 |
"bias": false,
|
| 4028 |
"enable_norm": true,
|
| 4029 |
-
"enable_perm":
|
| 4030 |
"group_num": 1,
|
| 4031 |
"group_size": 4096,
|
| 4032 |
"in_features": 4096,
|
|
@@ -4050,7 +4036,7 @@
|
|
| 4050 |
"model.layers.3.self_attn.v_proj": {
|
| 4051 |
"bias": false,
|
| 4052 |
"enable_norm": true,
|
| 4053 |
-
"enable_perm":
|
| 4054 |
"group_num": 1,
|
| 4055 |
"group_size": 4096,
|
| 4056 |
"in_features": 4096,
|
|
@@ -4074,7 +4060,7 @@
|
|
| 4074 |
"model.layers.30.mlp.down_proj": {
|
| 4075 |
"bias": false,
|
| 4076 |
"enable_norm": true,
|
| 4077 |
-
"enable_perm":
|
| 4078 |
"group_num": 1,
|
| 4079 |
"group_size": 14336,
|
| 4080 |
"in_features": 14336,
|
|
@@ -4098,7 +4084,7 @@
|
|
| 4098 |
"model.layers.30.mlp.gate_proj": {
|
| 4099 |
"bias": false,
|
| 4100 |
"enable_norm": true,
|
| 4101 |
-
"enable_perm":
|
| 4102 |
"group_num": 1,
|
| 4103 |
"group_size": 4096,
|
| 4104 |
"in_features": 4096,
|
|
@@ -4122,7 +4108,7 @@
|
|
| 4122 |
"model.layers.30.mlp.up_proj": {
|
| 4123 |
"bias": false,
|
| 4124 |
"enable_norm": true,
|
| 4125 |
-
"enable_perm":
|
| 4126 |
"group_num": 1,
|
| 4127 |
"group_size": 4096,
|
| 4128 |
"in_features": 4096,
|
|
@@ -4146,7 +4132,7 @@
|
|
| 4146 |
"model.layers.30.self_attn.k_proj": {
|
| 4147 |
"bias": false,
|
| 4148 |
"enable_norm": true,
|
| 4149 |
-
"enable_perm":
|
| 4150 |
"group_num": 1,
|
| 4151 |
"group_size": 4096,
|
| 4152 |
"in_features": 4096,
|
|
@@ -4170,7 +4156,7 @@
|
|
| 4170 |
"model.layers.30.self_attn.o_proj": {
|
| 4171 |
"bias": false,
|
| 4172 |
"enable_norm": true,
|
| 4173 |
-
"enable_perm":
|
| 4174 |
"group_num": 1,
|
| 4175 |
"group_size": 4096,
|
| 4176 |
"in_features": 4096,
|
|
@@ -4194,7 +4180,7 @@
|
|
| 4194 |
"model.layers.30.self_attn.q_proj": {
|
| 4195 |
"bias": false,
|
| 4196 |
"enable_norm": true,
|
| 4197 |
-
"enable_perm":
|
| 4198 |
"group_num": 1,
|
| 4199 |
"group_size": 4096,
|
| 4200 |
"in_features": 4096,
|
|
@@ -4218,7 +4204,7 @@
|
|
| 4218 |
"model.layers.30.self_attn.v_proj": {
|
| 4219 |
"bias": false,
|
| 4220 |
"enable_norm": true,
|
| 4221 |
-
"enable_perm":
|
| 4222 |
"group_num": 1,
|
| 4223 |
"group_size": 4096,
|
| 4224 |
"in_features": 4096,
|
|
@@ -4242,7 +4228,7 @@
|
|
| 4242 |
"model.layers.31.mlp.down_proj": {
|
| 4243 |
"bias": false,
|
| 4244 |
"enable_norm": true,
|
| 4245 |
-
"enable_perm":
|
| 4246 |
"group_num": 1,
|
| 4247 |
"group_size": 14336,
|
| 4248 |
"in_features": 14336,
|
|
@@ -4266,7 +4252,7 @@
|
|
| 4266 |
"model.layers.31.mlp.gate_proj": {
|
| 4267 |
"bias": false,
|
| 4268 |
"enable_norm": true,
|
| 4269 |
-
"enable_perm":
|
| 4270 |
"group_num": 1,
|
| 4271 |
"group_size": 4096,
|
| 4272 |
"in_features": 4096,
|
|
@@ -4290,7 +4276,7 @@
|
|
| 4290 |
"model.layers.31.mlp.up_proj": {
|
| 4291 |
"bias": false,
|
| 4292 |
"enable_norm": true,
|
| 4293 |
-
"enable_perm":
|
| 4294 |
"group_num": 1,
|
| 4295 |
"group_size": 4096,
|
| 4296 |
"in_features": 4096,
|
|
@@ -4314,7 +4300,7 @@
|
|
| 4314 |
"model.layers.31.self_attn.k_proj": {
|
| 4315 |
"bias": false,
|
| 4316 |
"enable_norm": true,
|
| 4317 |
-
"enable_perm":
|
| 4318 |
"group_num": 1,
|
| 4319 |
"group_size": 4096,
|
| 4320 |
"in_features": 4096,
|
|
@@ -4338,7 +4324,7 @@
|
|
| 4338 |
"model.layers.31.self_attn.o_proj": {
|
| 4339 |
"bias": false,
|
| 4340 |
"enable_norm": true,
|
| 4341 |
-
"enable_perm":
|
| 4342 |
"group_num": 1,
|
| 4343 |
"group_size": 4096,
|
| 4344 |
"in_features": 4096,
|
|
@@ -4362,7 +4348,7 @@
|
|
| 4362 |
"model.layers.31.self_attn.q_proj": {
|
| 4363 |
"bias": false,
|
| 4364 |
"enable_norm": true,
|
| 4365 |
-
"enable_perm":
|
| 4366 |
"group_num": 1,
|
| 4367 |
"group_size": 4096,
|
| 4368 |
"in_features": 4096,
|
|
@@ -4386,7 +4372,7 @@
|
|
| 4386 |
"model.layers.31.self_attn.v_proj": {
|
| 4387 |
"bias": false,
|
| 4388 |
"enable_norm": true,
|
| 4389 |
-
"enable_perm":
|
| 4390 |
"group_num": 1,
|
| 4391 |
"group_size": 4096,
|
| 4392 |
"in_features": 4096,
|
|
@@ -4410,7 +4396,7 @@
|
|
| 4410 |
"model.layers.4.mlp.down_proj": {
|
| 4411 |
"bias": false,
|
| 4412 |
"enable_norm": true,
|
| 4413 |
-
"enable_perm":
|
| 4414 |
"group_num": 1,
|
| 4415 |
"group_size": 14336,
|
| 4416 |
"in_features": 14336,
|
|
@@ -4434,7 +4420,7 @@
|
|
| 4434 |
"model.layers.4.mlp.gate_proj": {
|
| 4435 |
"bias": false,
|
| 4436 |
"enable_norm": true,
|
| 4437 |
-
"enable_perm":
|
| 4438 |
"group_num": 1,
|
| 4439 |
"group_size": 4096,
|
| 4440 |
"in_features": 4096,
|
|
@@ -4458,7 +4444,7 @@
|
|
| 4458 |
"model.layers.4.mlp.up_proj": {
|
| 4459 |
"bias": false,
|
| 4460 |
"enable_norm": true,
|
| 4461 |
-
"enable_perm":
|
| 4462 |
"group_num": 1,
|
| 4463 |
"group_size": 4096,
|
| 4464 |
"in_features": 4096,
|
|
@@ -4482,7 +4468,7 @@
|
|
| 4482 |
"model.layers.4.self_attn.k_proj": {
|
| 4483 |
"bias": false,
|
| 4484 |
"enable_norm": true,
|
| 4485 |
-
"enable_perm":
|
| 4486 |
"group_num": 1,
|
| 4487 |
"group_size": 4096,
|
| 4488 |
"in_features": 4096,
|
|
@@ -4506,7 +4492,7 @@
|
|
| 4506 |
"model.layers.4.self_attn.o_proj": {
|
| 4507 |
"bias": false,
|
| 4508 |
"enable_norm": true,
|
| 4509 |
-
"enable_perm":
|
| 4510 |
"group_num": 1,
|
| 4511 |
"group_size": 4096,
|
| 4512 |
"in_features": 4096,
|
|
@@ -4530,7 +4516,7 @@
|
|
| 4530 |
"model.layers.4.self_attn.q_proj": {
|
| 4531 |
"bias": false,
|
| 4532 |
"enable_norm": true,
|
| 4533 |
-
"enable_perm":
|
| 4534 |
"group_num": 1,
|
| 4535 |
"group_size": 4096,
|
| 4536 |
"in_features": 4096,
|
|
@@ -4554,7 +4540,7 @@
|
|
| 4554 |
"model.layers.4.self_attn.v_proj": {
|
| 4555 |
"bias": false,
|
| 4556 |
"enable_norm": true,
|
| 4557 |
-
"enable_perm":
|
| 4558 |
"group_num": 1,
|
| 4559 |
"group_size": 4096,
|
| 4560 |
"in_features": 4096,
|
|
@@ -4578,7 +4564,7 @@
|
|
| 4578 |
"model.layers.5.mlp.down_proj": {
|
| 4579 |
"bias": false,
|
| 4580 |
"enable_norm": true,
|
| 4581 |
-
"enable_perm":
|
| 4582 |
"group_num": 1,
|
| 4583 |
"group_size": 14336,
|
| 4584 |
"in_features": 14336,
|
|
@@ -4602,7 +4588,7 @@
|
|
| 4602 |
"model.layers.5.mlp.gate_proj": {
|
| 4603 |
"bias": false,
|
| 4604 |
"enable_norm": true,
|
| 4605 |
-
"enable_perm":
|
| 4606 |
"group_num": 1,
|
| 4607 |
"group_size": 4096,
|
| 4608 |
"in_features": 4096,
|
|
@@ -4626,7 +4612,7 @@
|
|
| 4626 |
"model.layers.5.mlp.up_proj": {
|
| 4627 |
"bias": false,
|
| 4628 |
"enable_norm": true,
|
| 4629 |
-
"enable_perm":
|
| 4630 |
"group_num": 1,
|
| 4631 |
"group_size": 4096,
|
| 4632 |
"in_features": 4096,
|
|
@@ -4650,7 +4636,7 @@
|
|
| 4650 |
"model.layers.5.self_attn.k_proj": {
|
| 4651 |
"bias": false,
|
| 4652 |
"enable_norm": true,
|
| 4653 |
-
"enable_perm":
|
| 4654 |
"group_num": 1,
|
| 4655 |
"group_size": 4096,
|
| 4656 |
"in_features": 4096,
|
|
@@ -4674,7 +4660,7 @@
|
|
| 4674 |
"model.layers.5.self_attn.o_proj": {
|
| 4675 |
"bias": false,
|
| 4676 |
"enable_norm": true,
|
| 4677 |
-
"enable_perm":
|
| 4678 |
"group_num": 1,
|
| 4679 |
"group_size": 4096,
|
| 4680 |
"in_features": 4096,
|
|
@@ -4698,7 +4684,7 @@
|
|
| 4698 |
"model.layers.5.self_attn.q_proj": {
|
| 4699 |
"bias": false,
|
| 4700 |
"enable_norm": true,
|
| 4701 |
-
"enable_perm":
|
| 4702 |
"group_num": 1,
|
| 4703 |
"group_size": 4096,
|
| 4704 |
"in_features": 4096,
|
|
@@ -4722,7 +4708,7 @@
|
|
| 4722 |
"model.layers.5.self_attn.v_proj": {
|
| 4723 |
"bias": false,
|
| 4724 |
"enable_norm": true,
|
| 4725 |
-
"enable_perm":
|
| 4726 |
"group_num": 1,
|
| 4727 |
"group_size": 4096,
|
| 4728 |
"in_features": 4096,
|
|
@@ -4746,7 +4732,7 @@
|
|
| 4746 |
"model.layers.6.mlp.down_proj": {
|
| 4747 |
"bias": false,
|
| 4748 |
"enable_norm": true,
|
| 4749 |
-
"enable_perm":
|
| 4750 |
"group_num": 1,
|
| 4751 |
"group_size": 14336,
|
| 4752 |
"in_features": 14336,
|
|
@@ -4770,7 +4756,7 @@
|
|
| 4770 |
"model.layers.6.mlp.gate_proj": {
|
| 4771 |
"bias": false,
|
| 4772 |
"enable_norm": true,
|
| 4773 |
-
"enable_perm":
|
| 4774 |
"group_num": 1,
|
| 4775 |
"group_size": 4096,
|
| 4776 |
"in_features": 4096,
|
|
@@ -4794,7 +4780,7 @@
|
|
| 4794 |
"model.layers.6.mlp.up_proj": {
|
| 4795 |
"bias": false,
|
| 4796 |
"enable_norm": true,
|
| 4797 |
-
"enable_perm":
|
| 4798 |
"group_num": 1,
|
| 4799 |
"group_size": 4096,
|
| 4800 |
"in_features": 4096,
|
|
@@ -4818,7 +4804,7 @@
|
|
| 4818 |
"model.layers.6.self_attn.k_proj": {
|
| 4819 |
"bias": false,
|
| 4820 |
"enable_norm": true,
|
| 4821 |
-
"enable_perm":
|
| 4822 |
"group_num": 1,
|
| 4823 |
"group_size": 4096,
|
| 4824 |
"in_features": 4096,
|
|
@@ -4842,7 +4828,7 @@
|
|
| 4842 |
"model.layers.6.self_attn.o_proj": {
|
| 4843 |
"bias": false,
|
| 4844 |
"enable_norm": true,
|
| 4845 |
-
"enable_perm":
|
| 4846 |
"group_num": 1,
|
| 4847 |
"group_size": 4096,
|
| 4848 |
"in_features": 4096,
|
|
@@ -4866,7 +4852,7 @@
|
|
| 4866 |
"model.layers.6.self_attn.q_proj": {
|
| 4867 |
"bias": false,
|
| 4868 |
"enable_norm": true,
|
| 4869 |
-
"enable_perm":
|
| 4870 |
"group_num": 1,
|
| 4871 |
"group_size": 4096,
|
| 4872 |
"in_features": 4096,
|
|
@@ -4890,7 +4876,7 @@
|
|
| 4890 |
"model.layers.6.self_attn.v_proj": {
|
| 4891 |
"bias": false,
|
| 4892 |
"enable_norm": true,
|
| 4893 |
-
"enable_perm":
|
| 4894 |
"group_num": 1,
|
| 4895 |
"group_size": 4096,
|
| 4896 |
"in_features": 4096,
|
|
@@ -4914,7 +4900,7 @@
|
|
| 4914 |
"model.layers.7.mlp.down_proj": {
|
| 4915 |
"bias": false,
|
| 4916 |
"enable_norm": true,
|
| 4917 |
-
"enable_perm":
|
| 4918 |
"group_num": 1,
|
| 4919 |
"group_size": 14336,
|
| 4920 |
"in_features": 14336,
|
|
@@ -4938,7 +4924,7 @@
|
|
| 4938 |
"model.layers.7.mlp.gate_proj": {
|
| 4939 |
"bias": false,
|
| 4940 |
"enable_norm": true,
|
| 4941 |
-
"enable_perm":
|
| 4942 |
"group_num": 1,
|
| 4943 |
"group_size": 4096,
|
| 4944 |
"in_features": 4096,
|
|
@@ -4962,7 +4948,7 @@
|
|
| 4962 |
"model.layers.7.mlp.up_proj": {
|
| 4963 |
"bias": false,
|
| 4964 |
"enable_norm": true,
|
| 4965 |
-
"enable_perm":
|
| 4966 |
"group_num": 1,
|
| 4967 |
"group_size": 4096,
|
| 4968 |
"in_features": 4096,
|
|
@@ -4986,7 +4972,7 @@
|
|
| 4986 |
"model.layers.7.self_attn.k_proj": {
|
| 4987 |
"bias": false,
|
| 4988 |
"enable_norm": true,
|
| 4989 |
-
"enable_perm":
|
| 4990 |
"group_num": 1,
|
| 4991 |
"group_size": 4096,
|
| 4992 |
"in_features": 4096,
|
|
@@ -5010,7 +4996,7 @@
|
|
| 5010 |
"model.layers.7.self_attn.o_proj": {
|
| 5011 |
"bias": false,
|
| 5012 |
"enable_norm": true,
|
| 5013 |
-
"enable_perm":
|
| 5014 |
"group_num": 1,
|
| 5015 |
"group_size": 4096,
|
| 5016 |
"in_features": 4096,
|
|
@@ -5034,7 +5020,7 @@
|
|
| 5034 |
"model.layers.7.self_attn.q_proj": {
|
| 5035 |
"bias": false,
|
| 5036 |
"enable_norm": true,
|
| 5037 |
-
"enable_perm":
|
| 5038 |
"group_num": 1,
|
| 5039 |
"group_size": 4096,
| 5040 |
"in_features": 4096,
|
@@ -5058,7 +5044,7 @@
| 5058 |
"model.layers.7.self_attn.v_proj": {
| 5059 |
"bias": false,
| 5060 |
"enable_norm": true,
| 5061 |
-"enable_perm":
| 5062 |
"group_num": 1,
| 5063 |
"group_size": 4096,
| 5064 |
"in_features": 4096,
|
@@ -5082,7 +5068,7 @@
| 5082 |
"model.layers.8.mlp.down_proj": {
| 5083 |
"bias": false,
| 5084 |
"enable_norm": true,
| 5085 |
-"enable_perm":
| 5086 |
"group_num": 1,
| 5087 |
"group_size": 14336,
| 5088 |
"in_features": 14336,
|
@@ -5106,7 +5092,7 @@
| 5106 |
"model.layers.8.mlp.gate_proj": {
| 5107 |
"bias": false,
| 5108 |
"enable_norm": true,
| 5109 |
-"enable_perm":
| 5110 |
"group_num": 1,
| 5111 |
"group_size": 4096,
| 5112 |
"in_features": 4096,
|
@@ -5130,7 +5116,7 @@
| 5130 |
"model.layers.8.mlp.up_proj": {
| 5131 |
"bias": false,
| 5132 |
"enable_norm": true,
| 5133 |
-"enable_perm":
| 5134 |
"group_num": 1,
| 5135 |
"group_size": 4096,
| 5136 |
"in_features": 4096,
|
@@ -5154,7 +5140,7 @@
| 5154 |
"model.layers.8.self_attn.k_proj": {
| 5155 |
"bias": false,
| 5156 |
"enable_norm": true,
| 5157 |
-"enable_perm":
| 5158 |
"group_num": 1,
| 5159 |
"group_size": 4096,
| 5160 |
"in_features": 4096,
|
@@ -5178,7 +5164,7 @@
| 5178 |
"model.layers.8.self_attn.o_proj": {
| 5179 |
"bias": false,
| 5180 |
"enable_norm": true,
| 5181 |
-"enable_perm":
| 5182 |
"group_num": 1,
| 5183 |
"group_size": 4096,
| 5184 |
"in_features": 4096,
|
@@ -5202,7 +5188,7 @@
| 5202 |
"model.layers.8.self_attn.q_proj": {
| 5203 |
"bias": false,
| 5204 |
"enable_norm": true,
| 5205 |
-"enable_perm":
| 5206 |
"group_num": 1,
| 5207 |
"group_size": 4096,
| 5208 |
"in_features": 4096,
|
@@ -5226,7 +5212,7 @@
| 5226 |
"model.layers.8.self_attn.v_proj": {
| 5227 |
"bias": false,
| 5228 |
"enable_norm": true,
| 5229 |
-"enable_perm":
| 5230 |
"group_num": 1,
| 5231 |
"group_size": 4096,
| 5232 |
"in_features": 4096,
|
@@ -5250,7 +5236,7 @@
| 5250 |
"model.layers.9.mlp.down_proj": {
| 5251 |
"bias": false,
| 5252 |
"enable_norm": true,
| 5253 |
-"enable_perm":
| 5254 |
"group_num": 1,
| 5255 |
"group_size": 14336,
| 5256 |
"in_features": 14336,
|
@@ -5274,7 +5260,7 @@
| 5274 |
"model.layers.9.mlp.gate_proj": {
| 5275 |
"bias": false,
| 5276 |
"enable_norm": true,
| 5277 |
-"enable_perm":
| 5278 |
"group_num": 1,
| 5279 |
"group_size": 4096,
| 5280 |
"in_features": 4096,
|
@@ -5298,7 +5284,7 @@
| 5298 |
"model.layers.9.mlp.up_proj": {
| 5299 |
"bias": false,
| 5300 |
"enable_norm": true,
| 5301 |
-"enable_perm":
| 5302 |
"group_num": 1,
| 5303 |
"group_size": 4096,
| 5304 |
"in_features": 4096,
|
@@ -5322,7 +5308,7 @@
| 5322 |
"model.layers.9.self_attn.k_proj": {
| 5323 |
"bias": false,
| 5324 |
"enable_norm": true,
| 5325 |
-"enable_perm":
| 5326 |
"group_num": 1,
| 5327 |
"group_size": 4096,
| 5328 |
"in_features": 4096,
|
@@ -5346,7 +5332,7 @@
| 5346 |
"model.layers.9.self_attn.o_proj": {
| 5347 |
"bias": false,
| 5348 |
"enable_norm": true,
| 5349 |
-"enable_perm":
| 5350 |
"group_num": 1,
| 5351 |
"group_size": 4096,
| 5352 |
"in_features": 4096,
|
@@ -5370,7 +5356,7 @@
| 5370 |
"model.layers.9.self_attn.q_proj": {
| 5371 |
"bias": false,
| 5372 |
"enable_norm": true,
| 5373 |
-"enable_perm":
| 5374 |
"group_num": 1,
| 5375 |
"group_size": 4096,
| 5376 |
"in_features": 4096,
|
@@ -5394,7 +5380,7 @@
| 5394 |
"model.layers.9.self_attn.v_proj": {
| 5395 |
"bias": false,
| 5396 |
"enable_norm": true,
| 5397 |
-"enable_perm":
| 5398 |
"group_num": 1,
| 5399 |
"group_size": 4096,
| 5400 |
"in_features": 4096,
|
@@ -5415,6 +5401,21 @@
| 5415 |
8
| 5416 |
]
| 5417 |
}
| 5418 |
-}
| 5419 |
-
| 5420 |
-}
|
| 1 |
{
| 2 |
+"_name_or_path": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-4096-woft",
| 3 |
"architectures": [
| 4 |
"LlamaForCausalLM"
| 5 |
],
|
| 11 |
128008,
| 12 |
128009
| 13 |
],
| 14 |
+"head_dim": 128,
| 15 |
"hidden_act": "silu",
| 16 |
"hidden_size": 4096,
| 17 |
"initializer_range": 0.02,
|
| 23 |
"num_hidden_layers": 32,
| 24 |
"num_key_value_heads": 8,
| 25 |
"pretraining_tp": 1,
|
| 26 |
"quantization_config": {
|
| 27 |
"config_for_layers": {
| 28 |
"model.layers.0.mlp.down_proj": {
| 29 |
"bias": false,
| 30 |
"enable_norm": true,
| 31 |
+"enable_perm": false,
| 32 |
"group_num": 1,
| 33 |
"group_size": 14336,
| 34 |
"in_features": 14336,
|
[Rows 52-5386 repeat this same pattern for every remaining entry of "config_for_layers": model.layers.0 through model.layers.31, each with mlp.down_proj, mlp.gate_proj, mlp.up_proj, self_attn.k_proj, self_attn.o_proj, self_attn.q_proj and self_attn.v_proj. Every block shows one changed row, +"enable_perm": false,, surrounded by the unchanged rows "bias": false, / "enable_norm": true, / "group_num": 1, / "group_size" / "in_features"; group_size and in_features are 14336 for mlp.down_proj and 4096 for all other projections.]
| 5401 |
8
| 5402 |
]
| 5403 |
}
| 5404 |
+},
| 5405 |
+"quant_method": "vptq"
| 5406 |
+},
| 5407 |
+"rms_norm_eps": 1e-05,
| 5408 |
+"rope_scaling": {
| 5409 |
+"factor": 8.0,
| 5410 |
+"high_freq_factor": 4.0,
| 5411 |
+"low_freq_factor": 1.0,
| 5412 |
+"original_max_position_embeddings": 8192,
| 5413 |
+"rope_type": "llama3"
| 5414 |
+},
| 5415 |
+"rope_theta": 500000.0,
| 5416 |
+"tie_word_embeddings": false,
| 5417 |
+"torch_dtype": "bfloat16",
| 5418 |
+"transformers_version": "4.49.0",
| 5419 |
+"use_cache": true,
| 5420 |
+"vocab_size": 128256
| 5421 |
+}
|
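Taken together, the config.json changes set "enable_perm": false for every quantized layer, fold "quant_method": "vptq" into quantization_config, and restore the rope/vocab keys after the quantization block, which is the layout recent transformers releases expect for VPTQ checkpoints. A minimal loading sketch, assuming a transformers build with built-in VPTQ support (around the 4.49.0 pinned above) and the vptq kernel package installed; the repo id is the one given in "_name_or_path":

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-4096-woft"
tokenizer = AutoTokenizer.from_pretrained(repo)
# quantization_config.quant_method == "vptq" is read from config.json,
# so no extra quantization arguments are needed at load time.
model = AutoModelForCausalLM.from_pretrained(
    repo,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype" in the config
    device_map="auto",
)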
generation_config.json
CHANGED
|
@@ -1,16 +1,10 @@
| 1 |
{
| 2 |
-"
| 3 |
"bos_token_id": 128000,
| 4 |
-"do_sample": true,
| 5 |
"eos_token_id": [
| 6 |
128001,
| 7 |
128008,
| 8 |
128009
| 9 |
],
| 10 |
-"
| 11 |
-"num_assistant_tokens_schedule": "heuristic",
| 12 |
-"return_legacy_cache": true,
| 13 |
-"temperature": 0.6,
| 14 |
-"top_p": 0.9,
| 15 |
-"transformers_version": "4.45.1"
| 16 |
}
|
| 1 |
{
| 2 |
+"_from_model_config": true,
| 3 |
"bos_token_id": 128000,
| 4 |
"eos_token_id": [
| 5 |
128001,
| 6 |
128008,
| 7 |
128009
| 8 |
],
| 9 |
+"transformers_version": "4.49.0"
| 10 |
}
|
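Note that the rewritten generation_config.json keeps only the token ids and drops the old sampling defaults (do_sample, temperature 0.6, top_p 0.9), so generate() now decodes greedily unless sampling is requested per call. A short sketch of restoring the previous behaviour explicitly, reusing the tokenizer and model objects from the loading sketch above:

prompt = "What does VPTQ quantization change about a checkpoint?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# The defaults removed from generation_config.json must now be passed per call.
output = model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))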
model-00001-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
| 1 |
version https://git-lfs.github.com/spec/v1
| 2 |
-oid sha256:
| 3 |
-size
|
| 1 |
version https://git-lfs.github.com/spec/v1
| 2 |
+oid sha256:cf76e03e16f27cef164fcc14201d31aa6a0d855d06cef6061bc0ebeb4f47de17
| 3 |
+size 4359343640
model.safetensors.index.json
CHANGED
|
The diff for this file is too large to render. See raw diff.
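Even without a rendered diff, the index is plain JSON and easy to inspect locally once the repo is downloaded; "weight_map" records which of the two safetensors shards stores each tensor. A small sketch (the file name is the one listed in this commit):

import json

with open("model.safetensors.index.json") as f:
    index = json.load(f)
print(index["metadata"]["total_size"])            # total bytes across shards
print(sorted(set(index["weight_map"].values())))  # the shard files referenced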
|
tokenizer_config.json
CHANGED
|
@@ -2053,11 +2053,12 @@
| 2053 |
"chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|eot_id|>",
+  "extra_special_tokens": {},
   "legacy": false,
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 131072,
-  "tokenizer_class": "
+  "tokenizer_class": "PreTrainedTokenizer"
 }
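The tokenizer changes are small: newer transformers versions write an explicit (here empty) extra_special_tokens map, and tokenizer_class is now spelled out; the long Llama 3.1 chat template above, with its tool-calling branches, is carried over unchanged. Rendering a conversation through the template is a quick sanity check; a sketch with an illustrative message list (the repo id is again a placeholder):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path/to/this-checkpoint")  # placeholder

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# tokenize=False returns the rendered template as a string, so the
# <|start_header_id|>/<|eot_id|> framing produced by tokenizer_config.json
# can be inspected directly.
prompt = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=False,
)
print(prompt)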