Upload AudioFlamingo3ForConditionalGeneration
Browse files
- config.json +2 -1
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +2 -2
- model-00003-of-00004.safetensors +2 -2
- model-00004-of-00004.safetensors +2 -2
- model.safetensors.index.json +97 -97
config.json
CHANGED
|
@@ -7,6 +7,7 @@
|
|
| 7 |
"activation_function": "gelu",
|
| 8 |
"attention_dropout": 0.0,
|
| 9 |
"dropout": 0.0,
|
|
|
|
| 10 |
"hidden_size": 1280,
|
| 11 |
"initializer_range": 0.02,
|
| 12 |
"intermediate_size": 5120,
|
|
@@ -19,7 +20,7 @@
|
|
| 19 |
"scale_embedding": false
|
| 20 |
},
|
| 21 |
"audio_token_id": 151669,
|
| 22 |
-
"dtype": "
|
| 23 |
"model_type": "audioflamingo3",
|
| 24 |
"projector_bias": true,
|
| 25 |
"projector_hidden_act": "gelu",
|
|
|
|
| 7 |
"activation_function": "gelu",
|
| 8 |
"attention_dropout": 0.0,
|
| 9 |
"dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
"hidden_size": 1280,
|
| 12 |
"initializer_range": 0.02,
|
| 13 |
"intermediate_size": 5120,
|
|
|
|
| 20 |
"scale_embedding": false
|
| 21 |
},
|
| 22 |
"audio_token_id": 151669,
|
| 23 |
+
"dtype": "bfloat16",
|
| 24 |
"model_type": "audioflamingo3",
|
| 25 |
"projector_bias": true,
|
| 26 |
"projector_hidden_act": "gelu",
|
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df42e51f121d243a7bbfb253ba2d388046b6b9abb33b7770c789d431c04d21bf
|
| 3 |
+
size 4886285784
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36f6299c1e3137d431cf61044537c4dda0627d7092a5a44b5ce72eca4dcc56da
|
| 3 |
+
size 4991497784
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e0c90ec3232d5a54c8722b19487fa2a117463fcd662ff6e93bab9970d0fe2f7
|
| 3 |
+
size 4932752872
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4050632946b7c60cf2855d9d7628589639bd06bc2d5d45a58988fe97c7d98af2
|
| 3 |
+
size 1723994720
|
model.safetensors.index.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
"total_parameters": 8267215360,
|
| 4 |
-
"total_size":
|
| 5 |
},
|
| 6 |
"weight_map": {
|
| 7 |
"audio_tower.conv1.bias": "model-00001-of-00004.safetensors",
|
|
@@ -553,11 +553,11 @@
|
|
| 553 |
"language_model.model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
| 554 |
"language_model.model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
| 555 |
"language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
| 556 |
-
"language_model.model.layers.13.input_layernorm.weight": "model-
|
| 557 |
-
"language_model.model.layers.13.mlp.down_proj.weight": "model-
|
| 558 |
"language_model.model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
| 559 |
-
"language_model.model.layers.13.mlp.up_proj.weight": "model-
|
| 560 |
-
"language_model.model.layers.13.post_attention_layernorm.weight": "model-
|
| 561 |
"language_model.model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
| 562 |
"language_model.model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
| 563 |
"language_model.model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
|
@@ -565,42 +565,42 @@
|
|
| 565 |
"language_model.model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
| 566 |
"language_model.model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
| 567 |
"language_model.model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
| 568 |
-
"language_model.model.layers.14.input_layernorm.weight": "model-
|
| 569 |
-
"language_model.model.layers.14.mlp.down_proj.weight": "model-
|
| 570 |
-
"language_model.model.layers.14.mlp.gate_proj.weight": "model-
|
| 571 |
-
"language_model.model.layers.14.mlp.up_proj.weight": "model-
|
| 572 |
-
"language_model.model.layers.14.post_attention_layernorm.weight": "model-
|
| 573 |
-
"language_model.model.layers.14.self_attn.k_proj.bias": "model-
|
| 574 |
-
"language_model.model.layers.14.self_attn.k_proj.weight": "model-
|
| 575 |
-
"language_model.model.layers.14.self_attn.o_proj.weight": "model-
|
| 576 |
-
"language_model.model.layers.14.self_attn.q_proj.bias": "model-
|
| 577 |
-
"language_model.model.layers.14.self_attn.q_proj.weight": "model-
|
| 578 |
-
"language_model.model.layers.14.self_attn.v_proj.bias": "model-
|
| 579 |
-
"language_model.model.layers.14.self_attn.v_proj.weight": "model-
|
| 580 |
-
"language_model.model.layers.15.input_layernorm.weight": "model-
|
| 581 |
-
"language_model.model.layers.15.mlp.down_proj.weight": "model-
|
| 582 |
-
"language_model.model.layers.15.mlp.gate_proj.weight": "model-
|
| 583 |
-
"language_model.model.layers.15.mlp.up_proj.weight": "model-
|
| 584 |
-
"language_model.model.layers.15.post_attention_layernorm.weight": "model-
|
| 585 |
-
"language_model.model.layers.15.self_attn.k_proj.bias": "model-
|
| 586 |
-
"language_model.model.layers.15.self_attn.k_proj.weight": "model-
|
| 587 |
-
"language_model.model.layers.15.self_attn.o_proj.weight": "model-
|
| 588 |
-
"language_model.model.layers.15.self_attn.q_proj.bias": "model-
|
| 589 |
-
"language_model.model.layers.15.self_attn.q_proj.weight": "model-
|
| 590 |
-
"language_model.model.layers.15.self_attn.v_proj.bias": "model-
|
| 591 |
-
"language_model.model.layers.15.self_attn.v_proj.weight": "model-
|
| 592 |
"language_model.model.layers.16.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 593 |
"language_model.model.layers.16.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
| 594 |
"language_model.model.layers.16.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
| 595 |
"language_model.model.layers.16.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
| 596 |
"language_model.model.layers.16.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 597 |
-
"language_model.model.layers.16.self_attn.k_proj.bias": "model-
|
| 598 |
-
"language_model.model.layers.16.self_attn.k_proj.weight": "model-
|
| 599 |
-
"language_model.model.layers.16.self_attn.o_proj.weight": "model-
|
| 600 |
-
"language_model.model.layers.16.self_attn.q_proj.bias": "model-
|
| 601 |
-
"language_model.model.layers.16.self_attn.q_proj.weight": "model-
|
| 602 |
-
"language_model.model.layers.16.self_attn.v_proj.bias": "model-
|
| 603 |
-
"language_model.model.layers.16.self_attn.v_proj.weight": "model-
|
| 604 |
"language_model.model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 605 |
"language_model.model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
| 606 |
"language_model.model.layers.17.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
|
@@ -637,11 +637,11 @@
|
|
| 637 |
"language_model.model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
| 638 |
"language_model.model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
| 639 |
"language_model.model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
| 640 |
-
"language_model.model.layers.2.input_layernorm.weight": "model-
|
| 641 |
-
"language_model.model.layers.2.mlp.down_proj.weight": "model-
|
| 642 |
"language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
| 643 |
"language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
| 644 |
-
"language_model.model.layers.2.post_attention_layernorm.weight": "model-
|
| 645 |
"language_model.model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
| 646 |
"language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
| 647 |
"language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
|
@@ -697,11 +697,11 @@
|
|
| 697 |
"language_model.model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
| 698 |
"language_model.model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
| 699 |
"language_model.model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
| 700 |
-
"language_model.model.layers.24.input_layernorm.weight": "model-
|
| 701 |
-
"language_model.model.layers.24.mlp.down_proj.weight": "model-
|
| 702 |
-
"language_model.model.layers.24.mlp.gate_proj.weight": "model-
|
| 703 |
-
"language_model.model.layers.24.mlp.up_proj.weight": "model-
|
| 704 |
-
"language_model.model.layers.24.post_attention_layernorm.weight": "model-
|
| 705 |
"language_model.model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
| 706 |
"language_model.model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
| 707 |
"language_model.model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
|
@@ -709,30 +709,30 @@
|
|
| 709 |
"language_model.model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
| 710 |
"language_model.model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
| 711 |
"language_model.model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
| 712 |
-
"language_model.model.layers.25.input_layernorm.weight": "model-
|
| 713 |
-
"language_model.model.layers.25.mlp.down_proj.weight": "model-
|
| 714 |
-
"language_model.model.layers.25.mlp.gate_proj.weight": "model-
|
| 715 |
-
"language_model.model.layers.25.mlp.up_proj.weight": "model-
|
| 716 |
-
"language_model.model.layers.25.post_attention_layernorm.weight": "model-
|
| 717 |
-
"language_model.model.layers.25.self_attn.k_proj.bias": "model-
|
| 718 |
-
"language_model.model.layers.25.self_attn.k_proj.weight": "model-
|
| 719 |
-
"language_model.model.layers.25.self_attn.o_proj.weight": "model-
|
| 720 |
-
"language_model.model.layers.25.self_attn.q_proj.bias": "model-
|
| 721 |
-
"language_model.model.layers.25.self_attn.q_proj.weight": "model-
|
| 722 |
-
"language_model.model.layers.25.self_attn.v_proj.bias": "model-
|
| 723 |
-
"language_model.model.layers.25.self_attn.v_proj.weight": "model-
|
| 724 |
"language_model.model.layers.26.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 725 |
"language_model.model.layers.26.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
| 726 |
-
"language_model.model.layers.26.mlp.gate_proj.weight": "model-
|
| 727 |
-
"language_model.model.layers.26.mlp.up_proj.weight": "model-
|
| 728 |
"language_model.model.layers.26.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 729 |
-
"language_model.model.layers.26.self_attn.k_proj.bias": "model-
|
| 730 |
-
"language_model.model.layers.26.self_attn.k_proj.weight": "model-
|
| 731 |
-
"language_model.model.layers.26.self_attn.o_proj.weight": "model-
|
| 732 |
-
"language_model.model.layers.26.self_attn.q_proj.bias": "model-
|
| 733 |
-
"language_model.model.layers.26.self_attn.q_proj.weight": "model-
|
| 734 |
-
"language_model.model.layers.26.self_attn.v_proj.bias": "model-
|
| 735 |
-
"language_model.model.layers.26.self_attn.v_proj.weight": "model-
|
| 736 |
"language_model.model.layers.27.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 737 |
"language_model.model.layers.27.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
| 738 |
"language_model.model.layers.27.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
|
@@ -745,42 +745,42 @@
|
|
| 745 |
"language_model.model.layers.27.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
|
| 746 |
"language_model.model.layers.27.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
|
| 747 |
"language_model.model.layers.27.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
|
| 748 |
-
"language_model.model.layers.3.input_layernorm.weight": "model-
|
| 749 |
-
"language_model.model.layers.3.mlp.down_proj.weight": "model-
|
| 750 |
-
"language_model.model.layers.3.mlp.gate_proj.weight": "model-
|
| 751 |
-
"language_model.model.layers.3.mlp.up_proj.weight": "model-
|
| 752 |
-
"language_model.model.layers.3.post_attention_layernorm.weight": "model-
|
| 753 |
-
"language_model.model.layers.3.self_attn.k_proj.bias": "model-
|
| 754 |
-
"language_model.model.layers.3.self_attn.k_proj.weight": "model-
|
| 755 |
-
"language_model.model.layers.3.self_attn.o_proj.weight": "model-
|
| 756 |
-
"language_model.model.layers.3.self_attn.q_proj.bias": "model-
|
| 757 |
-
"language_model.model.layers.3.self_attn.q_proj.weight": "model-
|
| 758 |
-
"language_model.model.layers.3.self_attn.v_proj.bias": "model-
|
| 759 |
-
"language_model.model.layers.3.self_attn.v_proj.weight": "model-
|
| 760 |
-
"language_model.model.layers.4.input_layernorm.weight": "model-
|
| 761 |
-
"language_model.model.layers.4.mlp.down_proj.weight": "model-
|
| 762 |
-
"language_model.model.layers.4.mlp.gate_proj.weight": "model-
|
| 763 |
-
"language_model.model.layers.4.mlp.up_proj.weight": "model-
|
| 764 |
-
"language_model.model.layers.4.post_attention_layernorm.weight": "model-
|
| 765 |
-
"language_model.model.layers.4.self_attn.k_proj.bias": "model-
|
| 766 |
-
"language_model.model.layers.4.self_attn.k_proj.weight": "model-
|
| 767 |
-
"language_model.model.layers.4.self_attn.o_proj.weight": "model-
|
| 768 |
-
"language_model.model.layers.4.self_attn.q_proj.bias": "model-
|
| 769 |
-
"language_model.model.layers.4.self_attn.q_proj.weight": "model-
|
| 770 |
-
"language_model.model.layers.4.self_attn.v_proj.bias": "model-
|
| 771 |
-
"language_model.model.layers.4.self_attn.v_proj.weight": "model-
|
| 772 |
"language_model.model.layers.5.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 773 |
"language_model.model.layers.5.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
| 774 |
-
"language_model.model.layers.5.mlp.gate_proj.weight": "model-
|
| 775 |
"language_model.model.layers.5.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
| 776 |
"language_model.model.layers.5.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 777 |
-
"language_model.model.layers.5.self_attn.k_proj.bias": "model-
|
| 778 |
-
"language_model.model.layers.5.self_attn.k_proj.weight": "model-
|
| 779 |
-
"language_model.model.layers.5.self_attn.o_proj.weight": "model-
|
| 780 |
-
"language_model.model.layers.5.self_attn.q_proj.bias": "model-
|
| 781 |
-
"language_model.model.layers.5.self_attn.q_proj.weight": "model-
|
| 782 |
-
"language_model.model.layers.5.self_attn.v_proj.bias": "model-
|
| 783 |
-
"language_model.model.layers.5.self_attn.v_proj.weight": "model-
|
| 784 |
"language_model.model.layers.6.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 785 |
"language_model.model.layers.6.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
| 786 |
"language_model.model.layers.6.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
"total_parameters": 8267215360,
|
| 4 |
+
"total_size": 16534430720
|
| 5 |
},
|
| 6 |
"weight_map": {
|
| 7 |
"audio_tower.conv1.bias": "model-00001-of-00004.safetensors",
|
|
|
|
| 553 |
"language_model.model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
| 554 |
"language_model.model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
| 555 |
"language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
| 556 |
+
"language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 557 |
+
"language_model.model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
| 558 |
"language_model.model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
| 559 |
+
"language_model.model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
| 560 |
+
"language_model.model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 561 |
"language_model.model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
| 562 |
"language_model.model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
| 563 |
"language_model.model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
|
|
|
| 565 |
"language_model.model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
| 566 |
"language_model.model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
| 567 |
"language_model.model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
| 568 |
+
"language_model.model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 569 |
+
"language_model.model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
| 570 |
+
"language_model.model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
| 571 |
+
"language_model.model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
| 572 |
+
"language_model.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 573 |
+
"language_model.model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
| 574 |
+
"language_model.model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
| 575 |
+
"language_model.model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
| 576 |
+
"language_model.model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
| 577 |
+
"language_model.model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
| 578 |
+
"language_model.model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
| 579 |
+
"language_model.model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
| 580 |
+
"language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 581 |
+
"language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
| 582 |
+
"language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
| 583 |
+
"language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
| 584 |
+
"language_model.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 585 |
+
"language_model.model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
| 586 |
+
"language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
| 587 |
+
"language_model.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
| 588 |
+
"language_model.model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
| 589 |
+
"language_model.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
| 590 |
+
"language_model.model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
| 591 |
+
"language_model.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
| 592 |
"language_model.model.layers.16.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 593 |
"language_model.model.layers.16.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
| 594 |
"language_model.model.layers.16.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
| 595 |
"language_model.model.layers.16.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
| 596 |
"language_model.model.layers.16.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 597 |
+
"language_model.model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
| 598 |
+
"language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
| 599 |
+
"language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
| 600 |
+
"language_model.model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
| 601 |
+
"language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
| 602 |
+
"language_model.model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
| 603 |
+
"language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
| 604 |
"language_model.model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 605 |
"language_model.model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
| 606 |
"language_model.model.layers.17.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
|
|
|
| 637 |
"language_model.model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
| 638 |
"language_model.model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
| 639 |
"language_model.model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
| 640 |
+
"language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 641 |
+
"language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
| 642 |
"language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
| 643 |
"language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
| 644 |
+
"language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 645 |
"language_model.model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
| 646 |
"language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
| 647 |
"language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
|
|
|
| 697 |
"language_model.model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
| 698 |
"language_model.model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
| 699 |
"language_model.model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
| 700 |
+
"language_model.model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 701 |
+
"language_model.model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
| 702 |
+
"language_model.model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
| 703 |
+
"language_model.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
| 704 |
+
"language_model.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 705 |
"language_model.model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
| 706 |
"language_model.model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
| 707 |
"language_model.model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
|
|
|
| 709 |
"language_model.model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
| 710 |
"language_model.model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
| 711 |
"language_model.model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
| 712 |
+
"language_model.model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 713 |
+
"language_model.model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
| 714 |
+
"language_model.model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
| 715 |
+
"language_model.model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
| 716 |
+
"language_model.model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 717 |
+
"language_model.model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
| 718 |
+
"language_model.model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
| 719 |
+
"language_model.model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
| 720 |
+
"language_model.model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
|
| 721 |
+
"language_model.model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
| 722 |
+
"language_model.model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
| 723 |
+
"language_model.model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
| 724 |
"language_model.model.layers.26.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 725 |
"language_model.model.layers.26.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
| 726 |
+
"language_model.model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
| 727 |
+
"language_model.model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
| 728 |
"language_model.model.layers.26.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 729 |
+
"language_model.model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
| 730 |
+
"language_model.model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
| 731 |
+
"language_model.model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
| 732 |
+
"language_model.model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
|
| 733 |
+
"language_model.model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
| 734 |
+
"language_model.model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
| 735 |
+
"language_model.model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
| 736 |
"language_model.model.layers.27.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 737 |
"language_model.model.layers.27.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
| 738 |
"language_model.model.layers.27.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
|
|
|
| 745 |
"language_model.model.layers.27.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
|
| 746 |
"language_model.model.layers.27.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
|
| 747 |
"language_model.model.layers.27.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
|
| 748 |
+
"language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 749 |
+
"language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
| 750 |
+
"language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
| 751 |
+
"language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
| 752 |
+
"language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 753 |
+
"language_model.model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
| 754 |
+
"language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
| 755 |
+
"language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
| 756 |
+
"language_model.model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
|
| 757 |
+
"language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
| 758 |
+
"language_model.model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
|
| 759 |
+
"language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
| 760 |
+
"language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 761 |
+
"language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
| 762 |
+
"language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
| 763 |
+
"language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
| 764 |
+
"language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 765 |
+
"language_model.model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
| 766 |
+
"language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
| 767 |
+
"language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
| 768 |
+
"language_model.model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
|
| 769 |
+
"language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
| 770 |
+
"language_model.model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
|
| 771 |
+
"language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
| 772 |
"language_model.model.layers.5.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 773 |
"language_model.model.layers.5.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
| 774 |
+
"language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
| 775 |
"language_model.model.layers.5.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
| 776 |
"language_model.model.layers.5.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 777 |
+
"language_model.model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
| 778 |
+
"language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
| 779 |
+
"language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
| 780 |
+
"language_model.model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
|
| 781 |
+
"language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
| 782 |
+
"language_model.model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
|
| 783 |
+
"language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
| 784 |
"language_model.model.layers.6.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 785 |
"language_model.model.layers.6.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
| 786 |
"language_model.model.layers.6.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|