SreyanG-NVIDIA committed
Commit b6e8004 · verified · 1 parent: d90256f

Upload AudioFlamingo3ForConditionalGeneration

config.json CHANGED
@@ -7,6 +7,7 @@
   "activation_function": "gelu",
   "attention_dropout": 0.0,
   "dropout": 0.0,
+ "dtype": "bfloat16",
   "hidden_size": 1280,
   "initializer_range": 0.02,
   "intermediate_size": 5120,
@@ -19,7 +20,7 @@
   "scale_embedding": false
   },
   "audio_token_id": 151669,
- "dtype": "float32",
+ "dtype": "bfloat16",
   "model_type": "audioflamingo3",
   "projector_bias": true,
   "projector_hidden_act": "gelu",
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
   version https://git-lfs.github.com/spec/v1
- oid sha256:2944227d8f05c303fea780953497344d072186756aacff4b5f5061056f3cc4c4
- size 4897662976
+ oid sha256:df42e51f121d243a7bbfb253ba2d388046b6b9abb33b7770c789d431c04d21bf
+ size 4886285784
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
   version https://git-lfs.github.com/spec/v1
- oid sha256:74f9239be9cbdc4593f281ab889cc7ca8126b58eaf30892331214a685703b1c6
- size 4991497752
+ oid sha256:36f6299c1e3137d431cf61044537c4dda0627d7092a5a44b5ce72eca4dcc56da
+ size 4991497784
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
   version https://git-lfs.github.com/spec/v1
- oid sha256:19b39b04a0f508f132d229d94fa2137a46a03299667bc6d27d823f278b1a8b25
- size 4991497848
+ oid sha256:1e0c90ec3232d5a54c8722b19487fa2a117463fcd662ff6e93bab9970d0fe2f7
+ size 4932752872
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
   version https://git-lfs.github.com/spec/v1
- oid sha256:8d22767504cd0fae229a2f95dbecc20d6fb9e50797183cd64c8b7b5b0d0527b7
- size 2962689912
+ oid sha256:4050632946b7c60cf2855d9d7628589639bd06bc2d5d45a58988fe97c7d98af2
+ size 1723994720
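
Each .safetensors entry above is a Git LFS pointer (spec version, sha256 oid, byte size), not the weights themselves; the re-serialized shards therefore get new oids and sizes. A small sketch for verifying a downloaded shard against its pointer, using only the standard library:

    import hashlib
    from pathlib import Path

    def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
        """Check a downloaded file against the sha256 oid and size from its LFS pointer."""
        p = Path(path)
        if p.stat().st_size != expected_size:
            return False
        digest = hashlib.sha256()
        with p.open("rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
                digest.update(chunk)
        return digest.hexdigest() == expected_oid

    # Values taken from the new pointer for model-00004-of-00004.safetensors above.
    ok = verify_lfs_pointer(
        "model-00004-of-00004.safetensors",
        "4050632946b7c60cf2855d9d7628589639bd06bc2d5d45a58988fe97c7d98af2",
        1723994720,
    )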
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
   {
   "metadata": {
   "total_parameters": 8267215360,
- "total_size": 17843248128
+ "total_size": 16534430720
   },
   "weight_map": {
   "audio_tower.conv1.bias": "model-00001-of-00004.safetensors",
@@ -553,11 +553,11 @@
   "language_model.model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
   "language_model.model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
   "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.13.input_layernorm.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.13.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
   "language_model.model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.13.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.13.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
   "language_model.model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
   "language_model.model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
   "language_model.model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
@@ -565,42 +565,42 @@
   "language_model.model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
   "language_model.model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
   "language_model.model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.14.input_layernorm.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.14.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.14.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.14.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.14.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
- "language_model.model.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.14.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
- "language_model.model.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
- "language_model.model.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.15.input_layernorm.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.15.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.15.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.15.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.15.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
- "language_model.model.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.15.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
- "language_model.model.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
- "language_model.model.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
   "language_model.model.layers.16.input_layernorm.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.16.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.16.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.16.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.16.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
- "language_model.model.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.16.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
- "language_model.model.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
- "language_model.model.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
   "language_model.model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.17.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
@@ -637,11 +637,11 @@
   "language_model.model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
   "language_model.model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.2.input_layernorm.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.2.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
   "language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
   "language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
- "language_model.model.layers.2.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
   "language_model.model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
   "language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
   "language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
@@ -697,11 +697,11 @@
   "language_model.model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
   "language_model.model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.24.input_layernorm.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.24.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.24.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.24.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.24.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+ "language_model.model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
   "language_model.model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
@@ -709,30 +709,30 @@
   "language_model.model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
   "language_model.model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
- "language_model.model.layers.25.input_layernorm.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.25.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.25.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.25.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.25.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.25.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
- "language_model.model.layers.25.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.25.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.25.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
- "language_model.model.layers.25.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.25.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
- "language_model.model.layers.25.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+ "language_model.model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.26.input_layernorm.weight": "model-00004-of-00004.safetensors",
   "language_model.model.layers.26.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.26.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.26.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+ "language_model.model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.26.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.26.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
- "language_model.model.layers.26.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.26.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.26.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
- "language_model.model.layers.26.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.26.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
- "language_model.model.layers.26.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+ "language_model.model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
   "language_model.model.layers.27.input_layernorm.weight": "model-00004-of-00004.safetensors",
   "language_model.model.layers.27.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
   "language_model.model.layers.27.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
@@ -745,42 +745,42 @@
   "language_model.model.layers.27.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
   "language_model.model.layers.27.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
   "language_model.model.layers.27.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
- "language_model.model.layers.3.input_layernorm.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.3.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.3.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.3.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.3.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.3.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
- "language_model.model.layers.3.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.3.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.3.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
- "language_model.model.layers.3.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.3.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
- "language_model.model.layers.3.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.4.input_layernorm.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.4.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.4.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.4.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.4.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.4.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
- "language_model.model.layers.4.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.4.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.4.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
- "language_model.model.layers.4.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.4.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
- "language_model.model.layers.4.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
   "language_model.model.layers.5.input_layernorm.weight": "model-00002-of-00004.safetensors",
   "language_model.model.layers.5.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.5.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
   "language_model.model.layers.5.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
   "language_model.model.layers.5.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.5.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
- "language_model.model.layers.5.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.5.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.5.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
- "language_model.model.layers.5.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.5.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
- "language_model.model.layers.5.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
   "language_model.model.layers.6.input_layernorm.weight": "model-00002-of-00004.safetensors",
   "language_model.model.layers.6.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
   "language_model.model.layers.6.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",