woodchen7 committed on
Commit cb5b6e3 · verified · 1 Parent(s): ca21a18

Upload angelslim_config.json with huggingface_hub
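For reference, a commit like this one can be produced with the `upload_file` helper of the huggingface_hub Python client. A minimal sketch; the `repo_id` below is a placeholder, not a value taken from this commit:

```python
from huggingface_hub import HfApi

api = HfApi()  # uses the token from `huggingface-cli login` by default
api.upload_file(
    path_or_fileobj="angelslim_config.json",   # local file produced by AngelSlim
    path_in_repo="angelslim_config.json",      # destination path inside the repo
    repo_id="your-org/your-model",             # placeholder: the target model repo
    repo_type="model",
    commit_message="Upload angelslim_config.json with huggingface_hub",
)
```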

Files changed (1)
  1. angelslim_config.json +182 -0
angelslim_config.json ADDED
@@ -0,0 +1,182 @@
+ {
+   "model_config": {
+     "name": "Qwen3VL",
+     "model_path": "Base Model Path",
+     "trust_remote_code": true,
+     "torch_dtype": "auto",
+     "device_map": "auto",
+     "low_cpu_mem_usage": true,
+     "use_cache": false,
+     "cache_dir": null
+   },
+   "compression_config": {
+     "name": [
+       "PTQ"
+     ],
+     "quantization": {
+       "name": "fp8_static",
+       "save_name": "compressed-tensors",
+       "bits": 8,
+       "quant_method": {
+         "weight": "per-tensor",
+         "activation": "per-tensor"
+       },
+       "quant_helpers": [],
+       "smooth_alpha": 0.5,
+       "low_memory": false,
+       "cpu_convert": false,
+       "modules_to_quantize": [],
+       "zero_point": true,
+       "mse_range": false,
+       "ignore_layers": [
+         "model.visual.patch_embed.proj",
+         "model.lm_head",
+         "model.language_model.embed_tokens",
+         "model.visual.blocks.0.attn.qkv",
+         "model.visual.blocks.0.attn.proj",
+         "model.visual.blocks.0.mlp.linear_fc1",
+         "model.visual.blocks.0.mlp.linear_fc2",
+         "model.visual.blocks.1.attn.qkv",
+         "model.visual.blocks.1.attn.proj",
+         "model.visual.blocks.1.mlp.linear_fc1",
+         "model.visual.blocks.1.mlp.linear_fc2",
+         "model.visual.blocks.2.attn.qkv",
+         "model.visual.blocks.2.attn.proj",
+         "model.visual.blocks.2.mlp.linear_fc1",
+         "model.visual.blocks.2.mlp.linear_fc2",
+         "model.visual.blocks.3.attn.qkv",
+         "model.visual.blocks.3.attn.proj",
+         "model.visual.blocks.3.mlp.linear_fc1",
+         "model.visual.blocks.3.mlp.linear_fc2",
+         "model.visual.blocks.4.attn.qkv",
+         "model.visual.blocks.4.attn.proj",
+         "model.visual.blocks.4.mlp.linear_fc1",
+         "model.visual.blocks.4.mlp.linear_fc2",
+         "model.visual.blocks.5.attn.qkv",
+         "model.visual.blocks.5.attn.proj",
+         "model.visual.blocks.5.mlp.linear_fc1",
+         "model.visual.blocks.5.mlp.linear_fc2",
+         "model.visual.blocks.6.attn.qkv",
+         "model.visual.blocks.6.attn.proj",
+         "model.visual.blocks.6.mlp.linear_fc1",
+         "model.visual.blocks.6.mlp.linear_fc2",
+         "model.visual.blocks.7.attn.qkv",
+         "model.visual.blocks.7.attn.proj",
+         "model.visual.blocks.7.mlp.linear_fc1",
+         "model.visual.blocks.7.mlp.linear_fc2",
+         "model.visual.blocks.8.attn.qkv",
+         "model.visual.blocks.8.attn.proj",
+         "model.visual.blocks.8.mlp.linear_fc1",
+         "model.visual.blocks.8.mlp.linear_fc2",
+         "model.visual.blocks.9.attn.qkv",
+         "model.visual.blocks.9.attn.proj",
+         "model.visual.blocks.9.mlp.linear_fc1",
+         "model.visual.blocks.9.mlp.linear_fc2",
+         "model.visual.blocks.10.attn.qkv",
+         "model.visual.blocks.10.attn.proj",
+         "model.visual.blocks.10.mlp.linear_fc1",
+         "model.visual.blocks.10.mlp.linear_fc2",
+         "model.visual.blocks.11.attn.qkv",
+         "model.visual.blocks.11.attn.proj",
+         "model.visual.blocks.11.mlp.linear_fc1",
+         "model.visual.blocks.11.mlp.linear_fc2",
+         "model.visual.blocks.12.attn.qkv",
+         "model.visual.blocks.12.attn.proj",
+         "model.visual.blocks.12.mlp.linear_fc1",
+         "model.visual.blocks.12.mlp.linear_fc2",
+         "model.visual.blocks.13.attn.qkv",
+         "model.visual.blocks.13.attn.proj",
+         "model.visual.blocks.13.mlp.linear_fc1",
+         "model.visual.blocks.13.mlp.linear_fc2",
+         "model.visual.blocks.14.attn.qkv",
+         "model.visual.blocks.14.attn.proj",
+         "model.visual.blocks.14.mlp.linear_fc1",
+         "model.visual.blocks.14.mlp.linear_fc2",
+         "model.visual.blocks.15.attn.qkv",
+         "model.visual.blocks.15.attn.proj",
+         "model.visual.blocks.15.mlp.linear_fc1",
+         "model.visual.blocks.15.mlp.linear_fc2",
+         "model.visual.blocks.16.attn.qkv",
+         "model.visual.blocks.16.attn.proj",
+         "model.visual.blocks.16.mlp.linear_fc1",
+         "model.visual.blocks.16.mlp.linear_fc2",
+         "model.visual.blocks.17.attn.qkv",
+         "model.visual.blocks.17.attn.proj",
+         "model.visual.blocks.17.mlp.linear_fc1",
+         "model.visual.blocks.17.mlp.linear_fc2",
+         "model.visual.blocks.18.attn.qkv",
+         "model.visual.blocks.18.attn.proj",
+         "model.visual.blocks.18.mlp.linear_fc1",
+         "model.visual.blocks.18.mlp.linear_fc2",
+         "model.visual.blocks.19.attn.qkv",
+         "model.visual.blocks.19.attn.proj",
+         "model.visual.blocks.19.mlp.linear_fc1",
+         "model.visual.blocks.19.mlp.linear_fc2",
+         "model.visual.blocks.20.attn.qkv",
+         "model.visual.blocks.20.attn.proj",
+         "model.visual.blocks.20.mlp.linear_fc1",
+         "model.visual.blocks.20.mlp.linear_fc2",
+         "model.visual.blocks.21.attn.qkv",
+         "model.visual.blocks.21.attn.proj",
+         "model.visual.blocks.21.mlp.linear_fc1",
+         "model.visual.blocks.21.mlp.linear_fc2",
+         "model.visual.blocks.22.attn.qkv",
+         "model.visual.blocks.22.attn.proj",
+         "model.visual.blocks.22.mlp.linear_fc1",
+         "model.visual.blocks.22.mlp.linear_fc2",
+         "model.visual.blocks.23.attn.qkv",
+         "model.visual.blocks.23.attn.proj",
+         "model.visual.blocks.23.mlp.linear_fc1",
+         "model.visual.blocks.23.mlp.linear_fc2",
+         "model.visual.merger.linear_fc1",
+         "model.visual.merger.linear_fc2",
+         "model.visual.deepstack_merger_list.0.linear_fc1",
+         "model.visual.deepstack_merger_list.0.linear_fc2",
+         "model.visual.deepstack_merger_list.1.linear_fc1",
+         "model.visual.deepstack_merger_list.1.linear_fc2",
+         "model.visual.deepstack_merger_list.2.linear_fc1",
+         "model.visual.deepstack_merger_list.2.linear_fc2",
+         "lm_head"
+       ],
+       "quant_analyse": false,
+       "quant_vit": false
+     },
+     "cache": null
+   },
+   "dataset_config": {
+     "name": "MultiModalDataset",
+     "data_path": "Data Path",
+     "max_seq_length": 4096,
+     "num_samples": 256,
+     "batch_size": 1,
+     "shuffle": false,
+     "inference_settings": null
+   },
+   "global_config": {
+     "save_path": "Save Model Path",
+     "max_seq_length": 4096,
+     "hidden_size": 2048,
+     "model_arch_type": "qwen3_vl",
+     "deploy_backend": "vllm"
+   },
+   "infer_config": null,
+   "debug_info": {
+     "python": "3.10.12 (main, Aug 29 2024, 16:22:46) [GCC 9.3.0]",
+     "angelslim": {
+       "name": "angelslim",
+       "version": "0.0.0.dev0",
+       "source": "pip"
+     },
+     "torch": {
+       "name": "torch",
+       "version": "2.6.0",
+       "source": "pip"
+     },
+     "transformers": {
+       "name": "transformers",
+       "version": "4.57.1",
+       "source": "pip"
+     },
+     "torch_cuda_version": "12.4"
+   }
+ }
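Since `global_config.deploy_backend` is `"vllm"` and the quantized weights are exported in compressed-tensors format (`save_name: "compressed-tensors"`), the checkpoint written to `save_path` is meant to be served with vLLM, which reads the FP8 quantization scheme from the checkpoint's own quantization config. A minimal sketch of loading it for text-only generation, using a hypothetical local path in place of "Save Model Path":

```python
from vllm import LLM, SamplingParams

# Hypothetical path: wherever global_config.save_path points after export.
llm = LLM(
    model="/path/to/saved/qwen3-vl-fp8",
    trust_remote_code=True,   # mirrors model_config.trust_remote_code
    max_model_len=4096,       # mirrors global_config.max_seq_length
)

outputs = llm.generate(
    ["Describe the image in one sentence."],
    SamplingParams(max_tokens=64),
)
print(outputs[0].outputs[0].text)
```

Image inputs would additionally go through the Qwen3-VL processor; either way, the vision-tower layers listed under `ignore_layers` (and `quant_vit: false`) are kept in the original precision, so only the language-model linears run in FP8.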