mratsim committed · Commit a09519f · verified · 1 Parent(s): aeb3c7c

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
GLM45-NoThink-SillyTavern-Preset.json ADDED
@@ -0,0 +1,163 @@
{
    "instruct": {
        "input_sequence": "<|user|>\n",
        "output_sequence": "<|assistant|>\n<think></think>\n",
        "last_output_sequence": "",
        "system_sequence": "<|system|>\n",
        "stop_sequence": "<|user|>",
        "wrap": false,
        "macro": true,
        "activation_regex": "",
        "first_output_sequence": "",
        "skip_examples": false,
        "output_suffix": "",
        "input_suffix": "/nothink",
        "system_suffix": "",
        "user_alignment_message": "",
        "system_same_as_user": false,
        "last_system_sequence": "",
        "first_input_sequence": "",
        "last_input_sequence": "",
        "names_behavior": "always",
        "sequences_as_stop_strings": true,
        "story_string_prefix": "",
        "story_string_suffix": "",
        "name": "GLM4.5-NoThink"
    },
    "context": {
        "story_string": "[gMASK]<sop><|system|>\n{{#if anchorBefore}}{{anchorBefore}}\n{{/if}}{{#if system}}{{system}}\n{{/if}}{{#if wiBefore}}## World Info:\n{{wiBefore}}\n{{/if}}{{#if description}}## {{char}}'s Description:\n{{description}}\n{{/if}}{{#if personality}}## {{char}}'s Personality:\n{{personality}}\n{{/if}}{{#if persona}}## {{user}}'s Persona:\n{{persona}}\n{{/if}}{{#if scenario}}## Scenario:\n{{scenario}}\n{{/if}}{{#if wiAfter}}## Lore:\n{{wiAfter}}\n{{/if}}{{#if anchorAfter}}{{anchorAfter}}\n{{/if}}{{trim}}\n",
        "example_separator": "",
        "chat_start": "",
        "use_stop_strings": false,
        "names_as_stop_strings": true,
        "story_string_position": 0,
        "story_string_depth": 1,
        "story_string_role": 0,
        "always_force_name2": true,
        "trim_sentences": false,
        "single_line": false,
        "name": "GLM4.5-NoThink"
    },
    "preset": {
        "temp": 0.8,
        "temperature_last": true,
        "top_p": 0.95,
        "top_k": 0,
        "top_a": 0,
        "tfs": 1,
        "epsilon_cutoff": 0,
        "eta_cutoff": 0,
        "typical_p": 1,
        "min_p": 0.05,
        "rep_pen": 1,
        "rep_pen_range": 0,
        "rep_pen_decay": 0,
        "rep_pen_slope": 1,
        "no_repeat_ngram_size": 0,
        "penalty_alpha": 0,
        "num_beams": 1,
        "length_penalty": 1,
        "min_length": 0,
        "encoder_rep_pen": 1,
        "freq_pen": 0,
        "presence_pen": 0,
        "skew": 0,
        "do_sample": true,
        "early_stopping": false,
        "dynatemp": false,
        "min_temp": 1,
        "max_temp": 1.3,
        "dynatemp_exponent": 1,
        "smoothing_factor": 0,
        "smoothing_curve": 1,
        "dry_allowed_length": 4,
        "dry_multiplier": 0,
        "dry_base": 1.75,
        "dry_sequence_breakers": "[\"\\n\", \":\", \"\\\"\", \"*\", \"<|start_header_id|>system<|end_header_id|>\", \"<|start_header_id|>assistant<|end_header_id|>\", \"<|start_header_id|>user<|end_header_id|>\", \"<|eot_id|>\"]",
        "dry_penalty_last_n": 0,
        "add_bos_token": true,
        "ban_eos_token": false,
        "skip_special_tokens": false,
        "mirostat_mode": 0,
        "mirostat_tau": 2,
        "mirostat_eta": 0.1,
        "guidance_scale": 1,
        "negative_prompt": "",
        "grammar_string": "",
        "json_schema": {},
        "banned_tokens": "",
        "sampler_priority": [
            "repetition_penalty",
            "presence_penalty",
            "frequency_penalty",
            "dry",
            "top_k",
            "top_p",
            "typical_p",
            "top_n_sigma",
            "epsilon_cutoff",
            "eta_cutoff",
            "tfs",
            "top_a",
            "min_p",
            "temperature",
            "mirostat",
            "quadratic_sampling",
            "dynamic_temperature",
            "xtc",
            "encoder_repetition_penalty",
            "no_repeat_ngram"
        ],
        "samplers": [
            "penalties",
            "dry",
            "top_n_sigma",
            "top_k",
            "typ_p",
            "tfs_z",
            "typical_p",
            "top_p",
            "min_p",
            "xtc",
            "temperature"
        ],
        "samplers_priorities": [
            "dry",
            "penalties",
            "no_repeat_ngram",
            "temperature",
            "top_nsigma",
            "top_p_top_k",
            "top_a",
            "min_p",
            "tfs",
            "eta_cutoff",
            "epsilon_cutoff",
            "typical_p",
            "quadratic",
            "xtc"
        ],
        "ignore_eos_token": false,
        "spaces_between_special_tokens": true,
        "speculative_ngram": false,
        "sampler_order": [
            6,
            0,
            1,
            3,
            4,
            2,
            5
        ],
        "logit_bias": [],
        "xtc_threshold": 0,
        "xtc_probability": 0,
        "nsigma": 0,
        "min_keep": 0,
        "extensions": {},
        "rep_pen_size": 0,
        "genamt": 700,
        "max_length": 20480,
        "name": "GLM4.5-Iceblink-v2"
    }
}
README.md ADDED
@@ -0,0 +1,198 @@
---
license: mit
base_model:
- zerofata/GLM-4.5-Iceblink-v2-106B-A12B
datasets:
- neuralmagic/calibration
- HuggingFaceH4/ultrachat_200k
- nvidia/OpenCodeInstruct
- CSJianYang/CodeArena
- nvidia/OpenScienceReasoning-2
- MegaScience/MegaScience
- Gryphe/Opus-WritingPrompts
- ServiceNow-AI/M2Lingual
- anthracite-org/stheno-filtered-v1.1
- zerofata/Roleplay-Anime-Characters
- zerofata/Instruct-Anime
- zerofata/Instruct-Anime-CreativeWriting
- sam-paech/gutenberg3-generalfiction-scifi-fantasy-romance-adventure-dpo
- nvidia/OpenMathInstruct-2
- fka/awesome-chatgpt-prompts
- databricks/databricks-dolly-15k
- FreedomIntelligence/SocraticChat
- ruggsea/stanford-encyclopedia-of-philosophy_instruct
- mlfoundations-dev/stackexchange_philosophy
- theoldmandthesea/17k_business_book
- anthracite-org/nopm_claude_writing_fixed
pipeline_tag: text-generation
tags:
- text adventure
- roleplay
- rpg
- creative writing
- conversational
- awq
- vllm
---
# GLM-4.5-Iceblink-v2-106B-A12B (AWQ 4-bit quant)

This repo contains GLM-4.5-Iceblink-v2-106B-A12B quantized with AWQ to mixed 4-bit/16-bit precision, following state-of-the-art Mixture-of-Experts quantization practice, with a careful selection of calibration datasets covering math, science, philosophy, business, fiction, roleplay, creative writing, general knowledge and multilingual data, to plausibly ensure that all 128 routed experts of the model were activated by enough calibration samples.

- Original Model:
  - [zerofata/GLM-4.5-Iceblink-v2-106B-A12B](https://huggingface.co/zerofata/GLM-4.5-Iceblink-v2-106B-A12B)

The model requires ~65.7GiB of VRAM, plus ~23GiB for a KV cache covering 131072 tokens.
This fits perfectly on 4x24GB, 2x48GB or 1x96GB GPUs.

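As a sanity check, the ~23GiB KV-cache figure can be recomputed from the values shipped in this repo's [config.json](config.json) (46 layers, 8 KV heads, head dim 128, BF16 cache); a back-of-the-envelope calculation:

```python
# KV-cache size estimate from config.json values (BF16 = 2 bytes per element)
layers, kv_heads, head_dim, ctx_len = 46, 8, 128, 131072
bytes_per_token = 2 * layers * kv_heads * head_dim * 2  # K and V tensors, BF16
print(f"{bytes_per_token * ctx_len / 2**30:.1f} GiB")   # -> 23.0 GiB
```
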
## 📥 Usage & Running Instructions

The model was tested with vLLM on 1x RTX Pro 6000; below is a script suitable for such a configuration with 131072 context length.

### Recommendations

It is however recommended to use only 65K context to avoid significant degradation (https://fiction.live/stories/Fiction-liveBench-Sept-29-2025/oQdzQvKHw8JyXbN87).

The recommended sampler is "min-p" sampling. It is available through both the older Text completions API and the Chat completions API (and the newer Responses API); however, most LLM frontends only support modifying min-p when using Text completions.
You can also use `--override-generation-config "${SAMPLER_JSONCONFIG}"` to override the server-side sampler defaults (a merge of `generation_config.json` and vLLM defaults).
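
For scripts or frontends that talk to the server directly, min-p can also be set per request. A minimal sketch against vLLM's OpenAI-compatible Text completions endpoint, assuming a server started as in the script below on the default port 8000 (`min_p` is a vLLM extension to the OpenAI schema, and the prompt shown is purely illustrative):

```python
import requests

resp = requests.post(
    "http://localhost:8000/v1/completions",
    json={
        "model": "GLM-4.5-Iceblink-v2",
        "prompt": "[gMASK]<sop><|user|>\nHello /nothink<|assistant|>\n<think></think>\n",
        "max_tokens": 256,
        "temperature": 0.8,
        "top_p": 0.95,
        "min_p": 0.05,  # vLLM accepts min_p as an extra sampling field
    },
)
print(resp.json()["choices"][0]["text"])
```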

### Running script

```bash
# Model configuration (Mandatory)
MODEL="mratsim/GLM-4.5-Iceblink-v2-106B-A12B-AWQ"
MODELNAME="GLM-4.5-Iceblink-v2"
GPU_UTIL=0.97

# Sampling configuration (Optional, if departing from `generation_config.json`)
SAMPLER_OVERRIDE='{"temperature": 0.8, "min_p": 0.05, "top_p": 0.95}'

# Prevent memory fragmentation
export PYTORCH_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512

# Prevent vLLM from using 100% CPU when idle (very recommended)
export VLLM_SLEEP_WHEN_IDLE=1

# Use the FlashInfer backend (fastest, recommended, "instant" context reprocessing);
# it however requires reducing the context length to 120000 tokens and GPU_UTIL to 0.95
# export VLLM_ATTENTION_BACKEND=FLASHINFER

vllm serve "${MODEL}" \
  --served-model-name "${MODELNAME}" \
  --gpu-memory-utilization ${GPU_UTIL} \
  --override-generation-config "${SAMPLER_OVERRIDE}"
```

> ℹ️ The FlashInfer backend may fail with an error similar to
> `Failed to allocate memory for batch_prefill_tmp_v with size XYZ and alignment 16 in AlignedAllocator`.
>
> A workaround is to run a sed replacement within the vLLM install to increase the workspace buffer:
> ```bash
> sed -i 's/FLASHINFER_WORKSPACE_BUFFER_SIZE = 256 \* 1024 \* 1024/FLASHINFER_WORKSPACE_BUFFER_SIZE = 768 \* 1024 \* 1024/g' vllm/v1/attention/backends/flashinfer.py
> ```
> This will be fixed by PR https://github.com/vllm-project/vllm/pull/25344 or https://github.com/vllm-project/vllm/pull/28269

## 🔬 Quantization method

The llmcompressor library was used with the following recipe:

```yaml
default_stage:
  default_modifiers:
    AWQModifier:
      config_groups:
        group_0:
          targets: ['re:.*mlp\.experts\.[0-9]+\.(down|gate|up)_proj$']
          weights:
            num_bits: 4
            type: int
            symmetric: true
            group_size: 32
            strategy: group
            block_structure: null
            dynamic: false
            actorder: null
            observer: mse
            observer_kwargs: {}
            input_activations: null
            output_activations: null
            format: null
      targets: ['re:.*mlp\.experts\.[0-9]+\.(down|gate|up)_proj$']
      ignore: []
      mappings:
      - smooth_layer: re:.*post_attention_layernorm$
        balance_layers: ['re:.*gate_proj$', 're:.*up_proj$']
      - smooth_layer: re:.*up_proj$
        balance_layers: ['re:.*down_proj$']
      duo_scaling: true
```

and calibrated with over 1600 samples, of up to 8192 tokens of sequence length each, drawn from:
- [neuralmagic/calibration](https://huggingface.co/datasets/neuralmagic/calibration)
- [HuggingFaceH4/ultrachat_200k](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k)
- [nvidia/OpenCodeInstruct](https://huggingface.co/datasets/nvidia/OpenCodeInstruct)
- [CSJianYang/CodeArena](https://huggingface.co/datasets/CSJianYang/CodeArena)
- [nvidia/OpenScienceReasoning-2](https://huggingface.co/datasets/nvidia/OpenScienceReasoning-2)
- [MegaScience/MegaScience](https://huggingface.co/datasets/MegaScience/MegaScience)
- [Gryphe/Opus-WritingPrompts](https://huggingface.co/datasets/Gryphe/Opus-WritingPrompts)
- [ServiceNow-AI/M2Lingual](https://huggingface.co/datasets/ServiceNow-AI/M2Lingual)
- [anthracite-org/stheno-filtered-v1.1](https://huggingface.co/datasets/anthracite-org/stheno-filtered-v1.1)
- [zerofata/Roleplay-Anime-Characters](https://huggingface.co/datasets/zerofata/Roleplay-Anime-Characters)
- [zerofata/Instruct-Anime](https://huggingface.co/datasets/zerofata/Instruct-Anime)
- [zerofata/Instruct-Anime-CreativeWriting](https://huggingface.co/datasets/zerofata/Instruct-Anime-CreativeWriting)
- [sam-paech/gutenberg3-generalfiction-scifi-fantasy-romance-adventure-dpo](https://huggingface.co/datasets/sam-paech/gutenberg3-generalfiction-scifi-fantasy-romance-adventure-dpo)
- [nvidia/OpenMathInstruct-2](https://huggingface.co/datasets/nvidia/OpenMathInstruct-2)
- [fka/awesome-chatgpt-prompts](https://huggingface.co/datasets/fka/awesome-chatgpt-prompts)
- [databricks/databricks-dolly-15k](https://huggingface.co/datasets/databricks/databricks-dolly-15k)
- [FreedomIntelligence/SocraticChat](https://huggingface.co/datasets/FreedomIntelligence/SocraticChat)
- [ruggsea/stanford-encyclopedia-of-philosophy_instruct](https://huggingface.co/datasets/ruggsea/stanford-encyclopedia-of-philosophy_instruct)
- [mlfoundations-dev/stackexchange_philosophy](https://huggingface.co/datasets/mlfoundations-dev/stackexchange_philosophy)
- [theoldmandthesea/17k_business_book](https://huggingface.co/datasets/theoldmandthesea/17k_business_book)
- [anthracite-org/nopm_claude_writing_fixed](https://huggingface.co/datasets/anthracite-org/nopm_claude_writing_fixed)
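
For reference, a minimal sketch of what such a run looks like with llmcompressor's `oneshot` entrypoint (a single placeholder dataset is shown; the actual multi-dataset mixing and chat-template preprocessing are elided):

```python
from llmcompressor import oneshot
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "zerofata/GLM-4.5-Iceblink-v2-106B-A12B"
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

oneshot(
    model=model,
    dataset="ultrachat_200k",      # placeholder: one of the sources listed above
    recipe="recipe.yaml",          # the AWQ recipe shown earlier
    max_seq_length=8192,
    num_calibration_samples=1600,
)

model.save_pretrained("GLM-4.5-Iceblink-v2-106B-A12B-AWQ", save_compressed=True)
tokenizer.save_pretrained("GLM-4.5-Iceblink-v2-106B-A12B-AWQ")
```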

According to the [AWQ presentation](https://minjiazhang.github.io/courses/fall24-resource/slides/awq.pdf),
only 64 samples are needed. Due to the Mixture-of-Experts topology, however, this implies that all 128 routed experts need to see at least 64 samples, or alternatively that all experts are activated during calibration, which requires [reimplementing the attention block of the model](https://github.com/vllm-project/llm-compressor/tree/0.8.1/examples/quantization_w4a4_fp4#quantizing-moes) in [llmcompressor's modeling DB](https://github.com/vllm-project/llm-compressor/tree/0.8.1/src/llmcompressor/modeling).
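
Under a uniform-routing idealization (8 active experts per token out of 128 routed experts), the chosen calibration budget leaves each expert with far more than 64 samples' worth of tokens:

```python
# Expected calibration tokens routed to each expert,
# assuming roughly uniform routing (an idealization)
samples, seq_len, n_experts, top_k = 1600, 8192, 128, 8
tokens_per_expert = samples * seq_len * top_k / n_experts
print(f"{tokens_per_expert:,.0f}")  # -> 819,200
```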

### Deep-dive

Quantization should focus on Linear layers (also called Dense or Fully-Connected layers, i.e. MatMul+Bias).
In particular, quantizing LayerNorm/RMSNorm layers is strongly discouraged, see [1]:
> LayerNorm in Quantization. Kovaleva et al. (2021); Wei et al. (2022) find that outliers in the
> LayerNorm parameters of BERT (Devlin et al., 2019) cause difficulties in model compression.
> Given the importance of LayerNorm, all the quantization methods we discuss above leave LayerNorm unquantized.

_Note: Expert layers might not be stored as `Linear` layers, meaning they might be skipped if using `llmcompressor` with a `Linear` target._
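
A quick way to check this without downloading the weights is to instantiate the architecture on the meta device and inspect how the expert projections are stored (a sketch using `accelerate`; module names follow this repo's `config.json`, and a transformers version with `glm4_moe` support is assumed):

```python
from accelerate import init_empty_weights
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("zerofata/GLM-4.5-Iceblink-v2-106B-A12B")
with init_empty_weights():
    model = AutoModelForCausalLM.from_config(config)

# Print the module class backing an expert projection: if it is not nn.Linear,
# a plain `Linear` target would silently skip the experts
for name, module in model.named_modules():
    if ".mlp.experts.0.down_proj" in name:
        print(name, "->", type(module).__name__)
        break
```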

Some layers have a higher impact on LLM performance than others.
According to [2], spending more bits on attention layers results in large gains compared to spending them on FFN layers.
According to [3], at 2-bit quantization:
- quantizing expert FFN layers does not seriously impact model quality
- quantizing cross-attention has some impact
- quantizing self-attention has a large impact
- quantizing dense FFN has a very significant impact

Hence, to preserve model quality, we choose not to quantize the dense FFN layers (i.e. shared experts) or the self-attention layers.

We notice that:
- the official MXFP4 weights of gpt-oss-120b from OpenAI keep self-attention in BF16:
  - https://huggingface.co/openai/gpt-oss-120b/blob/main/model.safetensors.index.json
- the NVFP4 weights of DeepSeek-R1 quantized by Nvidia also keep self-attention in BF16:
  - https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4/blob/main/model.safetensors.index.json

According to [2], giving more bits to the first `k` blocks has a significantly higher impact on model quality than giving them to the last `k` blocks.
In this case, we keep the first layer unquantized, as `"first_k_dense_replace": 1` in [config.json](config.json).
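
These exclusions can be verified directly from the `quantization_config.ignore` list shipped in this repo's `config.json`:

```python
import json

with open("config.json") as f:
    quant_cfg = json.load(f)["quantization_config"]

ignored = quant_cfg["ignore"]
print(len(ignored), "modules kept in BF16")
# The whole first (dense) block is excluded, matching first_k_dense_replace = 1
print([name for name in ignored if name.startswith("model.layers.0.")])
```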

### References

1. Why Do Some Inputs Break Low-Bit LLM Quantization? (2025)\
   Ting-Yun Chang, Muru Zhang, Jesse Thomason, Robin Jia\
   https://arxiv.org/pdf/2506.12044

2. Examining Post-Training Quantization for Mixture-of-Experts: A Benchmark (2024)\
   Pingzhi Li, Xiaolong Jin, Yu Cheng, Tianlong Chen\
   https://arxiv.org/pdf/2406.08155v1

3. Mixture of Quantized Experts (MoQE): Complementary Effect of Low-bit Quantization and Robustness (2023)\
   Young Jin Kim, Raffy Fahim, Hany Hassan Awadalla\
   https://arxiv.org/pdf/2310.02410
chat_template.jinja ADDED
@@ -0,0 +1,103 @@
[gMASK]<sop>
{%- if tools -%}
<|system|>
# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{% for tool in tools %}
{{ tool | tojson(ensure_ascii=False) }}
{% endfor %}
</tools>

For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}
<arg_key>{arg-key-1}</arg_key>
<arg_value>{arg-value-1}</arg_value>
<arg_key>{arg-key-2}</arg_key>
<arg_value>{arg-value-2}</arg_value>
...
</tool_call>{%- endif -%}
{%- macro visible_text(content) -%}
{%- if content is string -%}
{{- content }}
{%- elif content is iterable and content is not mapping -%}
{%- for item in content -%}
{%- if item is mapping and item.type == 'text' -%}
{{- item.text }}
{%- elif item is string -%}
{{- item }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{- content }}
{%- endif -%}
{%- endmacro -%}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in messages %}
{%- if m.role == 'user' %}
{% set ns.last_user_index = loop.index0 -%}
{%- endif %}
{%- endfor %}
{% for m in messages %}
{%- if m.role == 'user' -%}<|user|>
{{ visible_text(m.content) }}
{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
{%- elif m.role == 'assistant' -%}
<|assistant|>
{%- set reasoning_content = '' %}
{%- set content = visible_text(m.content) %}
{%- if m.reasoning_content is string %}
{%- set reasoning_content = m.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
{{ '\n<think>' + reasoning_content.strip() + '</think>'}}
{%- else -%}
{{ '\n<think></think>' }}
{%- endif -%}
{%- if content.strip() -%}
{{ '\n' + content.strip() }}
{%- endif -%}
{% if m.tool_calls %}
{% for tc in m.tool_calls %}
{%- if tc.function %}
{%- set tc = tc.function %}
{%- endif %}
{{ '\n<tool_call>' + tc.name }}
{% set _args = tc.arguments %}
{% for k, v in _args.items() %}
<arg_key>{{ k }}</arg_key>
<arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
{% endfor %}
</tool_call>{% endfor %}
{% endif %}
{%- elif m.role == 'tool' -%}
{%- if m.content is string -%}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|observation|>' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- m.content }}
{{- '\n</tool_response>' }}
{%- else -%}
<|observation|>{% for tr in m.content %}

<tool_response>
{{ tr.output if tr.output is defined else tr }}
</tool_response>{% endfor -%}
{% endif -%}
{%- elif m.role == 'system' -%}
<|system|>
{{ visible_text(m.content) }}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
<|assistant|>{{- '\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}
{%- endif -%}
config.json ADDED
@@ -0,0 +1,404 @@
{
  "architectures": [
    "Glm4MoeForCausalLM"
  ],
  "attention_bias": true,
  "attention_dropout": 0.0,
  "dtype": "bfloat16",
  "eos_token_id": [
    151329,
    151336,
    151338
  ],
  "first_k_dense_replace": 1,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 10944,
  "max_position_embeddings": 131072,
  "model_type": "glm4_moe",
  "moe_intermediate_size": 1408,
  "n_group": 1,
  "n_routed_experts": 128,
  "n_shared_experts": 1,
  "no_split_module_classes": [
    "MergedColumnParallelLinear"
  ],
  "norm_topk_prob": true,
  "num_attention_heads": 96,
  "num_experts_per_tok": 8,
  "num_hidden_layers": 46,
  "num_key_value_heads": 8,
  "num_nextn_predict_layers": 1,
  "pad_token_id": 151329,
  "partial_rotary_factor": 0.5,
  "quantization_config": {
    "config_groups": {
      "group_0": {
        "format": "pack-quantized",
        "input_activations": null,
        "output_activations": null,
        "targets": [
          "Linear",
          "re:.*mlp\\.experts\\.[0-9]+\\.(down|gate|up)_proj$"
        ],
        "weights": {
          "actorder": null,
          "block_structure": null,
          "dynamic": false,
          "group_size": 32,
          "num_bits": 4,
          "observer": "mse",
          "observer_kwargs": {},
          "strategy": "group",
          "symmetric": true,
          "type": "int"
        }
      }
    },
    "format": "pack-quantized",
    "global_compression_ratio": null,
    "ignore": [
      "model.layers.0.self_attn.q_proj",
      "model.layers.0.self_attn.k_proj",
      "model.layers.0.self_attn.v_proj",
      "model.layers.0.self_attn.o_proj",
      "model.layers.0.mlp.gate_proj",
      "model.layers.0.mlp.up_proj",
      "model.layers.0.mlp.down_proj",
      "model.layers.1.self_attn.q_proj",
      "model.layers.1.self_attn.k_proj",
      "model.layers.1.self_attn.v_proj",
      "model.layers.1.self_attn.o_proj",
      "model.layers.1.mlp.shared_experts.gate_proj",
      "model.layers.1.mlp.shared_experts.up_proj",
      "model.layers.1.mlp.shared_experts.down_proj",
      "model.layers.2.self_attn.q_proj",
      "model.layers.2.self_attn.k_proj",
      "model.layers.2.self_attn.v_proj",
      "model.layers.2.self_attn.o_proj",
      "model.layers.2.mlp.shared_experts.gate_proj",
      "model.layers.2.mlp.shared_experts.up_proj",
      "model.layers.2.mlp.shared_experts.down_proj",
      "model.layers.3.self_attn.q_proj",
      "model.layers.3.self_attn.k_proj",
      "model.layers.3.self_attn.v_proj",
      "model.layers.3.self_attn.o_proj",
      "model.layers.3.mlp.shared_experts.gate_proj",
      "model.layers.3.mlp.shared_experts.up_proj",
      "model.layers.3.mlp.shared_experts.down_proj",
      "model.layers.4.self_attn.q_proj",
      "model.layers.4.self_attn.k_proj",
      "model.layers.4.self_attn.v_proj",
      "model.layers.4.self_attn.o_proj",
      "model.layers.4.mlp.shared_experts.gate_proj",
      "model.layers.4.mlp.shared_experts.up_proj",
      "model.layers.4.mlp.shared_experts.down_proj",
      "model.layers.5.self_attn.q_proj",
      "model.layers.5.self_attn.k_proj",
      "model.layers.5.self_attn.v_proj",
      "model.layers.5.self_attn.o_proj",
      "model.layers.5.mlp.shared_experts.gate_proj",
      "model.layers.5.mlp.shared_experts.up_proj",
      "model.layers.5.mlp.shared_experts.down_proj",
      "model.layers.6.self_attn.q_proj",
      "model.layers.6.self_attn.k_proj",
      "model.layers.6.self_attn.v_proj",
      "model.layers.6.self_attn.o_proj",
      "model.layers.6.mlp.shared_experts.gate_proj",
      "model.layers.6.mlp.shared_experts.up_proj",
      "model.layers.6.mlp.shared_experts.down_proj",
      "model.layers.7.self_attn.q_proj",
      "model.layers.7.self_attn.k_proj",
      "model.layers.7.self_attn.v_proj",
      "model.layers.7.self_attn.o_proj",
      "model.layers.7.mlp.shared_experts.gate_proj",
      "model.layers.7.mlp.shared_experts.up_proj",
      "model.layers.7.mlp.shared_experts.down_proj",
      "model.layers.8.self_attn.q_proj",
      "model.layers.8.self_attn.k_proj",
      "model.layers.8.self_attn.v_proj",
      "model.layers.8.self_attn.o_proj",
      "model.layers.8.mlp.shared_experts.gate_proj",
      "model.layers.8.mlp.shared_experts.up_proj",
      "model.layers.8.mlp.shared_experts.down_proj",
      "model.layers.9.self_attn.q_proj",
      "model.layers.9.self_attn.k_proj",
      "model.layers.9.self_attn.v_proj",
      "model.layers.9.self_attn.o_proj",
      "model.layers.9.mlp.shared_experts.gate_proj",
      "model.layers.9.mlp.shared_experts.up_proj",
      "model.layers.9.mlp.shared_experts.down_proj",
      "model.layers.10.self_attn.q_proj",
      "model.layers.10.self_attn.k_proj",
      "model.layers.10.self_attn.v_proj",
      "model.layers.10.self_attn.o_proj",
      "model.layers.10.mlp.shared_experts.gate_proj",
      "model.layers.10.mlp.shared_experts.up_proj",
      "model.layers.10.mlp.shared_experts.down_proj",
      "model.layers.11.self_attn.q_proj",
      "model.layers.11.self_attn.k_proj",
      "model.layers.11.self_attn.v_proj",
      "model.layers.11.self_attn.o_proj",
      "model.layers.11.mlp.shared_experts.gate_proj",
      "model.layers.11.mlp.shared_experts.up_proj",
      "model.layers.11.mlp.shared_experts.down_proj",
      "model.layers.12.self_attn.q_proj",
      "model.layers.12.self_attn.k_proj",
      "model.layers.12.self_attn.v_proj",
      "model.layers.12.self_attn.o_proj",
      "model.layers.12.mlp.shared_experts.gate_proj",
      "model.layers.12.mlp.shared_experts.up_proj",
      "model.layers.12.mlp.shared_experts.down_proj",
      "model.layers.13.self_attn.q_proj",
      "model.layers.13.self_attn.k_proj",
      "model.layers.13.self_attn.v_proj",
      "model.layers.13.self_attn.o_proj",
      "model.layers.13.mlp.shared_experts.gate_proj",
      "model.layers.13.mlp.shared_experts.up_proj",
      "model.layers.13.mlp.shared_experts.down_proj",
      "model.layers.14.self_attn.q_proj",
      "model.layers.14.self_attn.k_proj",
      "model.layers.14.self_attn.v_proj",
      "model.layers.14.self_attn.o_proj",
      "model.layers.14.mlp.shared_experts.gate_proj",
      "model.layers.14.mlp.shared_experts.up_proj",
      "model.layers.14.mlp.shared_experts.down_proj",
      "model.layers.15.self_attn.q_proj",
      "model.layers.15.self_attn.k_proj",
      "model.layers.15.self_attn.v_proj",
      "model.layers.15.self_attn.o_proj",
      "model.layers.15.mlp.shared_experts.gate_proj",
      "model.layers.15.mlp.shared_experts.up_proj",
      "model.layers.15.mlp.shared_experts.down_proj",
      "model.layers.16.self_attn.q_proj",
      "model.layers.16.self_attn.k_proj",
      "model.layers.16.self_attn.v_proj",
      "model.layers.16.self_attn.o_proj",
      "model.layers.16.mlp.shared_experts.gate_proj",
      "model.layers.16.mlp.shared_experts.up_proj",
      "model.layers.16.mlp.shared_experts.down_proj",
      "model.layers.17.self_attn.q_proj",
      "model.layers.17.self_attn.k_proj",
      "model.layers.17.self_attn.v_proj",
      "model.layers.17.self_attn.o_proj",
      "model.layers.17.mlp.shared_experts.gate_proj",
      "model.layers.17.mlp.shared_experts.up_proj",
      "model.layers.17.mlp.shared_experts.down_proj",
      "model.layers.18.self_attn.q_proj",
      "model.layers.18.self_attn.k_proj",
      "model.layers.18.self_attn.v_proj",
      "model.layers.18.self_attn.o_proj",
      "model.layers.18.mlp.shared_experts.gate_proj",
      "model.layers.18.mlp.shared_experts.up_proj",
      "model.layers.18.mlp.shared_experts.down_proj",
      "model.layers.19.self_attn.q_proj",
      "model.layers.19.self_attn.k_proj",
      "model.layers.19.self_attn.v_proj",
      "model.layers.19.self_attn.o_proj",
      "model.layers.19.mlp.shared_experts.gate_proj",
      "model.layers.19.mlp.shared_experts.up_proj",
      "model.layers.19.mlp.shared_experts.down_proj",
      "model.layers.20.self_attn.q_proj",
      "model.layers.20.self_attn.k_proj",
      "model.layers.20.self_attn.v_proj",
      "model.layers.20.self_attn.o_proj",
      "model.layers.20.mlp.shared_experts.gate_proj",
      "model.layers.20.mlp.shared_experts.up_proj",
      "model.layers.20.mlp.shared_experts.down_proj",
      "model.layers.21.self_attn.q_proj",
      "model.layers.21.self_attn.k_proj",
      "model.layers.21.self_attn.v_proj",
      "model.layers.21.self_attn.o_proj",
      "model.layers.21.mlp.shared_experts.gate_proj",
      "model.layers.21.mlp.shared_experts.up_proj",
      "model.layers.21.mlp.shared_experts.down_proj",
      "model.layers.22.self_attn.q_proj",
      "model.layers.22.self_attn.k_proj",
      "model.layers.22.self_attn.v_proj",
      "model.layers.22.self_attn.o_proj",
      "model.layers.22.mlp.shared_experts.gate_proj",
      "model.layers.22.mlp.shared_experts.up_proj",
      "model.layers.22.mlp.shared_experts.down_proj",
      "model.layers.23.self_attn.q_proj",
      "model.layers.23.self_attn.k_proj",
      "model.layers.23.self_attn.v_proj",
      "model.layers.23.self_attn.o_proj",
      "model.layers.23.mlp.shared_experts.gate_proj",
      "model.layers.23.mlp.shared_experts.up_proj",
      "model.layers.23.mlp.shared_experts.down_proj",
      "model.layers.24.self_attn.q_proj",
      "model.layers.24.self_attn.k_proj",
      "model.layers.24.self_attn.v_proj",
      "model.layers.24.self_attn.o_proj",
      "model.layers.24.mlp.shared_experts.gate_proj",
      "model.layers.24.mlp.shared_experts.up_proj",
      "model.layers.24.mlp.shared_experts.down_proj",
      "model.layers.25.self_attn.q_proj",
      "model.layers.25.self_attn.k_proj",
      "model.layers.25.self_attn.v_proj",
      "model.layers.25.self_attn.o_proj",
      "model.layers.25.mlp.shared_experts.gate_proj",
      "model.layers.25.mlp.shared_experts.up_proj",
      "model.layers.25.mlp.shared_experts.down_proj",
      "model.layers.26.self_attn.q_proj",
      "model.layers.26.self_attn.k_proj",
      "model.layers.26.self_attn.v_proj",
      "model.layers.26.self_attn.o_proj",
      "model.layers.26.mlp.shared_experts.gate_proj",
      "model.layers.26.mlp.shared_experts.up_proj",
      "model.layers.26.mlp.shared_experts.down_proj",
      "model.layers.27.self_attn.q_proj",
      "model.layers.27.self_attn.k_proj",
      "model.layers.27.self_attn.v_proj",
      "model.layers.27.self_attn.o_proj",
      "model.layers.27.mlp.shared_experts.gate_proj",
      "model.layers.27.mlp.shared_experts.up_proj",
      "model.layers.27.mlp.shared_experts.down_proj",
      "model.layers.28.self_attn.q_proj",
      "model.layers.28.self_attn.k_proj",
      "model.layers.28.self_attn.v_proj",
      "model.layers.28.self_attn.o_proj",
      "model.layers.28.mlp.shared_experts.gate_proj",
      "model.layers.28.mlp.shared_experts.up_proj",
      "model.layers.28.mlp.shared_experts.down_proj",
      "model.layers.29.self_attn.q_proj",
      "model.layers.29.self_attn.k_proj",
      "model.layers.29.self_attn.v_proj",
      "model.layers.29.self_attn.o_proj",
      "model.layers.29.mlp.shared_experts.gate_proj",
      "model.layers.29.mlp.shared_experts.up_proj",
      "model.layers.29.mlp.shared_experts.down_proj",
      "model.layers.30.self_attn.q_proj",
      "model.layers.30.self_attn.k_proj",
      "model.layers.30.self_attn.v_proj",
      "model.layers.30.self_attn.o_proj",
      "model.layers.30.mlp.shared_experts.gate_proj",
      "model.layers.30.mlp.shared_experts.up_proj",
      "model.layers.30.mlp.shared_experts.down_proj",
      "model.layers.31.self_attn.q_proj",
      "model.layers.31.self_attn.k_proj",
      "model.layers.31.self_attn.v_proj",
      "model.layers.31.self_attn.o_proj",
      "model.layers.31.mlp.shared_experts.gate_proj",
      "model.layers.31.mlp.shared_experts.up_proj",
      "model.layers.31.mlp.shared_experts.down_proj",
      "model.layers.32.self_attn.q_proj",
      "model.layers.32.self_attn.k_proj",
      "model.layers.32.self_attn.v_proj",
      "model.layers.32.self_attn.o_proj",
      "model.layers.32.mlp.shared_experts.gate_proj",
      "model.layers.32.mlp.shared_experts.up_proj",
      "model.layers.32.mlp.shared_experts.down_proj",
      "model.layers.33.self_attn.q_proj",
      "model.layers.33.self_attn.k_proj",
      "model.layers.33.self_attn.v_proj",
      "model.layers.33.self_attn.o_proj",
      "model.layers.33.mlp.shared_experts.gate_proj",
      "model.layers.33.mlp.shared_experts.up_proj",
      "model.layers.33.mlp.shared_experts.down_proj",
      "model.layers.34.self_attn.q_proj",
      "model.layers.34.self_attn.k_proj",
      "model.layers.34.self_attn.v_proj",
      "model.layers.34.self_attn.o_proj",
      "model.layers.34.mlp.shared_experts.gate_proj",
      "model.layers.34.mlp.shared_experts.up_proj",
      "model.layers.34.mlp.shared_experts.down_proj",
      "model.layers.35.self_attn.q_proj",
      "model.layers.35.self_attn.k_proj",
      "model.layers.35.self_attn.v_proj",
      "model.layers.35.self_attn.o_proj",
      "model.layers.35.mlp.shared_experts.gate_proj",
      "model.layers.35.mlp.shared_experts.up_proj",
      "model.layers.35.mlp.shared_experts.down_proj",
      "model.layers.36.self_attn.q_proj",
      "model.layers.36.self_attn.k_proj",
      "model.layers.36.self_attn.v_proj",
      "model.layers.36.self_attn.o_proj",
      "model.layers.36.mlp.shared_experts.gate_proj",
      "model.layers.36.mlp.shared_experts.up_proj",
      "model.layers.36.mlp.shared_experts.down_proj",
      "model.layers.37.self_attn.q_proj",
      "model.layers.37.self_attn.k_proj",
      "model.layers.37.self_attn.v_proj",
      "model.layers.37.self_attn.o_proj",
      "model.layers.37.mlp.shared_experts.gate_proj",
      "model.layers.37.mlp.shared_experts.up_proj",
      "model.layers.37.mlp.shared_experts.down_proj",
      "model.layers.38.self_attn.q_proj",
      "model.layers.38.self_attn.k_proj",
      "model.layers.38.self_attn.v_proj",
      "model.layers.38.self_attn.o_proj",
      "model.layers.38.mlp.shared_experts.gate_proj",
      "model.layers.38.mlp.shared_experts.up_proj",
      "model.layers.38.mlp.shared_experts.down_proj",
      "model.layers.39.self_attn.q_proj",
      "model.layers.39.self_attn.k_proj",
      "model.layers.39.self_attn.v_proj",
      "model.layers.39.self_attn.o_proj",
      "model.layers.39.mlp.shared_experts.gate_proj",
      "model.layers.39.mlp.shared_experts.up_proj",
      "model.layers.39.mlp.shared_experts.down_proj",
      "model.layers.40.self_attn.q_proj",
      "model.layers.40.self_attn.k_proj",
      "model.layers.40.self_attn.v_proj",
      "model.layers.40.self_attn.o_proj",
      "model.layers.40.mlp.shared_experts.gate_proj",
      "model.layers.40.mlp.shared_experts.up_proj",
      "model.layers.40.mlp.shared_experts.down_proj",
      "model.layers.41.self_attn.q_proj",
      "model.layers.41.self_attn.k_proj",
      "model.layers.41.self_attn.v_proj",
      "model.layers.41.self_attn.o_proj",
      "model.layers.41.mlp.shared_experts.gate_proj",
      "model.layers.41.mlp.shared_experts.up_proj",
      "model.layers.41.mlp.shared_experts.down_proj",
      "model.layers.42.self_attn.q_proj",
      "model.layers.42.self_attn.k_proj",
      "model.layers.42.self_attn.v_proj",
      "model.layers.42.self_attn.o_proj",
      "model.layers.42.mlp.shared_experts.gate_proj",
      "model.layers.42.mlp.shared_experts.up_proj",
      "model.layers.42.mlp.shared_experts.down_proj",
      "model.layers.43.self_attn.q_proj",
      "model.layers.43.self_attn.k_proj",
      "model.layers.43.self_attn.v_proj",
      "model.layers.43.self_attn.o_proj",
      "model.layers.43.mlp.shared_experts.gate_proj",
      "model.layers.43.mlp.shared_experts.up_proj",
      "model.layers.43.mlp.shared_experts.down_proj",
      "model.layers.44.self_attn.q_proj",
      "model.layers.44.self_attn.k_proj",
      "model.layers.44.self_attn.v_proj",
      "model.layers.44.self_attn.o_proj",
      "model.layers.44.mlp.shared_experts.gate_proj",
      "model.layers.44.mlp.shared_experts.up_proj",
      "model.layers.44.mlp.shared_experts.down_proj",
      "model.layers.45.self_attn.q_proj",
      "model.layers.45.self_attn.k_proj",
      "model.layers.45.self_attn.v_proj",
      "model.layers.45.self_attn.o_proj",
      "model.layers.45.mlp.shared_experts.gate_proj",
      "model.layers.45.mlp.shared_experts.up_proj",
      "model.layers.45.mlp.shared_experts.down_proj",
      "lm_head"
    ],
    "kv_cache_scheme": null,
    "quant_method": "compressed-tensors",
    "quantization_status": "compressed",
    "sparsity_config": {},
    "transform_config": {},
    "version": "0.12.2"
  },
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 1000000,
  "routed_scaling_factor": 1.0,
  "tie_word_embeddings": false,
  "topk_group": 1,
  "transformers_version": "4.56.2",
  "use_cache": true,
  "use_qk_norm": false,
  "vocab_size": 151552
}
generation_config.json ADDED
@@ -0,0 +1,14 @@
{
  "_from_model_config": true,
  "do_sample": true,
  "eos_token_id": [
    151329,
    151336,
    151338
  ],
  "min_p": 0.05,
  "pad_token_id": 151329,
  "temperature": 0.8,
  "top_p": 0.95,
  "transformers_version": "4.56.2"
}
model-00001-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2f31178a14945e05a33b292a870c234f8a6f2442a6913b5c5c1d54cad5e83489
size 4997984280
model-00002-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f427ff1a9a1fec0ab7ad56272409aed8a2ed783ae0c2fa078df188037b9ad178
size 4998615584
model-00003-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2abc175d520311d0b296c96861acbc49c468b87d225f40cc0b337260653db2b9
size 4998615696
model-00004-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eda15385dc06c247aaacf24a32b0e3276a79577829af208e5c6573b072047bdf
size 4999357328
model-00005-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b120a3af82ddef351483d0f780df27e14da4252bbde74269eaac32760fe6deab
size 4998619544
model-00006-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2d15e33cec2941d52769d4f0aa87350f530b68096a83d7016398d8b56b74a633
size 4998619656
model-00007-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9c95397e718aab16335458598107553f671f4d4301cc40913465ff0da49d22ad
size 4999357512
model-00008-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8be0612ce358b2a19850e3371ee575442bcec26a629e30c76659dc15660545ae
size 4998619544
model-00009-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:59d9176cac635c1f196e20312adda1359ccbaa4309aaa9ca192558a32bd803b7
size 4998619656
model-00010-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f5071929b799809ae04e436faaf1d409963a44551da7023267515519458b4240
size 4999357512
model-00011-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a909d45b42ee120da85ca78f816274e2f4def602622a825d196ab2bc45f3b120
size 4998619544
model-00012-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3acf084cc7e8c59cb9caf16a9f9d914770645227eed3784f8499d1766a1be964
size 4998619656
model-00013-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3e82b2b3be9a66e0445dd0b1bf7cd2a72fb185eec705d613243d80294863e551
size 4999357512
model-00014-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c802cefaea29976a29f6db9b5bf460a67639900ff1a488a6f138500fadfe3df5
size 4228942984
model-00015-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3215a617243d684b885879f7ba6eee3f5c346651709bc26c02551ca55bd54297
size 1241514112
model.safetensors.index.json ADDED
The diff for this file is too large to render.
recipe.yaml ADDED
@@ -0,0 +1,28 @@
default_stage:
  default_modifiers:
    AWQModifier:
      config_groups:
        group_0:
          targets: ['re:.*mlp\.experts\.[0-9]+\.(down|gate|up)_proj$']
          weights:
            num_bits: 4
            type: int
            symmetric: true
            group_size: 32
            strategy: group
            block_structure: null
            dynamic: false
            actorder: null
            observer: mse
            observer_kwargs: {}
            input_activations: null
            output_activations: null
            format: null
      targets: ['re:.*mlp\.experts\.[0-9]+\.(down|gate|up)_proj$']
      ignore: []
      mappings:
      - smooth_layer: re:.*post_attention_layernorm$
        balance_layers: ['re:.*gate_proj$', 're:.*up_proj$']
      - smooth_layer: re:.*up_proj$
        balance_layers: ['re:.*down_proj$']
      duo_scaling: true
special_tokens_map.json ADDED
@@ -0,0 +1,40 @@
{
  "additional_special_tokens": [
    "<|endoftext|>",
    "[MASK]",
    "[gMASK]",
    "[sMASK]",
    "<sop>",
    "<eop>",
    "<|system|>",
    "<|user|>",
    "<|assistant|>",
    "<|observation|>",
    "<|begin_of_image|>",
    "<|end_of_image|>",
    "<|begin_of_video|>",
    "<|end_of_video|>",
    "<|begin_of_audio|>",
    "<|end_of_audio|>",
    "<|begin_of_transcription|>",
    "<|end_of_transcription|>",
    "<|code_prefix|>",
    "<|code_middle|>",
    "<|code_suffix|>",
    "/nothink"
  ],
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5f0a7d462ffab1bb3530eb290e68bd4f8eb603e92080d8d66bdf3ccd03bcbcb3
size 19970799
tokenizer_config.json ADDED
@@ -0,0 +1,325 @@
{
  "added_tokens_decoder": {
    "151329": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151330": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151331": {
      "content": "[gMASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151332": {
      "content": "[sMASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151333": {
      "content": "<sop>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151334": {
      "content": "<eop>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151335": {
      "content": "<|system|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151336": {
      "content": "<|user|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151337": {
      "content": "<|assistant|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151338": {
      "content": "<|observation|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151339": {
      "content": "<|begin_of_image|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151340": {
      "content": "<|end_of_image|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151341": {
      "content": "<|begin_of_video|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151342": {
      "content": "<|end_of_video|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151343": {
      "content": "<|begin_of_audio|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151344": {
      "content": "<|end_of_audio|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151345": {
      "content": "<|begin_of_transcription|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151346": {
      "content": "<|end_of_transcription|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151347": {
      "content": "<|code_prefix|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151348": {
      "content": "<|code_middle|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151349": {
      "content": "<|code_suffix|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151350": {
      "content": "<think>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151351": {
      "content": "</think>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151352": {
      "content": "<tool_call>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151353": {
      "content": "</tool_call>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151354": {
      "content": "<tool_response>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151355": {
      "content": "</tool_response>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151356": {
      "content": "<arg_key>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151357": {
      "content": "</arg_key>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151358": {
      "content": "<arg_value>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151359": {
      "content": "</arg_value>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151360": {
      "content": "/nothink",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151361": {
      "content": "<|begin_of_box|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151362": {
      "content": "<|end_of_box|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151363": {
      "content": "<|image|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151364": {
      "content": "<|video|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    }
  },
  "additional_special_tokens": [
    "<|endoftext|>",
    "[MASK]",
    "[gMASK]",
    "[sMASK]",
    "<sop>",
    "<eop>",
    "<|system|>",
    "<|user|>",
    "<|assistant|>",
    "<|observation|>",
    "<|begin_of_image|>",
    "<|end_of_image|>",
    "<|begin_of_video|>",
    "<|end_of_video|>",
    "<|begin_of_audio|>",
    "<|end_of_audio|>",
    "<|begin_of_transcription|>",
    "<|end_of_transcription|>",
    "<|code_prefix|>",
    "<|code_middle|>",
    "<|code_suffix|>",
    "/nothink"
  ],
  "clean_up_tokenization_spaces": false,
  "do_lower_case": false,
  "eos_token": "<|endoftext|>",
  "extra_special_tokens": {},
  "model_max_length": 128000,
  "pad_token": "<|endoftext|>",
  "padding_side": "left",
  "remove_space": false,
  "tokenizer_class": "PreTrainedTokenizerFast"
}