Qubitium committed on
Commit
7fe660b
·
verified ·
1 Parent(s): 6907be3

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. README.md +1 -4
  2. chat_template.jinja +159 -0
  3. config.json +109 -0
  4. generation_config.json +7 -0
  5. model-00002-of-00130.safetensors +3 -0
  6. model-00003-of-00130.safetensors +3 -0
  7. model-00004-of-00130.safetensors +3 -0
  8. model-00009-of-00130.safetensors +3 -0
  9. model-00012-of-00130.safetensors +3 -0
  10. model-00013-of-00130.safetensors +3 -0
  11. model-00015-of-00130.safetensors +3 -0
  12. model-00018-of-00130.safetensors +3 -0
  13. model-00019-of-00130.safetensors +3 -0
  14. model-00020-of-00130.safetensors +3 -0
  15. model-00021-of-00130.safetensors +3 -0
  16. model-00026-of-00130.safetensors +3 -0
  17. model-00030-of-00130.safetensors +3 -0
  18. model-00031-of-00130.safetensors +3 -0
  19. model-00037-of-00130.safetensors +3 -0
  20. model-00040-of-00130.safetensors +3 -0
  21. model-00041-of-00130.safetensors +3 -0
  22. model-00046-of-00130.safetensors +3 -0
  23. model-00050-of-00130.safetensors +3 -0
  24. model-00051-of-00130.safetensors +3 -0
  25. model-00057-of-00130.safetensors +3 -0
  26. model-00062-of-00130.safetensors +3 -0
  27. model-00064-of-00130.safetensors +3 -0
  28. model-00068-of-00130.safetensors +3 -0
  29. model-00069-of-00130.safetensors +3 -0
  30. model-00072-of-00130.safetensors +3 -0
  31. model-00073-of-00130.safetensors +3 -0
  32. model-00075-of-00130.safetensors +3 -0
  33. model-00078-of-00130.safetensors +3 -0
  34. model-00079-of-00130.safetensors +3 -0
  35. model-00080-of-00130.safetensors +3 -0
  36. model-00081-of-00130.safetensors +3 -0
  37. model-00086-of-00130.safetensors +3 -0
  38. model-00091-of-00130.safetensors +3 -0
  39. model-00097-of-00130.safetensors +3 -0
  40. model-00102-of-00130.safetensors +3 -0
  41. model-00104-of-00130.safetensors +3 -0
  42. model-00105-of-00130.safetensors +3 -0
  43. model-00109-of-00130.safetensors +3 -0
  44. model-00113-of-00130.safetensors +3 -0
  45. model-00114-of-00130.safetensors +3 -0
  46. model-00115-of-00130.safetensors +3 -0
  47. model-00118-of-00130.safetensors +3 -0
  48. model-00120-of-00130.safetensors +3 -0
  49. model-00126-of-00130.safetensors +3 -0
  50. model-00127-of-00130.safetensors +3 -0
README.md CHANGED
@@ -1,13 +1,10 @@
1
  ---
2
  pipeline_tag: text-generation
3
  license: mit
4
- library_name: transformers
5
  ---
6
 
7
  <div align="center">
8
 
9
- <a href='https://github.com/modelcloud/gptqmodel'>GPT-QModel</a> team dequantized the FP8 model into BF16 for inference on gpus without native FP8 support.
10
-
11
  <svg width="60%" height="auto" viewBox="0 0 144 48" fill="none" xmlns="http://www.w3.org/2000/svg">
12
  <path d="M26.6782 7.96523C26.6782 7.02436 25.913 6.26087 24.9739 6.26087C24.0348 6.26087 23.2695 7.0261 23.2695 7.96523V36.2139C23.2695 38.4 21.4904 40.1791 19.3043 40.1791C17.1183 40.1791 15.3391 38.4 15.3391 36.2139V18.0904C15.3391 17.1496 14.5739 16.3861 13.6348 16.3861C12.6956 16.3861 11.9304 17.1513 11.9304 18.0904V25.7722C11.9304 27.9583 10.1513 29.7374 7.96518 29.7374C5.7791 29.7374 4 27.9583 4 25.7722V22.9878C4 22.3635 4.50609 21.8574 5.13043 21.8574C5.75478 21.8574 6.26087 22.3635 6.26087 22.9878V25.7722C6.26087 26.713 7.02605 27.4765 7.96518 27.4765C8.90431 27.4765 9.66954 26.7113 9.66954 25.7722V18.0904C9.66954 15.9044 11.4487 14.1252 13.6348 14.1252C15.8209 14.1252 17.6 15.9044 17.6 18.0904V36.2139C17.6 37.1548 18.3652 37.9183 19.3043 37.9183C20.2435 37.9183 21.0087 37.153 21.0087 36.2139V25.1322V7.96523C21.0087 5.77914 22.7878 4 24.9739 4C27.16 4 28.9391 5.77914 28.9391 7.96523V31.3565C28.9391 31.9809 28.433 32.487 27.8087 32.487C27.1843 32.487 26.6782 31.9809 26.6782 31.3565V7.96523ZM47.6539 14.1252C45.4678 14.1252 43.6887 15.9044 43.6887 18.0904V33.2296C43.6887 34.1704 42.9235 34.9339 41.9843 34.9339C41.0452 34.9339 40.28 34.1687 40.28 33.2296V7.96523C40.28 5.77914 38.5008 4 36.3148 4C34.1287 4 32.3496 5.77914 32.3496 7.96523V40.0348C32.3496 40.9756 31.5843 41.7391 30.6452 41.7391C29.7061 41.7391 28.9409 40.9739 28.9409 40.0348V36.0643C28.9409 35.44 28.4348 34.9339 27.8104 34.9339C27.1861 34.9339 26.68 35.44 26.68 36.0643V40.0348C26.68 42.2209 28.4591 44 30.6452 44C32.8313 44 34.6104 42.2209 34.6104 40.0348V7.96523C34.6104 7.02436 35.3756 6.26087 36.3148 6.26087C37.2539 6.26087 38.0191 7.0261 38.0191 7.96523V33.2296C38.0191 35.4156 39.7982 37.1948 41.9843 37.1948C44.1704 37.1948 45.9496 35.4156 45.9496 33.2296V18.0904C45.9496 17.1496 46.7148 16.3861 47.6539 16.3861C48.593 16.3861 49.3582 17.1513 49.3582 18.0904V31.3565C49.3582 31.9809 49.8643 32.487 50.4887 32.487C51.113 32.487 51.6191 31.9809 51.6191 31.3565V18.0904C51.6191 15.9044 49.84 14.1252 
47.6539 14.1252Z" fill="url(#paint0_linear_17_483)"/>
13
  <path d="M68.7671 16.5615H71.2541C71.3254 16.5615 71.3845 16.5859 71.435 16.6363C71.4836 16.6868 71.5097 16.7459 71.5097 16.8172V31.1824C71.5097 31.2537 71.4854 31.3128 71.435 31.3633C71.3845 31.4137 71.3254 31.4381 71.2541 31.4381H68.7671C68.6958 31.4381 68.6367 31.4137 68.5862 31.3633C68.5358 31.3146 68.5115 31.2537 68.5115 31.1824V21.812C68.5115 21.7563 68.4976 21.7268 68.4697 21.7268C68.4419 21.7268 68.4123 21.7476 68.3845 21.7911L66.1323 25.318C66.061 25.4311 65.9619 25.4885 65.8349 25.4885H64.581C64.4541 25.4885 64.3549 25.4328 64.2836 25.318L62.0315 21.7911C62.0036 21.7494 61.9741 21.7302 61.9462 21.7372C61.9184 21.7441 61.9045 21.7772 61.9045 21.8328V31.1824C61.9045 31.2537 61.8802 31.3128 61.8297 31.3633C61.7793 31.4137 61.7202 31.4381 61.6489 31.4381H59.1619C59.0906 31.4381 59.0315 31.4137 58.981 31.3633C58.9306 31.3146 58.9062 31.2537 58.9062 31.1824V16.8172C58.9062 16.7459 58.9306 16.6868 58.981 16.6363C59.0315 16.5859 59.0906 16.5615 59.1619 16.5615H61.6489C61.7758 16.5615 61.8749 16.6189 61.9462 16.732L65.1341 21.6833C65.1758 21.7685 65.2193 21.7685 65.261 21.6833L68.4697 16.732C68.541 16.6189 68.6402 16.5615 68.7671 16.5615Z" fill="currentColor"/>
@@ -159,7 +156,7 @@ We look forward to your feedback and to collaborating with developers and resear
159
 
160
  ## How to Use
161
 
162
- - Our product **MiniMax Agent**, built on MiniMax-M2, is now **publicly available and free** for a limited time: https://agent.minimax.io/
163
 
164
  - The MiniMax-M2 API is now live on the **MiniMax Open Platform** and is **free** for a limited time: https://platform.minimax.io/docs/guides/text-generation
165
 
 
1
  ---
2
  pipeline_tag: text-generation
3
  license: mit
 
4
  ---
5
 
6
  <div align="center">
7
 
 
 
8
  <svg width="60%" height="auto" viewBox="0 0 144 48" fill="none" xmlns="http://www.w3.org/2000/svg">
9
  <path d="M26.6782 7.96523C26.6782 7.02436 25.913 6.26087 24.9739 6.26087C24.0348 6.26087 23.2695 7.0261 23.2695 7.96523V36.2139C23.2695 38.4 21.4904 40.1791 19.3043 40.1791C17.1183 40.1791 15.3391 38.4 15.3391 36.2139V18.0904C15.3391 17.1496 14.5739 16.3861 13.6348 16.3861C12.6956 16.3861 11.9304 17.1513 11.9304 18.0904V25.7722C11.9304 27.9583 10.1513 29.7374 7.96518 29.7374C5.7791 29.7374 4 27.9583 4 25.7722V22.9878C4 22.3635 4.50609 21.8574 5.13043 21.8574C5.75478 21.8574 6.26087 22.3635 6.26087 22.9878V25.7722C6.26087 26.713 7.02605 27.4765 7.96518 27.4765C8.90431 27.4765 9.66954 26.7113 9.66954 25.7722V18.0904C9.66954 15.9044 11.4487 14.1252 13.6348 14.1252C15.8209 14.1252 17.6 15.9044 17.6 18.0904V36.2139C17.6 37.1548 18.3652 37.9183 19.3043 37.9183C20.2435 37.9183 21.0087 37.153 21.0087 36.2139V25.1322V7.96523C21.0087 5.77914 22.7878 4 24.9739 4C27.16 4 28.9391 5.77914 28.9391 7.96523V31.3565C28.9391 31.9809 28.433 32.487 27.8087 32.487C27.1843 32.487 26.6782 31.9809 26.6782 31.3565V7.96523ZM47.6539 14.1252C45.4678 14.1252 43.6887 15.9044 43.6887 18.0904V33.2296C43.6887 34.1704 42.9235 34.9339 41.9843 34.9339C41.0452 34.9339 40.28 34.1687 40.28 33.2296V7.96523C40.28 5.77914 38.5008 4 36.3148 4C34.1287 4 32.3496 5.77914 32.3496 7.96523V40.0348C32.3496 40.9756 31.5843 41.7391 30.6452 41.7391C29.7061 41.7391 28.9409 40.9739 28.9409 40.0348V36.0643C28.9409 35.44 28.4348 34.9339 27.8104 34.9339C27.1861 34.9339 26.68 35.44 26.68 36.0643V40.0348C26.68 42.2209 28.4591 44 30.6452 44C32.8313 44 34.6104 42.2209 34.6104 40.0348V7.96523C34.6104 7.02436 35.3756 6.26087 36.3148 6.26087C37.2539 6.26087 38.0191 7.0261 38.0191 7.96523V33.2296C38.0191 35.4156 39.7982 37.1948 41.9843 37.1948C44.1704 37.1948 45.9496 35.4156 45.9496 33.2296V18.0904C45.9496 17.1496 46.7148 16.3861 47.6539 16.3861C48.593 16.3861 49.3582 17.1513 49.3582 18.0904V31.3565C49.3582 31.9809 49.8643 32.487 50.4887 32.487C51.113 32.487 51.6191 31.9809 51.6191 31.3565V18.0904C51.6191 15.9044 49.84 14.1252 
47.6539 14.1252Z" fill="url(#paint0_linear_17_483)"/>
10
  <path d="M68.7671 16.5615H71.2541C71.3254 16.5615 71.3845 16.5859 71.435 16.6363C71.4836 16.6868 71.5097 16.7459 71.5097 16.8172V31.1824C71.5097 31.2537 71.4854 31.3128 71.435 31.3633C71.3845 31.4137 71.3254 31.4381 71.2541 31.4381H68.7671C68.6958 31.4381 68.6367 31.4137 68.5862 31.3633C68.5358 31.3146 68.5115 31.2537 68.5115 31.1824V21.812C68.5115 21.7563 68.4976 21.7268 68.4697 21.7268C68.4419 21.7268 68.4123 21.7476 68.3845 21.7911L66.1323 25.318C66.061 25.4311 65.9619 25.4885 65.8349 25.4885H64.581C64.4541 25.4885 64.3549 25.4328 64.2836 25.318L62.0315 21.7911C62.0036 21.7494 61.9741 21.7302 61.9462 21.7372C61.9184 21.7441 61.9045 21.7772 61.9045 21.8328V31.1824C61.9045 31.2537 61.8802 31.3128 61.8297 31.3633C61.7793 31.4137 61.7202 31.4381 61.6489 31.4381H59.1619C59.0906 31.4381 59.0315 31.4137 58.981 31.3633C58.9306 31.3146 58.9062 31.2537 58.9062 31.1824V16.8172C58.9062 16.7459 58.9306 16.6868 58.981 16.6363C59.0315 16.5859 59.0906 16.5615 59.1619 16.5615H61.6489C61.7758 16.5615 61.8749 16.6189 61.9462 16.732L65.1341 21.6833C65.1758 21.7685 65.2193 21.7685 65.261 21.6833L68.4697 16.732C68.541 16.6189 68.6402 16.5615 68.7671 16.5615Z" fill="currentColor"/>
 
156
 
157
  ## How to Use
158
 
159
+ - Our product **MiniMax Agent**, built on MiniMax-M2, is now **publicly available and free** for a limited time: https://agent.minimax.io/
160
 
161
  - The MiniMax-M2 API is now live on the **MiniMax Open Platform** and is **free** for a limited time: https://platform.minimax.io/docs/guides/text-generation
162
 
chat_template.jinja ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# ----------‑‑‑ special token variables ‑‑‑---------- #}
2
+ {%- set toolcall_begin_token = '<minimax:tool_call>' -%}
3
+ {%- set toolcall_end_token = '</minimax:tool_call>' -%}
4
+ {#- Tool Rendering Functions ============================================== -#}
5
+ {%- macro render_tool_namespace(namespace_name, tool_list) -%}
6
+ {%- for tool in tool_list -%}
7
+ <tool>{{ tool.function | tojson(ensure_ascii=False) }}</tool>
8
+ {% endfor -%}
9
+ {%- endmacro -%}
10
+ {%- macro visible_text(content) -%}
11
+ {%- if content is string -%}
12
+ {{ content }}
13
+ {%- elif content is iterable and content is not mapping -%}
14
+ {%- for item in content -%}
15
+ {%- if item is mapping and item.type == 'text' -%}
16
+ {{- item.text }}
17
+ {%- elif item is string -%}
18
+ {{- item }}
19
+ {%- endif -%}
20
+ {%- endfor -%}
21
+ {%- else -%}
22
+ {{- content }}
23
+ {%- endif -%}
24
+ {%- endmacro -%}
25
+ {#- System Message Construction ============================================ -#}
26
+ {%- macro build_system_message(system_message) -%}
27
+ {%- if system_message and system_message.content -%}
28
+ {{- visible_text(system_message.content) }}
29
+ {%- else -%}
30
+ {%- if model_identity is not defined -%}
31
+ {%- set model_identity = "You are a helpful assistant." -%}
32
+ {%- endif -%}
33
+ {{- model_identity }}
34
+ {%- endif -%}
35
+
36
+ {#- Handle current_date -#}
37
+ {%- if system_message and system_message.current_date -%}
38
+ {{- '\n' ~ 'Current date: ' + system_message.current_date }}
39
+ {%- endif -%}
40
+ {#- Handle current_location -#}
41
+ {%- if system_message and system_message.current_location -%}
42
+ {{- '\n' ~ 'Current location: ' + system_message.current_location }}
43
+ {%- endif -%}
44
+ {%- endmacro -%}
45
+ {#- Main Template Logic ================================================= -#}
46
+ {#- Extract system message (only first message if it's system) -#}
47
+ {%- set system_message = none -%}
48
+ {%- set conversation_messages = messages -%}
49
+ {%- if messages and messages[0].role == "system" -%}
50
+ {%- set system_message = messages[0] -%}
51
+ {%- set conversation_messages = messages[1:] -%}
52
+ {%- endif -%}
53
+ {#- Get the last user message turn, for interleaved thinking -#}
54
+ {%- set ns = namespace(last_user_index=-1) %}
55
+ {% for m in conversation_messages %}
56
+ {%- if m.role == 'user' %}
57
+ {% set ns.last_user_index = loop.index0 -%}
58
+ {%- endif %}
59
+ {%- endfor %}
60
+ {#- Render system message -#}
61
+ {{- ']~!b[' ~ ']~b]system' ~ '\n' }}
62
+ {{- build_system_message(system_message) }}
63
+ {#- Render tools if available -#}
64
+ {%- if tools -%}
65
+ {{- '\n\n' ~ '# Tools' ~ '\n' ~ 'You may call one or more tools to assist with the user query.\nHere are the tools available in JSONSchema format:' ~ '\n' }}
66
+ {{- '\n' ~ '<tools>' ~ '\n' }}
67
+ {{- render_tool_namespace("functions", tools) }}
68
+ {{- '</tools>' ~ '\n\n' }}
69
+ {{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\n' }}
70
+ {{- '\n' ~ toolcall_begin_token }}
71
+ <invoke name="tool-name-1">
72
+ <parameter name="param-key-1">param-value-1</parameter>
73
+ <parameter name="param-key-2">param-value-2</parameter>
74
+ ...
75
+ </invoke>
76
+ {{- '\n' ~ toolcall_end_token }}
77
+ {%- endif -%}
78
+ {{- '[e~[\n' }}
79
+
80
+ {#- Render messages -#}
81
+ {%- set last_tool_call = namespace(name=none) -%}
82
+ {%- for message in conversation_messages -%}
83
+ {%- if message.role == 'assistant' -%}
84
+ {#- Only render reasoning_content if no user message follows -#}
85
+ {{- ']~b]ai' ~ '\n' }}
86
+
87
+ {%- set reasoning_content = '' %}
88
+ {%- set content = visible_text(message.content) %}
89
+ {%- if message.reasoning_content is string %}
90
+ {%- set reasoning_content = message.reasoning_content %}
91
+ {%- else %}
92
+ {%- if '</think>' in content %}
93
+ {%- set reasoning_content = content.split('</think>')[0].strip('\n').split('<think>')[-1].strip('\n') %}
94
+ {%- set content = content.split('</think>')[-1].strip('\n') %}
95
+ {%- endif %}
96
+ {%- endif %}
97
+ {%- if reasoning_content and loop.index0 > ns.last_user_index -%}
98
+ {{- '<think>' ~ '\n' ~ reasoning_content ~ '\n' ~ '</think>' ~ '\n\n' }}
99
+ {%- endif -%}
100
+ {%- if content -%}
101
+ {{- content }}
102
+ {%- endif -%}
103
+ {%- if message.tool_calls -%}
104
+ {{- '\n' ~ toolcall_begin_token ~ '\n' }}
105
+
106
+ {%- for tool_call in message.tool_calls -%}
107
+ {%- if tool_call.function %}
108
+ {%- set tool_call = tool_call.function %}
109
+ {%- endif %}
110
+ {{- '<invoke name="' + tool_call.name + '">' }}
111
+ {% set _args = tool_call.arguments %}
112
+ {%- for k, v in _args.items() %}
113
+ {{- '<parameter name="' + k + '">' }}
114
+ {{- v | tojson(ensure_ascii=False) if v is not string else v }}
115
+ {{- '</parameter>' }}
116
+ {% endfor %}
117
+ {{- '</invoke>' ~ '\n' }}
118
+ {%- endfor -%}
119
+
120
+ {{- toolcall_end_token}}
121
+ {%- set last_tool_call.name = message.tool_calls[-1].name -%}
122
+ {%- else -%}
123
+ {%- set last_tool_call.name = none -%}
124
+ {%- endif -%}
125
+ {{- '[e~[' ~ '\n' }}
126
+
127
+ {%- elif message.role == 'tool' -%}
128
+ {%- if last_tool_call.name is none -%}
129
+ {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
130
+ {%- endif -%}
131
+ {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}
132
+ {{- ']~b]tool' }}
133
+ {%- endif -%}
134
+ {%- if message.content is string -%}
135
+ {{- '\n<response>' }}
136
+ {{- message.content }}
137
+ {{- '</response>' }}
138
+ {%- else -%}
139
+ {%- for tr in message.content -%}
140
+ {{- '\n<response>' }}
141
+ {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}
142
+ {{- '\n</response>' }}
143
+ {%- endfor -%}
144
+ {%- endif -%}
145
+ {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}
146
+ {{- '[e~[\n' -}}
147
+ {%- endif -%}
148
+
149
+ {%- elif message.role == 'user' -%}
150
+ {{- ']~b]user' ~ '\n' }}
151
+ {{- visible_text(message.content) }}
152
+ {{- '[e~[' ~ '\n' }}
153
+ {%- endif -%}
154
+ {%- endfor -%}
155
+
156
+ {#- Generation prompt -#}
157
+ {%- if add_generation_prompt -%}
158
+ {{- ']~b]ai' ~ '\n' ~ '<think>' ~ '\n' }}
159
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MiniMaxM2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "attn_type_list": [
7
+ 1,
8
+ 1,
9
+ 1,
10
+ 1,
11
+ 1,
12
+ 1,
13
+ 1,
14
+ 1,
15
+ 1,
16
+ 1,
17
+ 1,
18
+ 1,
19
+ 1,
20
+ 1,
21
+ 1,
22
+ 1,
23
+ 1,
24
+ 1,
25
+ 1,
26
+ 1,
27
+ 1,
28
+ 1,
29
+ 1,
30
+ 1,
31
+ 1,
32
+ 1,
33
+ 1,
34
+ 1,
35
+ 1,
36
+ 1,
37
+ 1,
38
+ 1,
39
+ 1,
40
+ 1,
41
+ 1,
42
+ 1,
43
+ 1,
44
+ 1,
45
+ 1,
46
+ 1,
47
+ 1,
48
+ 1,
49
+ 1,
50
+ 1,
51
+ 1,
52
+ 1,
53
+ 1,
54
+ 1,
55
+ 1,
56
+ 1,
57
+ 1,
58
+ 1,
59
+ 1,
60
+ 1,
61
+ 1,
62
+ 1,
63
+ 1,
64
+ 1,
65
+ 1,
66
+ 1,
67
+ 1,
68
+ 1
69
+ ],
70
+ "bos_token_id": null,
71
+ "eos_token_id": null,
72
+ "head_dim": 128,
73
+ "hidden_act": "silu",
74
+ "hidden_size": 3072,
75
+ "initializer_range": 0.02,
76
+ "intermediate_size": 1536,
77
+ "layernorm_full_attention_beta": 1.0,
78
+ "layernorm_linear_attention_beta": 1.0,
79
+ "layernorm_mlp_beta": 1.0,
80
+ "max_position_embeddings": 196608,
81
+ "mlp_intermediate_size": 8192,
82
+ "model_type": "mixtral",
83
+ "mtp_transformer_layers": 1,
84
+ "num_attention_heads": 48,
85
+ "num_experts_per_tok": 8,
86
+ "num_hidden_layers": 62,
87
+ "num_key_value_heads": 8,
88
+ "num_local_experts": 256,
89
+ "num_mtp_modules": 3,
90
+ "output_router_logits": false,
91
+ "qk_norm_type": "per_layer",
92
+ "rms_norm_eps": 1e-06,
93
+ "rope_theta": 5000000,
94
+ "rotary_dim": 64,
95
+ "router_aux_loss_coef": 0.001,
96
+ "router_jitter_noise": 0.0,
97
+ "scoring_func": "sigmoid",
98
+ "shared_intermediate_size": 0,
99
+ "shared_moe_mode": "sigmoid",
100
+ "sliding_window": null,
101
+ "tie_word_embeddings": false,
102
+ "transformers_version": "4.46.1",
103
+ "use_cache": true,
104
+ "use_mtp": true,
105
+ "use_qk_norm": true,
106
+ "use_routing_bias": true,
107
+ "vocab_size": 200064,
108
+ "torch_dtype": "bfloat16"
109
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_sample": true,
3
+ "temperature": 1.0,
4
+ "top_p": 0.95,
5
+ "top_k": 40,
6
+ "transformers_version": "4.46.1"
7
+ }
model-00002-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5cdb8487957e7e4151ab4fc2e9e2822b6cc52f3f7d1e5b9ceeaee7bd464c6f1
3
+ size 4923826936
model-00003-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5784b7fac297facb2e542752469d3a55c540f997c1572c17e55e90cdc3aa0b01
3
+ size 2416280192
model-00004-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5f5d71b2a9c9ffc719732baea2bbc356b1afeecb60f1093b9c3341d31ecaa35
3
+ size 4923826936
model-00009-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcd125a92f3a323ed63a98033766ebf1f63eea2a6113a9e589168ac42b09cd5f
3
+ size 2416280192
model-00012-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70039f4cd4124bbb27970bd465e0c4c51a2461bdbb4f8268460cf47f09cee72e
3
+ size 4923826936
model-00013-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5820988019b7e8df3d32de0a8265206e203879a9f2e19ff9f19acfd6c549b086
3
+ size 2416280192
model-00015-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95cdf05464e9c0c7a70647dc726019ae64f4f08b7dba216ccd88e29eaffa9498
3
+ size 2416280192
model-00018-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f70bbfc07e702f1b155ea9f71343afbf5fb6f957cbb402dfa910974bf5d0da16
3
+ size 4923826936
model-00019-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee23fca561a07bf1a527d70046ad328972c7ecf07766c9faf5e298fdf2874f49
3
+ size 2416280192
model-00020-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c26ef124f7b568c80408399e7caf86d4f13bdac0471b796678cc776899f2d691
3
+ size 4923827976
model-00021-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4050b562effbc25203416d6303d8a2f1ab4cd1d81e45d11a28aa9dd42ebf686
3
+ size 2416280704
model-00026-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2655644dc99403a3cfcbfb1ef934610e8024dd9473ffb14986a6b940fba81d7e
3
+ size 4923827976
model-00030-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da69c44b27468fadfe2793ec7641cd88743aa0fbe93b2fa37ad0bc2913e4f05b
3
+ size 4923827976
model-00031-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:976b1a4986246d169f928713f95a62e3e40df69447e0a5dbe0cf94b85dde1326
3
+ size 2416280704
model-00037-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0985cfe14eb0793f6cc625cf96c9f104280469de7d16fcb7dd2f03b0bef3999f
3
+ size 2416280704
model-00040-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3443f802865fb2a4b2200fbf6b299b2f37f56a5b433b1d3aa2de13ab16fe90f8
3
+ size 4923827976
model-00041-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50d37bdb93f5ad78b52982f5506cfe01cf0b22c643cc761e302ba7f1744c5dc2
3
+ size 2416280704
model-00046-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3edd31a8e3257ff2f6ff01aaa9e0ec90c1b97439dcada6cc811273081b1c0671
3
+ size 4923827976
model-00050-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b2e8d02d2ae49dbaeefc0ede64954754a25c7066d49ef3e3a67300ac2a2dcfa
3
+ size 4923827976
model-00051-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b9122d511f08a6282412e78225ad5689983e9122a82e1f97673adfc9c86817b
3
+ size 2416280704
model-00057-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7de5cf9176243a0eca5779fc2071b177db11f074ed533e52b4309505767a8ff
3
+ size 2416280704
model-00062-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a854b39a1f47f35f5753c83780a4fca1c0285c5a5e49dc6337bdb6cc67295823
3
+ size 4923827976
model-00064-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed084cd6c612cafc5cca6aa6f610e724478ef0d93a8e5d9f14e2ba9abb30cbe
3
+ size 4923827976
model-00068-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:704d9810c25b5108fc74a79d00d4b3b8a35774ce766ca57ffc092d92bb205e33
3
+ size 4923827976
model-00069-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8c4b78f9a07100c11ca0f9504430131508ae26af4a2af8bc56f1699519562dd
3
+ size 2416280704
model-00072-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1795197cad450f7903857d5d5f277b20c701cbe0d6ca6a903e0d3c08f700371e
3
+ size 4923827976
model-00073-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2cb69c1d36e3043411dea5f933bceb02dbee3bcfa11f6cde6fafa8c0749f072
3
+ size 2416280704
model-00075-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a7b5c311122e9362b9958b6f4ce8348a80e24aa1b99ad7227a251d4e04fae75
3
+ size 2416280704
model-00078-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c95ee2da1c04f22f746d3ae03ceb64fe40f59da462065e94513db64821492ad
3
+ size 4923827976
model-00079-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b25fabe308a5a434cd485b46fbf04e808544780f9288aa19952f90e679fee492
3
+ size 2416280704
model-00080-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b71f07cb2b224b06aa8f7d0d2b32dac8ba476318c862180cd5cc7c6598dad70b
3
+ size 4923827976
model-00081-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40d2abb075b9f32bd21d5195d1f119965a4ba721f1c866f6377c3882bce809b7
3
+ size 2416280704
model-00086-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f1cea981a0214a2b6eee95666e8e96d20db7749fd3807deac2100f242b558dc
3
+ size 4923827976
model-00091-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:357f4a2daffb4e33c7949172f8510e8a28a1f3f34c2300262b4a69baa248aea8
3
+ size 2416280704
model-00097-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95b17aa57de8f1457e3669963f04586690fc144b6fb5bfbf207f7d07959f4a20
3
+ size 2416280704
model-00102-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0ca775aaa51ba98d4887afc7a2884ced0c1e760f28bbd4ac1fd79d45975e226
3
+ size 4923827976
model-00104-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:121271e55ac190bc8926bc91930d18e8591542ca4ba6e277e031b96e2c86b031
3
+ size 4923827976
model-00105-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65e2045c471f3ddcf6e0c208586dc04633aacb4e3ebf8e84590a19208291247b
3
+ size 2416280704
model-00109-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f95fb6974760b15c11c2ff133adb8b687013dc7ad68778dfc92bb3c3be98667
3
+ size 2416280704
model-00113-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:772a17c80315fd8897ff53a3b9e1a22e83bae70617e0c7ab77b7d8193f3a97ab
3
+ size 2416280704
model-00114-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a772afe1b6a8bf8638a69e922c7ebaea377b8a4b59a0bad83dff6eba18232d3
3
+ size 4923827976
model-00115-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31f235e1dcea86c27ecc8ebcfc1a0d2743c3018d3e129a920c14f329defd9a5a
3
+ size 2416280704
model-00118-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11338392979b86935227f3591f5579d7f8b73d262ad72b4c6696d95f5304d73b
3
+ size 4923827976
model-00120-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48759ed476a6ba2a94d4cb1ab238952d801c420de481ceb1d87ce2338bc3f56a
3
+ size 4923827976
model-00126-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60d95b10b6e140a9626a7058d5038528f2ff80148dc4569b881db56052046509
3
+ size 40
model-00127-of-00130.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60d95b10b6e140a9626a7058d5038528f2ff80148dc4569b881db56052046509
3
+ size 40