Update tokenizer_config.json
Browse files
- tokenizer_config.json +4 -6
tokenizer_config.json
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
{
|
| 2 |
-
"add_bos_token": false,
|
| 3 |
"add_prefix_space": false,
|
| 4 |
"added_tokens_decoder": {
|
| 5 |
"151643": {
|
|
@@ -195,15 +194,14 @@
|
|
| 195 |
"<|video_pad|>"
|
| 196 |
],
|
| 197 |
"bos_token": null,
|
| 198 |
-
"chat_template": "{
|
| 199 |
"clean_up_tokenization_spaces": false,
|
| 200 |
"eos_token": "<|im_end|>",
|
| 201 |
"errors": "replace",
|
| 202 |
-
"extra_special_tokens": {},
|
| 203 |
"model_max_length": 131072,
|
| 204 |
"pad_token": "<|endoftext|>",
|
| 205 |
-
"processor_class": "Qwen2_5_VLProcessor",
|
| 206 |
"split_special_tokens": false,
|
| 207 |
"tokenizer_class": "Qwen2Tokenizer",
|
| 208 |
-
"unk_token": null
|
| 209 |
-
}
|
|
|
|
|
|
| 1 |
{
|
|
|
|
| 2 |
"add_prefix_space": false,
|
| 3 |
"added_tokens_decoder": {
|
| 4 |
"151643": {
|
|
|
|
| 194 |
"<|video_pad|>"
|
| 195 |
],
|
| 196 |
"bos_token": null,
|
| 197 |
+
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
|
| 198 |
"clean_up_tokenization_spaces": false,
|
| 199 |
"eos_token": "<|im_end|>",
|
| 200 |
"errors": "replace",
|
|
|
|
| 201 |
"model_max_length": 131072,
|
| 202 |
"pad_token": "<|endoftext|>",
|
|
|
|
| 203 |
"split_special_tokens": false,
|
| 204 |
"tokenizer_class": "Qwen2Tokenizer",
|
| 205 |
+
"unk_token": null,
|
| 206 |
+
"add_bos_token": false
|
| 207 |
+
}
|