sergiopaniego HF Staff committed on
Commit
641b1d5
·
verified ·
1 Parent(s): 8fa9dc5

Training in progress, step 10

Browse files
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen2-0.5B-Instruct
3
+ library_name: transformers
4
+ model_name: Qwen2-0.5B-GRPO-test
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - grpo
9
+ licence: license
10
+ ---
11
+
12
+ # Model Card for Qwen2-0.5B-GRPO-test
13
+
14
+ This model is a fine-tuned version of [Qwen/Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="sergiopaniego/Qwen2-0.5B-GRPO-test", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+
31
+
32
+
33
+ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.23.1
38
+ - Transformers: 4.56.1
39
+ - PyTorch: 2.8.0+cu126
40
+ - Datasets: 4.0.0
41
+ - Tokenizers: 0.22.0
42
+
43
+ ## Citations
44
+
45
+ Cite GRPO as:
46
+
47
+ ```bibtex
48
+ @article{shao2024deepseekmath,
49
+ title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
50
+ author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
51
+ year = 2024,
52
+ eprint = {arXiv:2402.03300},
53
+ }
54
+
55
+ ```
56
+
57
+ Cite TRL as:
58
+
59
+ ```bibtex
60
+ @misc{vonwerra2022trl,
61
+ title = {{TRL: Transformer Reinforcement Learning}},
62
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
63
+ year = 2020,
64
+ journal = {GitHub repository},
65
+ publisher = {GitHub},
66
+ howpublished = {\url{https://github.com/huggingface/trl}}
67
+ }
68
+ ```
adapter_config.json CHANGED
@@ -3,6 +3,7 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "Qwen/Qwen2-0.5B-Instruct",
5
  "bias": "none",
 
6
  "eva_config": null,
7
  "exclude_modules": null,
8
  "fan_in_fan_out": false,
@@ -19,6 +20,7 @@
19
  "megatron_core": "megatron.core",
20
  "modules_to_save": null,
21
  "peft_type": "LORA",
 
22
  "r": 8,
23
  "rank_pattern": {},
24
  "revision": null,
@@ -26,7 +28,10 @@
26
  "v_proj",
27
  "q_proj"
28
  ],
 
29
  "task_type": "CAUSAL_LM",
 
30
  "use_dora": false,
 
31
  "use_rslora": false
32
  }
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "Qwen/Qwen2-0.5B-Instruct",
5
  "bias": "none",
6
+ "corda_config": null,
7
  "eva_config": null,
8
  "exclude_modules": null,
9
  "fan_in_fan_out": false,
 
20
  "megatron_core": "megatron.core",
21
  "modules_to_save": null,
22
  "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
  "r": 8,
25
  "rank_pattern": {},
26
  "revision": null,
 
28
  "v_proj",
29
  "q_proj"
30
  ],
31
+ "target_parameters": null,
32
  "task_type": "CAUSAL_LM",
33
+ "trainable_token_indices": null,
34
  "use_dora": false,
35
+ "use_qalora": false,
36
  "use_rslora": false
37
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:171c7622650df745d73549604149c5232e82af2800cc272a06d8078fa3ed96a5
3
  size 2175168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb8b1dcbec5e3766667f94b5da71f4af6c53b27c0f6aa5285571ff7199130035
3
  size 2175168
chat_template.jinja ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
2
+ You are a helpful assistant.<|im_end|>
3
+ ' }}{% endif %}{{'<|im_start|>' + message['role'] + '
4
+ ' + message['content'] + '<|im_end|>' + '
5
+ '}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
6
+ ' }}{% endif %}
runs/Oct03_06-53-40_a32062520574/events.out.tfevents.1759474433.a32062520574.2823.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa652390043bff537c83b3f24d07d51579cf7e1218292b1f4dab6939ff5684ae
3
+ size 8554
tokenizer_config.json CHANGED
@@ -31,14 +31,12 @@
31
  "<|im_end|>"
32
  ],
33
  "bos_token": null,
34
- "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
  "clean_up_tokenization_spaces": false,
36
  "eos_token": "<|im_end|>",
37
  "errors": "replace",
38
  "extra_special_tokens": {},
39
  "model_max_length": 32768,
40
  "pad_token": "<|endoftext|>",
41
- "padding_side": "left",
42
  "split_special_tokens": false,
43
  "tokenizer_class": "Qwen2Tokenizer",
44
  "unk_token": null
 
31
  "<|im_end|>"
32
  ],
33
  "bos_token": null,
 
34
  "clean_up_tokenization_spaces": false,
35
  "eos_token": "<|im_end|>",
36
  "errors": "replace",
37
  "extra_special_tokens": {},
38
  "model_max_length": 32768,
39
  "pad_token": "<|endoftext|>",
 
40
  "split_special_tokens": false,
41
  "tokenizer_class": "Qwen2Tokenizer",
42
  "unk_token": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b767e4516cbf6f41351c45ca3b8e44d6358638737558333afc6e1b450085461
3
- size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0a9ee85eb59f7bb972adf484cae93ceddf82229e9aa082fd1e4513252927ec
3
+ size 7185