[WARNING|2024-12-04 13:56:21] logging.py:162 >> `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
[INFO|2024-12-04 13:56:21] logging.py:157 >> Resuming training from saves/Llama-3.2-3B-Instruct/lora/train_llama-pii/checkpoint-100.
[INFO|2024-12-04 13:56:21] logging.py:157 >> Change `output_dir` or use `overwrite_output_dir` to avoid.
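The warning and the resume messages above map onto two Trainer-level settings. A minimal sketch in plain Hugging Face `transformers` (not the LLaMA-Factory wrapper that produced this log; argument values are illustrative assumptions):

```python
from transformers import TrainingArguments

# Sketch: LoRA under DDP needs ddp_find_unused_parameters=False, and resuming
# works by pointing Trainer.train() at an existing checkpoint directory.
args = TrainingArguments(
    output_dir="saves/Llama-3.2-3B-Instruct/lora/train_llama-pii",
    ddp_find_unused_parameters=False,  # the setting the WARNING above refers to
    overwrite_output_dir=False,        # keep existing checkpoints so the run can resume
    bf16=True,
)

# trainer = Trainer(model=model, args=args, train_dataset=train_dataset, ...)
# trainer.train(resume_from_checkpoint=
#     "saves/Llama-3.2-3B-Instruct/lora/train_llama-pii/checkpoint-100")
```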
[INFO|2024-12-04 13:56:21] parser.py:355 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
[INFO|2024-12-04 13:56:21] configuration_utils.py:733 >> loading configuration file config.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/config.json
[INFO|2024-12-04 13:56:21] configuration_utils.py:800 >> Model config LlamaConfig {
  "_name_or_path": "meta-llama/Llama-3.2-3B-Instruct",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 24,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.43.1",
  "use_cache": true,
  "vocab_size": 128256
}
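The config dump above can be reproduced outside of training to inspect the architecture. A minimal sketch with the standard `transformers` API; the model ID is taken from the log:

```python
from transformers import AutoConfig

# Sketch: load the same config the log dumps above and inspect a few fields.
config = AutoConfig.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
print(config.hidden_size)        # 3072
print(config.num_hidden_layers)  # 28
print(config.rope_scaling)       # llama3-style RoPE scaling, factor 32.0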
[INFO|2024-12-04 13:56:21] parser.py:355 >> Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
[INFO|2024-12-04 13:56:22] parser.py:355 >> Process rank: 5, device: cuda:5, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
[INFO|2024-12-04 13:56:22] parser.py:355 >> Process rank: 2, device: cuda:2, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
[INFO|2024-12-04 13:56:22] parser.py:355 >> Process rank: 3, device: cuda:3, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
[INFO|2024-12-04 13:56:22] parser.py:355 >> Process rank: 4, device: cuda:4, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
[INFO|2024-12-04 13:56:22] parser.py:355 >> Process rank: 6, device: cuda:6, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
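Each rank reports a single local GPU, which is typical of a one-node DDP launch. A rough sketch of where these per-rank fields come from inside each process (assuming the process group has already been initialized by the launcher; this is not code from the run itself):

```python
import torch
import torch.distributed as dist

# Sketch: the "Process rank / device" fields above correspond to these values.
if dist.is_available() and dist.is_initialized():
    rank = dist.get_rank()                      # "Process rank: N"
    device_index = torch.cuda.current_device()  # "device: cuda:N"
    world_size = dist.get_world_size()
    print(f"rank {rank}, device cuda:{device_index}, world size {world_size}")
```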
[INFO|2024-12-04 13:56:22] tokenization_utils_base.py:2289 >> loading file tokenizer.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/tokenizer.json
[INFO|2024-12-04 13:56:22] tokenization_utils_base.py:2289 >> loading file added_tokens.json from cache at None
[INFO|2024-12-04 13:56:22] tokenization_utils_base.py:2289 >> loading file special_tokens_map.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/special_tokens_map.json
[INFO|2024-12-04 13:56:22] tokenization_utils_base.py:2289 >> loading file tokenizer_config.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/tokenizer_config.json
[INFO|2024-12-04 13:56:22] tokenization_utils_base.py:2533 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
[INFO|2024-12-04 13:56:22] configuration_utils.py:733 >> loading configuration file config.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/config.json
[INFO|2024-12-04 13:56:22] tokenization_utils_base.py:2289 >> loading file tokenizer.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/tokenizer.json
[INFO|2024-12-04 13:56:22] tokenization_utils_base.py:2289 >> loading file added_tokens.json from cache at None
[INFO|2024-12-04 13:56:22] tokenization_utils_base.py:2289 >> loading file special_tokens_map.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/special_tokens_map.json
[INFO|2024-12-04 13:56:22] tokenization_utils_base.py:2289 >> loading file tokenizer_config.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/tokenizer_config.json
[INFO|2024-12-04 13:56:23] tokenization_utils_base.py:2533 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
[INFO|2024-12-04 13:56:23] logging.py:157 >> Replace eos token: <|eot_id|>
[INFO|2024-12-04 13:56:23] logging.py:157 >> Add pad token: <|eot_id|>
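The "Replace eos token" / "Add pad token" lines indicate that `<|eot_id|>` (token id 128009) is used both as the end-of-turn EOS for chat-style SFT and as the padding token, since the base tokenizer ships without one. A minimal sketch of the equivalent setup in plain `transformers` (an assumption about what the wrapper does, not code from this run):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")

# Sketch: use <|eot_id|> as EOS and reuse it as the pad token,
# mirroring the two log lines above.
tokenizer.eos_token = "<|eot_id|>"
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
print(tokenizer.eos_token_id, tokenizer.pad_token_id)  # 128009 128009
```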
[INFO|2024-12-04 13:56:23] logging.py:157 >> Loading dataset formatted_dataset.json...
[INFO|2024-12-04 13:56:25] configuration_utils.py:733 >> loading configuration file config.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/config.json
[INFO|2024-12-04 13:56:25] modeling_utils.py:3621 >> loading weights file model.safetensors from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/model.safetensors.index.json
[INFO|2024-12-04 13:56:25] modeling_utils.py:1569 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
[INFO|2024-12-04 13:56:25] configuration_utils.py:1038 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ]
}
[INFO|2024-12-04 13:56:28] modeling_utils.py:4450 >> All model checkpoint weights were used when initializing LlamaForCausalLM.
[INFO|2024-12-04 13:56:28] modeling_utils.py:4458 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at meta-llama/Llama-3.2-3B-Instruct.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
[INFO|2024-12-04 13:56:28] configuration_utils.py:993 >> loading configuration file generation_config.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/generation_config.json
[INFO|2024-12-04 13:56:28] configuration_utils.py:1038 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "temperature": 0.6,
  "top_p": 0.9
}
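The base model is instantiated in bfloat16, and the checkpoint's generation defaults (sampling with temperature 0.6 and top-p 0.9) are loaded alongside it. A minimal standalone sketch of the equivalent load, assuming a single-GPU context rather than the DDP setup of this log:

```python
import torch
from transformers import AutoModelForCausalLM

# Sketch: load the base model in bfloat16, as the log does before attaching LoRA adapters.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct",
    torch_dtype=torch.bfloat16,
)
print(model.generation_config)  # do_sample=True, temperature=0.6, top_p=0.9 per the dump above
```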
[INFO|2024-12-04 13:56:28] logging.py:157 >> Gradient checkpointing enabled.
[INFO|2024-12-04 13:56:28] logging.py:157 >> Using torch SDPA for faster training and inference.
[INFO|2024-12-04 13:56:28] logging.py:157 >> Upcasting trainable params to float32.
[INFO|2024-12-04 13:56:28] logging.py:157 >> Fine-tuning method: LoRA
[INFO|2024-12-04 13:56:28] logging.py:157 >> Found linear modules: q_proj,gate_proj,down_proj,up_proj,o_proj,v_proj,k_proj
[INFO|2024-12-04 13:56:29] logging.py:157 >> trainable params: 12,156,928 || all params: 3,224,906,752 || trainable%: 0.3770
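All seven linear projection types are targeted, and the reported 12,156,928 trainable parameters are consistent with a LoRA rank of 8: per layer, 8 x [2*(3072+3072) + 2*(3072+1024) + 3*(3072+8192)] = 434,176, and 434,176 x 28 layers = 12,156,928. A minimal sketch of an equivalent PEFT setup; the rank is inferred from that count, and lora_alpha/lora_dropout are assumed values, not read from this log:

```python
import torch
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct", torch_dtype=torch.bfloat16
)

# Sketch: LoRA over all linear projections, matching the "Found linear modules" line.
lora_config = LoraConfig(
    r=8,                 # inferred from the trainable-parameter count above
    lora_alpha=16,       # assumed
    lora_dropout=0.0,    # assumed
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)
peft_model = get_peft_model(base, lora_config)
peft_model.print_trainable_parameters()
# trainable params: 12,156,928 || all params: 3,224,906,752 || trainable%: 0.3770
```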
[INFO|2024-12-04 13:56:29] trainer.py:648 >> Using auto half precision backend
[INFO|2024-12-04 13:56:29] trainer.py:2526 >> Loading model from saves/Llama-3.2-3B-Instruct/lora/train_llama-pii/checkpoint-100.
[INFO|2024-12-04 13:56:32] trainer.py:2134 >> ***** Running training *****
[INFO|2024-12-04 13:56:32] trainer.py:2135 >> Num examples = 9,507
[INFO|2024-12-04 13:56:32] trainer.py:2136 >> Num Epochs = 3
[INFO|2024-12-04 13:56:32] trainer.py:2137 >> Instantaneous batch size per device = 2
[INFO|2024-12-04 13:56:32] trainer.py:2140 >> Total train batch size (w. parallel, distributed & accumulation) = 128
[INFO|2024-12-04 13:56:32] trainer.py:2141 >> Gradient Accumulation steps = 8
[INFO|2024-12-04 13:56:32] trainer.py:2142 >> Total optimization steps = 222
[INFO|2024-12-04 13:56:32] trainer.py:2143 >> Number of trainable parameters = 12,156,928
[INFO|2024-12-04 13:56:32] trainer.py:2165 >> Continuing training from checkpoint, will skip to saved global_step
[INFO|2024-12-04 13:56:32] trainer.py:2166 >> Continuing training from epoch 1
[INFO|2024-12-04 13:56:32] trainer.py:2167 >> Continuing training from global step 100
[INFO|2024-12-04 13:56:32] trainer.py:2169 >> Will skip the first 1 epochs then the first 208 batches in the first epoch.
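The batch and step counts above fit together as a worked check, assuming 8 DDP processes (the total batch size implies 8 even though only ranks 0-6 are echoed earlier): 128 = 2 per device x 8 accumulation steps x 8 processes; roughly 74 optimization steps per epoch over 9,507 examples gives 222 steps for 3 epochs; and resuming at global step 100 means 1 full epoch (74 steps) plus 26 steps, i.e. 26 x 8 = 208 micro-batches to skip. The exact rounding inside the Trainer may differ slightly, but this sketch reproduces the logged numbers:

```python
import math

# Worked check of the Trainer bookkeeping above (8 DDP processes assumed).
per_device_batch, grad_accum, world_size = 2, 8, 8
num_examples, num_epochs, resume_step = 9_507, 3, 100

total_batch = per_device_batch * grad_accum * world_size                  # 128
micro_batches_per_rank = math.ceil(
    math.ceil(num_examples / world_size) / per_device_batch)              # 595
steps_per_epoch = micro_batches_per_rank // grad_accum                    # 74
total_steps = steps_per_epoch * num_epochs                                # 222

epochs_to_skip = resume_step // steps_per_epoch                           # 1
batches_to_skip = (resume_step - epochs_to_skip * steps_per_epoch) * grad_accum  # 208
print(total_batch, total_steps, epochs_to_skip, batches_to_skip)
```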
[INFO|2024-12-04 13:57:13] logging.py:157 >> {'loss': 1.0489, 'learning_rate': 2.7120e-05, 'epoch': 1.42, 'throughput': 152046.66}
[INFO|2024-12-04 13:57:54] logging.py:157 >> {'loss': 1.0607, 'learning_rate': 2.5354e-05, 'epoch': 1.48, 'throughput': 79588.83}
[INFO|2024-12-04 13:58:35] logging.py:157 >> {'loss': 1.0722, 'learning_rate': 2.3586e-05, 'epoch': 1.55, 'throughput': 55727.70}
[INFO|2024-12-04 13:59:17] logging.py:157 >> {'loss': 1.0662, 'learning_rate': 2.1825e-05, 'epoch': 1.62, 'throughput': 43302.60}
[INFO|2024-12-04 14:00:00] logging.py:157 >> {'loss': 1.0548, 'learning_rate': 2.0079e-05, 'epoch': 1.69, 'throughput': 35890.41}
[INFO|2024-12-04 14:00:41] logging.py:157 >> {'loss': 1.0645, 'learning_rate': 1.8359e-05, 'epoch': 1.75, 'throughput': 31092.14}
[INFO|2024-12-04 14:01:20] logging.py:157 >> {'loss': 1.0671, 'learning_rate': 1.6672e-05, 'epoch': 1.82, 'throughput': 27982.89}
[INFO|2024-12-04 14:01:59] logging.py:157 >> {'loss': 1.0775, 'learning_rate': 1.5026e-05, 'epoch': 1.89, 'throughput': 25513.14}
[INFO|2024-12-04 14:02:39] logging.py:157 >> {'loss': 1.0781, 'learning_rate': 1.3430e-05, 'epoch': 1.95, 'throughput': 23537.72}
[INFO|2024-12-04 14:03:19] logging.py:157 >> {'loss': 1.0598, 'learning_rate': 1.1892e-05, 'epoch': 2.02, 'throughput': 21993.43}
[INFO|2024-12-04 14:03:59] logging.py:157 >> {'loss': 1.0507, 'learning_rate': 1.0420e-05, 'epoch': 2.09, 'throughput': 20677.45}
[INFO|2024-12-04 14:04:39] logging.py:157 >> {'loss': 1.0532, 'learning_rate': 9.0208e-06, 'epoch': 2.16, 'throughput': 19586.24}
[INFO|2024-12-04 14:05:19] logging.py:157 >> {'loss': 1.0589, 'learning_rate': 7.7015e-06, 'epoch': 2.22, 'throughput': 18691.67}
[INFO|2024-12-04 14:05:58] logging.py:157 >> {'loss': 1.0340, 'learning_rate': 6.4688e-06, 'epoch': 2.29, 'throughput': 17909.19}
[INFO|2024-12-04 14:06:37] logging.py:157 >> {'loss': 1.0442, 'learning_rate': 5.3288e-06, 'epoch': 2.36, 'throughput': 17276.50}
[INFO|2024-12-04 14:07:16] logging.py:157 >> {'loss': 1.0579, 'learning_rate': 4.2873e-06, 'epoch': 2.43, 'throughput': 16671.13}
[INFO|2024-12-04 14:07:57] logging.py:157 >> {'loss': 1.0397, 'learning_rate': 3.3494e-06, 'epoch': 2.49, 'throughput': 16124.99}
[INFO|2024-12-04 14:08:37] logging.py:157 >> {'loss': 1.0249, 'learning_rate': 2.5198e-06, 'epoch': 2.56, 'throughput': 15650.52}
[INFO|2024-12-04 14:09:17] logging.py:157 >> {'loss': 1.0343, 'learning_rate': 1.8028e-06, 'epoch': 2.63, 'throughput': 15222.33}
[INFO|2024-12-04 14:09:58] logging.py:157 >> {'loss': 1.0450, 'learning_rate': 1.2018e-06, 'epoch': 2.69, 'throughput': 14816.79}
[INFO|2024-12-04 14:09:58] trainer.py:3503 >> Saving model checkpoint to saves/Llama-3.2-3B-Instruct/lora/train_llama-pii/checkpoint-200
[INFO|2024-12-04 14:09:58] configuration_utils.py:733 >> loading configuration file config.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/config.json
[INFO|2024-12-04 14:09:58] configuration_utils.py:800 >> Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 24,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.43.1",
  "use_cache": true,
  "vocab_size": 128256
}
[INFO|2024-12-04 14:09:58] tokenization_utils_base.py:2702 >> tokenizer config file saved in saves/Llama-3.2-3B-Instruct/lora/train_llama-pii/checkpoint-200/tokenizer_config.json
[INFO|2024-12-04 14:09:58] tokenization_utils_base.py:2711 >> Special tokens file saved in saves/Llama-3.2-3B-Instruct/lora/train_llama-pii/checkpoint-200/special_tokens_map.json
[INFO|2024-12-04 14:10:38] logging.py:157 >> {'loss': 1.0425, 'learning_rate': 7.1996e-07, 'epoch': 2.76, 'throughput': 14459.94}
[INFO|2024-12-04 14:11:17] logging.py:157 >> {'loss': 1.0649, 'learning_rate': 3.5960e-07, 'epoch': 2.83, 'throughput': 14155.22}
[INFO|2024-12-04 14:11:59] logging.py:157 >> {'loss': 1.0439, 'learning_rate': 1.2256e-07, 'epoch': 2.90, 'throughput': 13843.87}
[INFO|2024-12-04 14:12:38] logging.py:157 >> {'loss': 1.0382, 'learning_rate': 1.0012e-08, 'epoch': 2.96, 'throughput': 13593.13}
[INFO|2024-12-04 14:12:54] trainer.py:3503 >> Saving model checkpoint to saves/Llama-3.2-3B-Instruct/lora/train_llama-pii/checkpoint-222
[INFO|2024-12-04 14:12:54] configuration_utils.py:733 >> loading configuration file config.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/config.json
[INFO|2024-12-04 14:12:55] tokenization_utils_base.py:2702 >> tokenizer config file saved in saves/Llama-3.2-3B-Instruct/lora/train_llama-pii/checkpoint-222/tokenizer_config.json
[INFO|2024-12-04 14:12:55] tokenization_utils_base.py:2711 >> Special tokens file saved in saves/Llama-3.2-3B-Instruct/lora/train_llama-pii/checkpoint-222/special_tokens_map.json
[INFO|2024-12-04 14:12:55] trainer.py:2394 >>
Training completed. Do not forget to share your model on huggingface.co/models =)
[INFO|2024-12-04 14:12:55] trainer.py:3503 >> Saving model checkpoint to saves/Llama-3.2-3B-Instruct/lora/train_llama-pii
[INFO|2024-12-04 14:12:55] configuration_utils.py:733 >> loading configuration file config.json from cache at /home/jiaheng/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95/config.json
[INFO|2024-12-04 14:12:55] tokenization_utils_base.py:2702 >> tokenizer config file saved in saves/Llama-3.2-3B-Instruct/lora/train_llama-pii/tokenizer_config.json
[INFO|2024-12-04 14:12:55] tokenization_utils_base.py:2711 >> Special tokens file saved in saves/Llama-3.2-3B-Instruct/lora/train_llama-pii/special_tokens_map.json
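At this point the final LoRA adapter and tokenizer files live in saves/Llama-3.2-3B-Instruct/lora/train_llama-pii. A minimal sketch of loading that adapter onto the base model for inference with PEFT; the paths come from the log, and the merge step is an assumed (optional) follow-up rather than something this run performs:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

adapter_dir = "saves/Llama-3.2-3B-Instruct/lora/train_llama-pii"

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(adapter_dir)

# Attach the trained LoRA adapter; merge_and_unload() folds it into the base weights.
model = PeftModel.from_pretrained(base, adapter_dir)
model = model.merge_and_unload()  # optional: produces a standalone merged model
```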
[WARNING|2024-12-04 14:12:55] logging.py:162 >> No metric eval_loss to plot.
[WARNING|2024-12-04 14:12:55] logging.py:162 >> No metric eval_accuracy to plot.
[INFO|2024-12-04 14:12:55] modelcard.py:449 >> Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
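The two warnings above mean no validation split was configured, so there are no eval_loss or eval_accuracy curves to plot, and the auto-generated model card lacks dataset metadata for its result entry. If evaluation is wanted on a future run, a held-out split plus periodic evaluation can be requested through the trainer arguments; a hedged sketch in plain `transformers` with illustrative values:

```python
from transformers import TrainingArguments

# Sketch: enable periodic evaluation so eval_loss is logged and can be plotted.
args = TrainingArguments(
    output_dir="saves/Llama-3.2-3B-Instruct/lora/train_llama-pii",
    eval_strategy="steps",         # run evaluation every eval_steps
    eval_steps=50,
    per_device_eval_batch_size=2,
)
# Pass an eval_dataset to the Trainer alongside train_dataset.
```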