Delete configs/minotaur.yml
configs/minotaur.yml  +0 -143
configs/minotaur.yml
DELETED
@@ -1,143 +0,0 @@
-base_model: huggyllama/llama-13b
-base_model_config: huggyllama/llama-13b
-model_type: LlamaForCausalLM
-tokenizer_type: LlamaTokenizer
-load_in_8bit: false
-load_in_4bit: false
-gptq: false
-strict: false
-push_dataset_to_hub:
-hf_use_auth_token: true
-datasets:
-  - path: winglian/evals
-    data_files:
-      - hf/ARC-Challenge.jsonl
-      - hf/ARC-Easy.jsonl
-      - hf/riddle_sense.jsonl
-      - hf/piqa.jsonl
-    type: explainchoice:chat
-  - path: winglian/evals
-    data_files:
-      - hf/gsm8k.jsonl
-      - hf/winogrande.jsonl
-    type: alpaca_chat.load_qa
-  - path: winglian/evals
-    data_files:
-      - custom/n_task.jsonl
-      - custom/misconceptions.jsonl
-      - custom/context_insensitivity.jsonl
-    type: alpaca_chat
-  - path: camel-ai/math
-    type: alpaca_chat.load_camel_ai
-  - path: camel-ai/biology
-    type: alpaca_chat.load_camel_ai
-  - path: camel-ai/physics
-    type: alpaca_chat.load_camel_ai
-  - path: camel-ai/chemistry
-    type: alpaca_chat.load_camel_ai
-  - path: winglian/evals
-    data_files:
-      - custom/in_context_qa.jsonl
-    type: context_qa
-  - path: winglian/evals
-    data_files:
-      - custom/in_context_qa.jsonl
-    type: context_qa.load_404
-  - path: winglian/evals
-    data_files:
-      - custom/jokes_explained_500up.jsonl
-    type: sharegpt_jokes
-  - path: winglian/evals
-    data_files:
-      - custom/classify-self-chat.sharegpt.jsonl
-      - custom/coding-self-chat.sharegpt.jsonl
-      - custom/prose-gpt4.sharegpt.jsonl
-      - custom/prose-rewrite-gpt4.sharegpt.jsonl
-    type: sharegpt_simple.load_role
-  - path: winglian/evals
-    data_files:
-      - openai/tldr.jsonl
-    type: summarizetldr:chat
-  - path: winglian/evals
-    data_files:
-      - hellaswag/hellaswag.jsonl
-    type: explainchoice:chat
-  - path: metaeval/ScienceQA_text_only
-    type: concisechoice:chat
-  - path: teknium/GPT4-LLM-Cleaned
-    type: alpaca_chat
-  - path: teknium/GPTeacher-General-Instruct
-    data_files: gpt4-instruct-similarity-0.6-dataset.json
-    type: gpteacher:chat
-  - path: QingyiSi/Alpaca-CoT
-    data_files:
-      - Chain-of-Thought/formatted_cot_data/aqua_train.json
-      - Chain-of-Thought/formatted_cot_data/creak_train.json
-      - Chain-of-Thought/formatted_cot_data/ecqa_train.json
-      - Chain-of-Thought/formatted_cot_data/esnli_train.json
-      - Chain-of-Thought/formatted_cot_data/qasc_train.json
-      - Chain-of-Thought/formatted_cot_data/qed_train.json
-      - Chain-of-Thought/formatted_cot_data/sensemaking_train.json
-      - Chain-of-Thought/formatted_cot_data/strategyqa_train.json
-      - GPTeacher/Roleplay/formatted_roleplay-similarity_0.6-instruct-dataset.json
-    type: alpaca_chat
-  - path: ehartford/WizardLM_alpaca_evol_instruct_70k_unfiltered
-    type: alpaca_chat
-  - path: ehartford/wizard_vicuna_70k_unfiltered
-    type: sharegpt:chat
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.004
-adapter:
-lora_model_dir:
-sequence_len: 2048
-max_packed_sequence_len: 2048
-lora_r:
-lora_alpha:
-lora_dropout:
-lora_target_modules:
-lora_target_linear: true
-lora_fan_in_fan_out:
-wandb_project: minotaur-13b
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./minotaur-13b
-gradient_accumulation_steps: 1
-micro_batch_size: 12
-num_epochs: 3
-optimizer: adamw_bnb_8bit
-torchdistx_path:
-lr_scheduler: cosine
-learning_rate: 0.00013
-train_on_inputs: false
-group_by_length: true
-bf16: true
-fp16: false
-tf32: true
-gradient_checkpointing: true
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-logging_steps: 1
-xformers_attention: true
-flash_attention:
-gptq_groupsize:
-gptq_model_v1:
-warmup_steps: 100
-eval_steps: 20
-save_steps: 51
-load_best_model_at_end: false
-debug:
-deepspeed:
-weight_decay: 0.1
-fsdp:
-  - full_shard
-  - auto_wrap
-fsdp_config:
-  fsdp_offload_params: true
-  fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
-special_tokens:
-  bos_token: "<s>"
-  eos_token: "</s>"
-  unk_token: "<unk>"
-
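The deleted file is an axolotl-style fine-tuning config. For context, a minimal sketch of how a config like this is typically launched, assuming the standard axolotl CLI entrypoint and an existing accelerate setup (the invocation itself is not part of this commit):

    # hypothetical usage: launch the fine-tune described by the YAML config
    accelerate launch -m axolotl.cli.train configs/minotaur.yml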