derek-thomas
/

prompt-order-experiment

Model card Files Files and versions

derek-thomas committed on Nov 29, 2024

Commit

f140b0f

·

1 Parent(s): b4f4f9d

Cleaning autotrain

Files changed (2) hide show

autotrain/autotrain.yml +0 -34
autotrain/run_autotrain.py +0 -67

autotrain/autotrain.yml DELETED Viewed

@@ -1,34 +0,0 @@
-task: llm-sft  # config for one fine-tuning run of base_model on the dataset at data.path
-base_model: mistralai/Mistral-7B-Instruct-v0.3
-project_name: autotrain-mistral-v03-prompt-experiment-mc-R-FA-sg
-log: tensorboard
-backend: spaces-l4x1
-data:
-  path: derek-thomas/labeled-multiple-choice-explained-mistral-tokenized
-  train_split: train
-#   valid_split: val
-  valid_split: null  # no validation split is configured; the `val` alternative above is left commented out
-  chat_template: none
-  column_mapping:
-    text_column: conversation_R_FA_sg  # presumably the dataset column used as training text; the suffix matches project_name
-params:
-  block_size: 1024
-  model_max_length: 1024  # same value as block_size
-  epochs: 2
-  batch_size: 1
-  lr: 3e-5
-  peft: true
-  quantization: int4
-  target_modules: all-linear
-  padding: left
-  optimizer: adamw_torch
-  scheduler: linear
-  gradient_accumulation: 8  # with batch_size 1, presumably an effective batch of 8 per device; confirm
-  mixed_precision: bf16
-hub:
-  username: derek-thomas
-  token: ${HF_TOKEN}  # placeholder, not a literal secret; presumably resolved from the environment, confirm
-  push_to_hub: true

autotrain/run_autotrain.py DELETED Viewed

@@ -1,67 +0,0 @@
-import os
-import subprocess
-import yaml
-# Base config
-config_template = {
-    "task": "llm-sft",
-    "base_model": "mistralai/Mistral-7B-Instruct-v0.3",
-    "project_name": "",
-    "log": "tensorboard",
-    "backend": "spaces-l4x1",
-    "data": {
-        "path": "derek-thomas/labeled-multiple-choice-explained-mistral-tokenized",
-        "train_split": "train",
-        "valid_split": None,
-        "chat_template": "none",
-        "column_mapping": {
-            "text_column": ""
-            },
-        },
-    "params": {
-        "block_size": 1024,
-        "model_max_length": 1024,
-        "epochs": 2,
-        "batch_size": 1,
-        "lr": 3e-5,
-        "peft": True,
-        "quantization": "int4",
-        "target_modules": "all-linear",
-        "padding": "left",
-        "optimizer": "adamw_torch",
-        "scheduler": "linear",
-        "gradient_accumulation": 8,
-        "mixed_precision": "bf16",
-        },
-    "hub": {
-        "username": "derek-thomas",
-        "token": os.getenv('HF_TOKEN'),
-        "push_to_hub": True,
-        },
-    }
-# Suffix options
-project_suffixes = ["RFA-gpt3-5", "RFA-mistral", "FAR-gpt3-5", "FAR-mistral", "FA"]
-text_columns = ["conversation_RFA_gpt3_5", "conversation_RFA_mistral", "conversation_FAR_gpt3_5",
-                "conversation_FAR_mistral", "conversation_FA"]
-# Directory to store generated configs
-output_dir = "./autotrain_configs"
-os.makedirs(output_dir, exist_ok=True)
-# Generate configs and run commands
-for project_suffix, text_column in zip(project_suffixes, text_columns):
-    # Modify the config
-    config = config_template.copy()
-    config["project_name"] = f"mistral-v03-poe-{project_suffix}"
-    config["data"]["column_mapping"]["text_column"] = text_column
-    # Save the config to a YAML file
-    config_path = os.path.join(output_dir, f"{text_column}.yml")
-    with open(config_path, "w") as f:
-        yaml.dump(config, f)
-    # Run the command
-    print(f"Running autotrain with config: {config_path}")
-    subprocess.run(["autotrain", "--config", config_path])