Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -109,7 +109,7 @@ model = FastLanguageModel.get_peft_model(
|
|
| 109 |
target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
|
| 110 |
"gate_proj", "up_proj", "down_proj"],
|
| 111 |
lora_alpha=16,
|
| 112 |
-
lora_dropout=0, # Supports any, but = 0 is optimized
|
| 113 |
bias="none", # Supports any, but = "none" is optimized
|
| 114 |
use_gradient_checkpointing="unsloth", # True or "unsloth" for very long context
|
| 115 |
random_state=3407,
|
|
@@ -124,18 +124,18 @@ trainer = SFTTrainer(
|
|
| 124 |
eval_dataset=valid_dataset,
|
| 125 |
dataset_text_field="text",
|
| 126 |
max_seq_length=max_seq_length,
|
| 127 |
-
dataset_num_proc=
|
| 128 |
packing=True, # Enable sequence packing
|
| 129 |
args=TrainingArguments(
|
| 130 |
-
per_device_train_batch_size=
|
| 131 |
-
gradient_accumulation_steps=
|
| 132 |
warmup_steps=5,
|
| 133 |
-
max_steps
|
| 134 |
-
|
| 135 |
learning_rate=2e-4,
|
| 136 |
-
fp16=
|
| 137 |
-
bf16=
|
| 138 |
-
logging_steps=
|
| 139 |
evaluation_strategy="steps",
|
| 140 |
eval_steps=50, # Evaluate less frequently
|
| 141 |
max_grad_norm=1.0, # Add gradient clipping
|
|
|
|
| 109 |
target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
|
| 110 |
"gate_proj", "up_proj", "down_proj"],
|
| 111 |
lora_alpha=16,
|
| 112 |
+
lora_dropout=0.05, # Supports any, but = 0 is optimized; 0.05 is therefore not the optimized setting
|
| 113 |
bias="none", # Supports any, but = "none" is optimized
|
| 114 |
use_gradient_checkpointing="unsloth", # True or "unsloth" for very long context
|
| 115 |
random_state=3407,
|
|
|
|
| 124 |
eval_dataset=valid_dataset,
|
| 125 |
dataset_text_field="text",
|
| 126 |
max_seq_length=max_seq_length,
|
| 127 |
+
dataset_num_proc=8, # Increase parallelism
|
| 128 |
packing=True, # Enable sequence packing
|
| 129 |
args=TrainingArguments(
|
| 130 |
+
per_device_train_batch_size=32, # Lower batch size to prevent memory issues
|
| 131 |
+
gradient_accumulation_steps=1, # Maintain effective batch size
|
| 132 |
warmup_steps=5,
|
| 133 |
+
max_steps=-1, # -1 disables the step cap; num_train_epochs controls training length
|
| 134 |
+
num_train_epochs=3, # Test with fewer epochs
|
| 135 |
learning_rate=2e-4,
|
| 136 |
+
fp16=not is_bfloat16_supported(),
|
| 137 |
+
bf16=is_bfloat16_supported(),
|
| 138 |
+
logging_steps=10, # Log less frequently
|
| 139 |
evaluation_strategy="steps",
|
| 140 |
eval_steps=50, # Evaluate less frequently
|
| 141 |
max_grad_norm=1.0, # Add gradient clipping
|