Qwen-Training

Sleeping

App Files Files Community

rahul7star committed 15 days ago

Commit

58ba31b

verified ·

1 Parent(s): be0df12

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -36

app.py CHANGED Viewed

@@ -244,8 +244,8 @@ def train_model(
         train_dataset, test_dataset = dataset["train"], dataset["test"]
         # ===== ⚡ FAST mode: use small subset =====
-        train_dataset = train_dataset.select(range(min(100, len(train_dataset))))
-        test_dataset = test_dataset.select(range(min(20, len(test_dataset))))
         log_message(output_log, f"⚡ Using {len(train_dataset)} train / {len(test_dataset)} test samples")
         # ===== Format samples =====
@@ -263,7 +263,21 @@ def train_model(
         train_dataset = train_dataset.map(format_example)
         test_dataset = test_dataset.map(format_example)
-        # ===== Tokenizer & Model =====
         tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
@@ -271,25 +285,29 @@ def train_model(
         model = AutoModelForCausalLM.from_pretrained(
             base_model,
             trust_remote_code=True,
-            torch_dtype=dtype,
-            device_map="auto" if device == "cuda" else None,
             low_cpu_mem_usage=True,
         )
-        model.gradient_checkpointing_enable()
-        # ===== LoRA setup =====
         lora_config = LoraConfig(
             task_type=TaskType.CAUSAL_LM,
-            r=4,
-            lora_alpha=8,
             lora_dropout=0.1,
             target_modules=["q_proj", "v_proj"],
             bias="none",
         )
         model = get_peft_model(model, lora_config)
-        log_message(output_log, "✅ LoRA applied successfully")
-        # ===== Tokenization =====
         def tokenize_fn(examples):
             tokenized = tokenizer(
                 examples["text"],
@@ -302,24 +320,22 @@ def train_model(
         train_dataset = train_dataset.map(tokenize_fn, batched=True)
         test_dataset = test_dataset.map(tokenize_fn, batched=True)
-        # ===== Training setup =====
-        output_dir = "./prompt_expander_fast"
-        os.makedirs(output_dir, exist_ok=True)
         training_args = TrainingArguments(
             output_dir=output_dir,
             num_train_epochs=num_epochs,
             per_device_train_batch_size=batch_size,
             gradient_accumulation_steps=2,
-            warmup_steps=5,
             logging_steps=5,
-            save_strategy="no",  # don't save checkpoints
-            fp16=(dtype == torch.float16),
-            bf16=(dtype == torch.bfloat16),
-            learning_rate=learning_rate,
-            report_to="none",
             optim="adamw_torch",
         )
         trainer = Trainer(
@@ -331,29 +347,22 @@ def train_model(
         )
         # ===== Train =====
-        log_message(output_log, "\n🔥 Quick training started...")
         trainer.train()
-        # ===== Save + Upload =====
-        log_message(output_log, "\n💾 Saving fast fine-tuned model...")
-        model.save_pretrained(output_dir)
         tokenizer.save_pretrained(output_dir)
-        log_message(output_log, f"☁️ Uploading model to {hf_repo} ...")
-        upload_folder(
-            repo_id=hf_repo,
-            folder_path=output_dir,
-            repo_type="model",
-            commit_message="Quick test fine-tune upload",
-        )
-        log_message(output_log, "\n✅ FAST training completed successfully!")
     except Exception as e:
-        log_message(output_log, f"❌ Error: {e}")
     return "\n".join(output_log)
 # ==== Gradio Interface ====
 def create_interface():
     with gr.Blocks(title="PromptWizard — Qwen Trainer") as demo:

         train_dataset, test_dataset = dataset["train"], dataset["test"]
         # ===== ⚡ FAST mode: use small subset =====
+        train_dataset = train_dataset.select(range(min(1000, len(train_dataset))))
+        test_dataset = test_dataset.select(range(min(200, len(test_dataset))))
         log_message(output_log, f"⚡ Using {len(train_dataset)} train / {len(test_dataset)} test samples")
         # ===== Format samples =====
         train_dataset = train_dataset.map(format_example)
         test_dataset = test_dataset.map(format_example)
+            # ===== Format examples dynamically =====
+        def format_example(item):
+            text_content = item.get("text") or item.get("content") or str(item.get("path", "")) or " ".join(str(v) for v in item.values())
+            # Use shorter, clean system prompt + user content for better loss
+            prompt = (
+                f"<|system|>\nYou are an expert AI assistant.\n<|user|>\n{text_content}\n<|assistant|>\n"
+            )
+            return {"text": prompt}
+        train_dataset = train_dataset.map(format_example)
+        test_dataset = test_dataset.map(format_example)
+        log_message(output_log, f"✅ Formatted {len(train_dataset)} train + {len(test_dataset)} test examples")
+        # ===== Load model & tokenizer =====
+        log_message(output_log, f"\n🤖 Loading model: {base_model}")
         tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
         model = AutoModelForCausalLM.from_pretrained(
             base_model,
             trust_remote_code=True,
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
             low_cpu_mem_usage=True,
         )
+        if device == "cuda":
+            model = model.to(device)
+        log_message(output_log, "✅ Model and tokenizer loaded successfully")
+        log_message(output_log, f"Tokenizer vocab size: {tokenizer.vocab_size}")
+        # ===== LoRA configuration =====
+        log_message(output_log, "\n⚙️ Configuring LoRA for efficient fine-tuning...")
         lora_config = LoraConfig(
             task_type=TaskType.CAUSAL_LM,
+            r=8,
+            lora_alpha=16,
             lora_dropout=0.1,
             target_modules=["q_proj", "v_proj"],
             bias="none",
         )
         model = get_peft_model(model, lora_config)
+        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
+        log_message(output_log, f"Trainable params after LoRA: {trainable_params:,}")
+        # ===== Tokenization + labels =====
         def tokenize_fn(examples):
             tokenized = tokenizer(
                 examples["text"],
         train_dataset = train_dataset.map(tokenize_fn, batched=True)
         test_dataset = test_dataset.map(tokenize_fn, batched=True)
+        log_message(output_log, "✅ Tokenization + labels done")
+        # ===== Training arguments =====
+        output_dir = "./qwen-gita-lora"
         training_args = TrainingArguments(
             output_dir=output_dir,
             num_train_epochs=num_epochs,
             per_device_train_batch_size=batch_size,
             gradient_accumulation_steps=2,
+            warmup_steps=10,
             logging_steps=5,
+            save_strategy="epoch",
+            fp16=device == "cuda",
             optim="adamw_torch",
+            learning_rate=learning_rate,
+            max_steps=500,  # Limit for demo is 100
         )
         trainer = Trainer(
         )
         # ===== Train =====
+        log_message(output_log, "\n🚀 Starting training...")
         trainer.train()
+        log_message(output_log, "\n💾 Saving trained model locally...")
+        trainer.save_model(output_dir)
         tokenizer.save_pretrained(output_dir)
+        # ===== Async upload =====
+        log_message(output_log, f"\n☁️ Initiating async upload to {hf_repo}")
+        start_async_upload(output_dir, hf_repo, output_log)
+        log_message(output_log, "✅ Training complete & async upload started!")
     except Exception as e:
+        log_message(output_log, f"\n❌ Error during training: {e}")
     return "\n".join(output_log)
 # ==== Gradio Interface ====
 def create_interface():
     with gr.Blocks(title="PromptWizard — Qwen Trainer") as demo: