""" PromptWizard Qwen Training — Configurable Dataset & Repo Fine-tunes Qwen using a user-selected dataset and optionally uploads the trained model to a Hugging Face Hub repo asynchronously with logs. """ import gradio as gr import spaces import torch from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments from datasets import load_dataset from peft import LoraConfig, get_peft_model, TaskType from huggingface_hub import upload_folder, HfFolder import os, asyncio, threading from datetime import datetime # ==== Async upload wrapper ==== def start_async_upload(local_dir, hf_repo, output_log): """Starts async model upload in a background thread.""" def runner(): output_log.append(f"[INFO] šŸš€ Async upload thread started for repo: {hf_repo}") asyncio.run(async_upload_model(local_dir, hf_repo, output_log)) output_log.append(f"[INFO] šŸ›‘ Async upload thread finished for repo: {hf_repo}") threading.Thread(target=runner, daemon=True).start() async def async_upload_model(local_dir, hf_repo, output_log, max_retries=3): """Upload model folder to HF Hub via HTTP API.""" try: token = HfFolder.get_token() output_log.append(f"[INFO] ā˜ļø Preparing to upload to repo: {hf_repo}") attempt = 0 while attempt < max_retries: try: output_log.append(f"[INFO] šŸ”„ Attempt {attempt+1} to upload folder via HTTP API...") upload_folder(folder_path=local_dir, repo_id=hf_repo, repo_type="model", token=token, ignore_patterns=["*.lock","*.tmp"], create_pr=False) output_log.append("[SUCCESS] āœ… Model successfully uploaded to HF Hub!") break except Exception as e: attempt += 1 output_log.append(f"[ERROR] Upload attempt {attempt} failed: {e}") if attempt < max_retries: output_log.append("[INFO] Retrying in 5 seconds...") await asyncio.sleep(5) else: output_log.append("[ERROR] āŒ Max retries reached. Upload failed.") except Exception as e: output_log.append(f"[ERROR] āŒ Unexpected error during upload: {e}") # ==== GPU check ==== def check_gpu_status(): return "šŸš€ Zero GPU Ready - GPU will be allocated when training starts" # ==== Logging helper ==== def log_message(output_log, msg): line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}" print(line) output_log.append(line) # ==== Train model ==== @spaces.GPU(duration=300) def train_model(base_model, dataset_name, num_epochs, batch_size, learning_rate): output_log = [] test_split = 0.2 try: log_message(output_log, "šŸ” Initializing training sequence...") # ===== Device ===== device = "cuda" if torch.cuda.is_available() else "cpu" log_message(output_log, f"šŸŽ® Using device: {device}") if device == "cuda": log_message(output_log, f"āœ… GPU: {torch.cuda.get_device_name(0)}") # ===== Load dataset ===== log_message(output_log, f"\nšŸ“š Loading dataset: {dataset_name} ...") dataset = load_dataset(dataset_name) dataset = dataset["train"].train_test_split(test_size=test_split) train_dataset = dataset["train"] test_dataset = dataset["test"] # ===== Format examples ===== def format_example(item): text = item.get("text") or item.get("content") or " ".join(str(v) for v in item.values()) prompt = f"""<|system|> You are a wise teacher interpreting Bhagavad Gita with deep insights. <|user|> {text} <|assistant|> """ return {"text": prompt} train_dataset = train_dataset.map(format_example) test_dataset = test_dataset.map(format_example) log_message(output_log, f"āœ… Formatted {len(train_dataset)} train + {len(test_dataset)} test examples") # ===== Load model & tokenizer ===== tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token model = AutoModelForCausalLM.from_pretrained( base_model, trust_remote_code=True, torch_dtype=torch.float16 if device=="cuda" else torch.float32, low_cpu_mem_usage=True ) if device == "cuda": model = model.to(device) # ===== LoRA configuration ===== lora_config = LoraConfig(task_type=TaskType.CAUSAL_LM, r=8, lora_alpha=16, lora_dropout=0.1, target_modules=["q_proj","v_proj"], bias="none") model = get_peft_model(model, lora_config) # ===== Tokenization + labels ===== def tokenize_fn(examples): tokenized = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=256) tokenized["labels"] = tokenized["input_ids"].copy() return tokenized train_dataset = train_dataset.map(tokenize_fn, batched=True) test_dataset = test_dataset.map(tokenize_fn, batched=True) # ===== Training ===== output_dir = "./qwen-gita-lora" training_args = TrainingArguments( output_dir=output_dir, num_train_epochs=num_epochs, per_device_train_batch_size=batch_size, gradient_accumulation_steps=2, warmup_steps=10, logging_steps=5, save_strategy="epoch", fp16=device=="cuda", optim="adamw_torch", learning_rate=learning_rate, max_steps=100 ) trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset, eval_dataset=test_dataset, tokenizer=tokenizer) log_message(output_log, "\nšŸš€ Starting training...") trainer.train() trainer.save_model(output_dir) tokenizer.save_pretrained(output_dir) log_message(output_log, "\nāœ… Training finished and model saved locally.") return "\n".join(output_log), model, tokenizer, output_dir except Exception as e: log_message(output_log, f"\nāŒ Error during training: {e}") return "\n".join(output_log), None, None, None # ==== Gradio Interface ==== def create_interface(): with gr.Blocks(title="PromptWizard — Qwen Trainer") as demo: gr.Markdown(""" # 🧘 PromptWizard Qwen Fine-tuning Fine-tune Qwen and interact with it before optional upload. """) with gr.Row(): with gr.Column(): gr.Textbox(label="GPU Status", value=check_gpu_status(), interactive=False) base_model = gr.Textbox(label="Base Model", value="Qwen/Qwen2.5-0.5B") dataset_name = gr.Textbox(label="Dataset Name", value="rahul7star/Gita") hf_repo = gr.Textbox(label="HF Repo for Upload", value="rahul7star/Qwen0.5-3B-Gita") num_epochs = gr.Slider(1, 3, value=1, step=1, label="Epochs") batch_size = gr.Slider(1, 4, value=2, step=1, label="Batch Size") learning_rate = gr.Number(value=5e-5, label="Learning Rate") train_btn = gr.Button("šŸš€ Start Fine-tuning", variant="primary") upload_btn = gr.Button("ā˜ļø Upload Model to HF Hub", variant="secondary", interactive=False) with gr.Column(): output = gr.Textbox(label="Training Log", lines=25, max_lines=40, value="Click 'Start Fine-tuning' to train your model.") user_question = gr.Textbox(label="Ask your own question", placeholder="Type a question...") answer_box = gr.Textbox(label="Answer", lines=5, interactive=False) # ==== Train button ==== def train_click(base_model, dataset_name, num_epochs, batch_size, learning_rate): log, model, tokenizer, output_dir = train_model(base_model, dataset_name, num_epochs, batch_size, learning_rate) return log, True, model, tokenizer, output_dir train_btn.click( fn=train_click, inputs=[base_model, dataset_name, num_epochs, batch_size, learning_rate], outputs=[output, upload_btn, gr.State(), gr.State(), gr.State()], ) # ==== User question ==== def ask_question(user_input, model, tokenizer): if not model or not tokenizer: return "Model not loaded yet." device = "cuda" if torch.cuda.is_available() else "cpu" inputs = tokenizer(f"<|system|>\nYou are a wise teacher interpreting Bhagavad Gita.\n<|user|>\n{user_input}\n<|assistant|>\n", return_tensors="pt").to(device) outputs = model.generate(**inputs, max_new_tokens=100) answer = tokenizer.decode(outputs[0], skip_special_tokens=True) return answer user_question.submit(ask_question, inputs=[user_question, gr.State(), gr.State()], outputs=[answer_box]) # ==== Upload button ==== def upload_click(hf_repo): output_log = [] start_async_upload("./qwen-gita-lora", hf_repo, output_log) return "\n".join(output_log) upload_btn.click(upload_click, inputs=[hf_repo], outputs=[output]) return demo if __name__ == "__main__": demo = create_interface() demo.launch(server_name="0.0.0.0", server_port=7860)