| """ | |
| PromptWizard Qwen Training — Configurable Dataset & Repo | |
| Fine-tunes Qwen using a user-selected dataset and uploads the trained model | |
| to a user-specified Hugging Face Hub repo asynchronously with detailed logs. | |
| """ | |
import gradio as gr
import spaces
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
from huggingface_hub import HfFolder, upload_folder
import os, asyncio, threading, time
from datetime import datetime
# ==== Async upload wrapper ====
def start_async_upload(local_dir, hf_repo, output_log):
    """Starts the async model upload in a background daemon thread."""
    def runner():
        output_log.append(f"[INFO] 🚀 Async upload thread started for repo: {hf_repo}")
        asyncio.run(async_upload_model(local_dir, hf_repo, output_log))
        output_log.append(f"[INFO] 🛑 Async upload thread finished for repo: {hf_repo}")

    threading.Thread(target=runner, daemon=True).start()
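
# Design note: upload_folder() below is a blocking HTTP call, so it runs inside
# asyncio.run() on a daemon thread to keep the Gradio request handler responsive;
# the event loop is only needed for the `await asyncio.sleep(5)` retry back-off.
# Uploading requires a write token, picked up by HfFolder.get_token() from the
# HF_TOKEN environment variable (e.g. a Space secret) or a cached `huggingface-cli login`.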
async def async_upload_model(local_dir, hf_repo, output_log, max_retries=3):
    """Uploads a local model directory to the HF Hub via the HTTP API, with retries."""
    try:
        token = HfFolder.get_token()
        output_log.append(f"[INFO] ☁️ Preparing to upload to repo: {hf_repo}")
        attempt = 0
        while attempt < max_retries:
            try:
                output_log.append(f"[INFO] 🔄 Attempt {attempt + 1} to upload folder via HTTP API...")
                upload_folder(
                    folder_path=local_dir,
                    repo_id=hf_repo,
                    repo_type="model",
                    token=token,
                    ignore_patterns=["*.lock", "*.tmp"],  # skip lock/temp files
                    create_pr=False,
                )
                output_log.append("[SUCCESS] ✅ Model successfully uploaded to HF Hub!")
                break
            except Exception as e:
                attempt += 1
                output_log.append(f"[ERROR] Upload attempt {attempt} failed: {e}")
                if attempt >= max_retries:
                    output_log.append("[ERROR] ❌ Max retries reached. Upload failed.")
                else:
                    output_log.append("[INFO] Retrying upload in 5 seconds...")
                    await asyncio.sleep(5)
    except Exception as e:
        output_log.append(f"[ERROR] ❌ Unexpected error during upload: {e}")
# ==== GPU check ====
def check_gpu_status():
    return "🚀 Zero GPU Ready - GPU will be allocated when training starts"


# ==== Logging helper ====
def log_message(output_log, msg):
    line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(line)
    output_log.append(line)
# =====================================================
# 🧠 Train model to expand short prompts into long ones
# =====================================================
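# ZeroGPU: the training entry point must be decorated so a GPU is attached
# for the duration of the call (here capped at 300 seconds).
@spaces.GPU(duration=300)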
def train_model(
    base_model, dataset_name, num_epochs, batch_size, learning_rate, hf_repo
):
    output_log = []
    try:
        log_message(output_log, "🚀 Starting FAST test training...")

        # ===== Device =====
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # Prefer bf16 on GPUs that support it, fp16 otherwise; fall back to fp32 on CPU.
        if device == "cuda":
            dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
        else:
            dtype = torch.float32
        log_message(output_log, f"🎮 Device: {device}, dtype: {dtype}")
        if device == "cuda":
            log_message(output_log, f"✅ GPU: {torch.cuda.get_device_name(0)}")

        # ===== Load dataset =====
        log_message(output_log, f"\n📚 Loading dataset: {dataset_name}")
        dataset = load_dataset(dataset_name)
        dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)
        train_dataset, test_dataset = dataset["train"], dataset["test"]

        # ===== ⚡ FAST mode: use a small subset =====
        train_dataset = train_dataset.select(range(min(1000, len(train_dataset))))
        test_dataset = test_dataset.select(range(min(200, len(test_dataset))))
        log_message(output_log, f"⚡ Using {len(train_dataset)} train / {len(test_dataset)} test samples")
        # ===== Format samples =====
        # Assumes the dataset exposes "short" and "long" text columns (as in the
        # default prompt-expansion setup); missing columns become empty strings.
        def format_example(example):
            short_prompt = (example.get("short") or "").strip()
            long_response = (example.get("long") or "").strip()
            return {
                "text": (
                    f"<|system|>\nYou are an AI that expands short prompts into detailed, descriptive ones.\n"
                    f"<|user|>\nShort: {short_prompt}\n"
                    f"<|assistant|>\n{long_response}"
                )
            }

        train_dataset = train_dataset.map(format_example)
        test_dataset = test_dataset.map(format_example)
        log_message(output_log, f"✅ Formatted {len(train_dataset)} train + {len(test_dataset)} test examples")
        # ===== Load model & tokenizer =====
        log_message(output_log, f"\n🤖 Loading model: {base_model}")
        tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            trust_remote_code=True,
            torch_dtype=dtype,
            low_cpu_mem_usage=True,
        )
        if device == "cuda":
            model = model.to(device)
        log_message(output_log, "✅ Model and tokenizer loaded successfully")
        log_message(output_log, f"Tokenizer vocab size: {tokenizer.vocab_size}")
        # ===== LoRA configuration =====
        log_message(output_log, "\n⚙️ Configuring LoRA for efficient fine-tuning...")
        lora_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            r=8,                # low-rank adapter dimension
            lora_alpha=16,      # scaling factor (alpha / r = 2.0)
            lora_dropout=0.1,
            target_modules=["q_proj", "v_proj"],  # attention query/value projections
            bias="none",
        )
        model = get_peft_model(model, lora_config)
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        log_message(output_log, f"Trainable params after LoRA: {trainable_params:,}")
        # ===== Tokenization + labels =====
        def tokenize_fn(examples):
            tokenized = tokenizer(
                examples["text"],
                padding="max_length",
                truncation=True,
                max_length=256,
            )
            # Use input_ids as labels, masking padded positions with -100
            # so they are ignored by the cross-entropy loss.
            tokenized["labels"] = [
                [tok if mask == 1 else -100 for tok, mask in zip(ids, attn)]
                for ids, attn in zip(tokenized["input_ids"], tokenized["attention_mask"])
            ]
            return tokenized

        train_dataset = train_dataset.map(tokenize_fn, batched=True)
        test_dataset = test_dataset.map(tokenize_fn, batched=True)
        log_message(output_log, "✅ Tokenization + labels done")
        # ===== Training arguments =====
        output_dir = "./qwen-gita-lora"
        training_args = TrainingArguments(
            output_dir=output_dir,
            num_train_epochs=num_epochs,
            per_device_train_batch_size=batch_size,
            gradient_accumulation_steps=2,
            warmup_steps=10,
            logging_steps=5,
            save_strategy="epoch",
            fp16=(dtype == torch.float16),
            bf16=(dtype == torch.bfloat16),
            optim="adamw_torch",
            learning_rate=learning_rate,
            max_steps=500,  # hard cap on optimizer steps for the demo (overrides num_train_epochs)
        )
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=test_dataset,
            tokenizer=tokenizer,
        )
        # ===== Train =====
        log_message(output_log, "\n🚀 Starting training...")
        trainer.train()

        log_message(output_log, "\n💾 Saving trained model locally...")
        trainer.save_model(output_dir)
        tokenizer.save_pretrained(output_dir)

        # ===== Async upload =====
        log_message(output_log, f"\n☁️ Initiating async upload to {hf_repo}")
        start_async_upload(output_dir, hf_repo, output_log)
        log_message(output_log, "✅ Training complete & async upload started!")

    except Exception as e:
        log_message(output_log, f"\n❌ Error during training: {e}")

    return "\n".join(output_log)
# ==== Gradio Interface ====
def create_interface():
    with gr.Blocks(title="PromptWizard — Qwen Trainer") as demo:
        gr.Markdown("""
        # 🧘 PromptWizard Qwen Fine-tuning
        Fine-tune Qwen on any dataset and upload to any Hugging Face repo.
        """)
        with gr.Row():
            with gr.Column():
                gr.Textbox(label="GPU Status", value=check_gpu_status(), interactive=False)
                base_model = gr.Textbox(label="Base Model", value="Qwen/Qwen2.5-0.5B")
                dataset_name = gr.Textbox(label="Dataset Name", value="rahul7star/Gita")
                hf_repo = gr.Textbox(label="HF Repo for Upload", value="rahul7star/Qwen0.5-3B-Gita")
                num_epochs = gr.Slider(1, 3, value=1, step=1, label="Epochs")
                batch_size = gr.Slider(1, 4, value=2, step=1, label="Batch Size")
                learning_rate = gr.Number(value=5e-5, label="Learning Rate")
                train_btn = gr.Button("🚀 Start Fine-tuning", variant="primary")
            with gr.Column():
                output = gr.Textbox(
                    label="Training Log",
                    lines=25,
                    max_lines=40,
                    value="Click 'Start Fine-tuning' to train and upload your model.",
                )
        train_btn.click(
            fn=train_model,
            inputs=[base_model, dataset_name, num_epochs, batch_size, learning_rate, hf_repo],
            outputs=output,
        )
    return demo
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860)
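
# Local usage: run `python app.py` and open http://localhost:7860.
# Note: the training log shown in the UI is what was captured up to the moment
# train_model returns; lines appended later by the background upload thread are
# kept in memory but not pushed back to the textbox.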