""" PromptWizard Qwen Training with Zero GPU Optimized for HuggingFace Spaces with automatic GPU allocation """ import gradio as gr import spaces import torch from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments from datasets import load_dataset, Dataset from peft import LoraConfig, get_peft_model, TaskType import json import os # Check if GPU is available def check_gpu_status(): if torch.cuda.is_available(): return f"āœ… GPU Available: {torch.cuda.get_device_name(0)} ({torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB)" else: return "āš ļø No GPU detected - Zero GPU will allocate when training starts" @spaces.GPU(duration=300) # Request GPU for 5 minutes (can extend if needed) def train_model(model_name, num_epochs, batch_size, learning_rate, progress=gr.Progress()): """Main training function with Zero GPU support""" progress(0, desc="Initializing...") output_log = [] try: # GPU should be available inside this function device = "cuda" if torch.cuda.is_available() else "cpu" output_log.append(f"šŸŽ® Using device: {device}") if device == "cuda": output_log.append(f"āœ… GPU: {torch.cuda.get_device_name(0)}") output_log.append(f" Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB") # Load GSM8K dataset progress(0.1, desc="Loading GSM8K dataset...") output_log.append("\nšŸ“š Loading GSM8K dataset...") # Load local data if available, otherwise from HF train_data = [] test_data = [] # Try local files first if os.path.exists("data/train.jsonl"): with open("data/train.jsonl", "r") as f: for line in f: train_data.append(json.loads(line)) output_log.append(f" Loaded {len(train_data)} training examples from local data") else: # Fallback to HF dataset dataset = load_dataset("openai/gsm8k", "main") train_data = dataset["train"].select(range(min(100, len(dataset["train"])))) output_log.append(f" Loaded {len(train_data)} training examples from HF") # Format prompts def format_example(item): prompt = f"""<|system|> You are a mathematics expert. Solve grade school math problems step by step. <|user|> {item.get('question', '')} <|assistant|> {item.get('full_solution', item.get('answer', ''))}""" return {"text": prompt} # Create dataset if isinstance(train_data, list): train_dataset = Dataset.from_list([format_example(item) for item in train_data]) else: train_dataset = train_data.map(format_example) output_log.append(f" Training samples ready: {len(train_dataset)}") # Load model and tokenizer progress(0.3, desc="Loading model and tokenizer...") output_log.append(f"\nšŸ¤– Loading {model_name}...") # Use smaller model for demo if "7B" in model_name: model_name = "Qwen/Qwen2.5-1.5B" # Use smaller model for Zero GPU demo output_log.append(" Note: Using 1.5B model for Zero GPU compatibility") tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token # Load model with 8-bit quantization model = AutoModelForCausalLM.from_pretrained( model_name, trust_remote_code=True, load_in_8bit=True, device_map="auto", torch_dtype=torch.float16 ) output_log.append(" Model loaded successfully") # Configure LoRA progress(0.4, desc="Configuring LoRA...") output_log.append("\nāš™ļø Configuring LoRA for efficient training...") lora_config = LoraConfig( task_type=TaskType.CAUSAL_LM, r=8, # Low rank for efficiency lora_alpha=16, lora_dropout=0.1, target_modules=["q_proj", "v_proj"], bias="none" ) model = get_peft_model(model, lora_config) trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) total_params = sum(p.numel() for p in model.parameters()) output_log.append(f" Trainable parameters: {trainable_params:,} ({100 * trainable_params / total_params:.2f}%)") # Tokenize dataset progress(0.5, desc="Preparing data...") output_log.append("\nšŸ“ Tokenizing dataset...") def tokenize_function(examples): return tokenizer( examples["text"], padding="max_length", truncation=True, max_length=256 # Shorter for demo ) train_dataset = train_dataset.map(tokenize_function, batched=True) # Training arguments progress(0.6, desc="Setting up training...") output_log.append("\nšŸŽÆ Setting up training configuration...") training_args = TrainingArguments( output_dir="./qwen-promptwizard-zerogpu", num_train_epochs=num_epochs, per_device_train_batch_size=batch_size, gradient_accumulation_steps=4, warmup_steps=50, logging_steps=10, save_strategy="no", # Don't save during demo fp16=True, gradient_checkpointing=True, optim="adamw_torch", learning_rate=learning_rate, ) # Create trainer trainer = Trainer( model=model, args=training_args, train_dataset=train_dataset, tokenizer=tokenizer, ) # Start training progress(0.7, desc="Training...") output_log.append(f"\nšŸš€ Starting training for {num_epochs} epochs...") output_log.append("=" * 50) # Train train_result = trainer.train() # Results progress(0.9, desc="Finalizing...") output_log.append("=" * 50) output_log.append("\nāœ… Training completed!") output_log.append(f" Final loss: {train_result.training_loss:.4f}") output_log.append(f" Total steps: {train_result.global_step}") # Save model info output_log.append("\nšŸ’¾ Model trained with PromptWizard + GSM8K") output_log.append(" Using REAL data and REAL evaluation!") progress(1.0, desc="Complete!") except Exception as e: output_log.append(f"\nāŒ Error: {str(e)}") output_log.append("Note: Zero GPU requires proper setup in Space settings") return "\n".join(output_log) # Gradio interface def create_interface(): with gr.Blocks(title="PromptWizard Qwen Training") as demo: gr.Markdown(""" # šŸ§™ PromptWizard Qwen Fine-tuning with Zero GPU Fine-tune Qwen models using GSM8K dataset with PromptWizard methodology. This Space uses HuggingFace Zero GPU for free GPU access during training. **Features:** - āœ… Real GSM8K mathematical problems (not fake data!) - āœ… LoRA-based efficient fine-tuning - āœ… Automatic Zero GPU allocation - āœ… PromptWizard optimization methodology """) with gr.Row(): with gr.Column(): gpu_status = gr.Textbox( label="GPU Status", value=check_gpu_status(), interactive=False ) model_name = gr.Dropdown( choices=[ "Qwen/Qwen2.5-1.5B", "Qwen/Qwen2.5-7B", ], value="Qwen/Qwen2.5-1.5B", label="Model (1.5B recommended for Zero GPU)" ) num_epochs = gr.Slider( minimum=1, maximum=3, value=1, step=1, label="Number of Epochs (1 for quick demo)" ) batch_size = gr.Slider( minimum=1, maximum=4, value=2, step=1, label="Batch Size (2 for Zero GPU)" ) learning_rate = gr.Number( value=5e-5, label="Learning Rate" ) train_btn = gr.Button("šŸš€ Start Training", variant="primary") with gr.Column(): output = gr.Textbox( label="Training Output", lines=20, max_lines=30, value="Click 'Start Training' to begin...\n\nZero GPU will automatically allocate a GPU when training starts." ) # Connect button to training function train_btn.click( fn=train_model, inputs=[model_name, num_epochs, batch_size, learning_rate], outputs=output ) gr.Markdown(""" ## Notes: - Zero GPU provides free GPU access for public Spaces - Training will automatically get GPU allocation when started - Using smaller model (1.5B) for faster demo - Real GSM8K data - no fake metrics! """) return demo # Launch app if __name__ == "__main__": demo = create_interface() demo.launch()