Commit 83a9c3e (verified) by ubden · 1 Parent(s): f77678c

Upload 15 files

Files changed (3):
  1. deployment_guide.md +26 -4
  2. handler.py +51 -11
  3. requirements.txt +10 -2
deployment_guide.md CHANGED
@@ -14,7 +14,16 @@
    pip install -r requirements.txt
    ```
 
-2. **Flash Attention (optional, for performance):**
+2. **LLaVA Architecture Support (critical for PULSE-7B):**
+   ```bash
+   # If you see the "llava_llama architecture not recognized" error:
+   pip install --upgrade transformers
+
+   # Or install the latest development version:
+   pip install git+https://github.com/huggingface/transformers.git
+   ```
+
+3. **Flash Attention (optional, for performance):**
    ```bash
    pip install flash-attn --no-build-isolation
    ```
@@ -103,19 +112,32 @@ print(f"GPU Memory: {torch.cuda.memory_allocated()/1024**3:.2f}GB")
 
 #### Common Issues:
 
-1. **CUDA Out of Memory**
+1. **"llava_llama architecture not recognized" Error**
+   ```bash
+   # Solution 1: Update transformers
+   pip install --upgrade "transformers>=4.44.0"
+
+   # Solution 2: Install from source
+   pip install git+https://github.com/huggingface/transformers.git
+
+   # Solution 3: Add to requirements.txt
+   git+https://github.com/huggingface/transformers.git
+   ```
+
+2. **CUDA Out of Memory**
    - Reduce the batch size
    - Lower the `max_new_tokens` value
    - Use gradient checkpointing
 
-2. **Slow Image Processing**
+3. **Slow Image Processing**
    - Increase the image timeout value
    - Adjust the image resize threshold
 
-3. **Model Loading Issues**
+4. **Model Loading Issues**
    - Check your HuggingFace token
    - Verify the network connection
    - Clear the cache directory
+   - Check the transformers version
 
 ### Security Best Practices
 
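The troubleshooting entries above all hinge on which transformers release is actually installed in the endpoint image. The sketch below is a minimal, illustrative check; the 4.44.0 threshold mirrors the pin added to requirements.txt in this commit, not an official compatibility table:

```python
# Illustrative version check: compares the installed transformers release
# against the >=4.44.0 pin used in requirements.txt.
import transformers
from packaging.version import Version  # packaging is pulled in by transformers itself

installed = Version(transformers.__version__)
pinned = Version("4.44.0")

if installed < pinned:
    print(f"transformers {installed} is older than {pinned}; "
          "run: pip install --upgrade transformers")
else:
    print(f"transformers {installed} satisfies the requirements.txt pin")
```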
handler.py CHANGED
@@ -144,18 +144,58 @@ class EndpointHandler:
                 print("✅ Model loaded manually with tokenizer!")
 
             except Exception as e4:
-                print(f"😓 All loading approaches failed!")
-                print(f"Error 1 (AutoModel): {e1}")
-                print(f"Error 2 (LLaVA): {e2}")
-                print(f"Error 3 (Pipeline): {e3}")
-                print(f"Error 4 (Manual): {e4}")
+                print(f"⚠️ Manual approach also failed: {e4}")
 
-                # Complete failure - set everything to None
-                self.model = None
-                self.processor = None
-                self.tokenizer = None
-                self.pipe = None
-                self.use_pipeline = None
+                # Final attempt: Try with custom architecture loading
+                try:
+                    print("📦 Final attempt: Loading with custom architecture support...")
+
+                    # This approach loads the model with full trust_remote_code
+                    # and lets the model define its own architecture
+                    from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+
+                    # Load config first to understand the model
+                    config = AutoConfig.from_pretrained("PULSE-ECG/PULSE-7B", trust_remote_code=True)
+                    print(f"🔧 Model config loaded: {config.model_type}")
+
+                    # Try to load with the config
+                    self.tokenizer = AutoTokenizer.from_pretrained("PULSE-ECG/PULSE-7B", trust_remote_code=True)
+                    self.model = AutoModelForCausalLM.from_pretrained(
+                        "PULSE-ECG/PULSE-7B",
+                        config=config,
+                        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                        device_map="auto",
+                        low_cpu_mem_usage=True,
+                        trust_remote_code=True
+                    )
+
+                    # Fix padding token if missing
+                    if self.tokenizer.pad_token is None:
+                        self.tokenizer.pad_token = self.tokenizer.eos_token
+                        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+
+                    self.model.eval()
+                    self.use_pipeline = False
+                    print("✅ Model loaded with custom architecture support!")
+
+                except Exception as e5:
+                    print(f"😓 All loading approaches failed!")
+                    print(f"Error 1 (AutoModel): {e1}")
+                    print(f"Error 2 (LLaVA): {e2}")
+                    print(f"Error 3 (Pipeline): {e3}")
+                    print(f"Error 4 (Manual): {e4}")
+                    print(f"Error 5 (Custom): {e5}")
+
+                    print("\n💡 SOLUTION: Update transformers to the latest version:")
+                    print("    pip install --upgrade transformers")
+                    print("    OR: pip install git+https://github.com/huggingface/transformers.git")
+
+                    # Complete failure - set everything to None
+                    self.model = None
+                    self.processor = None
+                    self.tokenizer = None
+                    self.pipe = None
+                    self.use_pipeline = None
 
         # Final status report
         print("\n🔍 Model Loading Status Report:")
requirements.txt CHANGED
@@ -1,4 +1,5 @@
-transformers>=4.40.0
+# Core ML dependencies - PULSE-7B requires latest transformers for llava_llama architecture
+transformers>=4.44.0
 torch>=2.1.0
 accelerate>=0.25.0
 sentencepiece
@@ -9,6 +10,9 @@ protobuf
 Pillow>=9.0.0
 requests>=2.28.0
 
+# LLaVA/Vision model dependencies
+timm>=0.9.0
+
 # Optional performance improvements
 flash-attn>=2.0.0; sys_platform != "darwin"
 bitsandbytes>=0.41.0; sys_platform != "darwin"
@@ -19,4 +23,8 @@ typing-extensions>=4.0.0
 psutil>=5.8.0
 
 # HuggingFace Inference specific
-huggingface-hub>=0.16.0
+huggingface-hub>=0.16.0
+
+# Alternative: Install from source if stable version doesn't work
+# Uncomment the line below if you get llava_llama architecture errors:
+# git+https://github.com/huggingface/transformers.git
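After installing the updated requirements, a quick import check such as the sketch below confirms that the dependencies touched by this commit resolve on the deployment image; the package names are taken from this file, and nothing here is PULSE-specific:

```python
# Illustrative post-install check: verifies the packages added or pinned in
# this commit import cleanly and reports their installed versions.
from importlib import import_module
from importlib.metadata import PackageNotFoundError, version

CHECKS = [
    ("transformers", "transformers"),
    ("timm", "timm"),
    ("huggingface-hub", "huggingface_hub"),
]

for dist_name, module_name in CHECKS:
    try:
        import_module(module_name)
        print(f"{dist_name} {version(dist_name)}: OK")
    except (ImportError, PackageNotFoundError) as err:
        print(f"{dist_name}: problem -> {err}")
```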