Commit 83a9c3e (verified) by ubden · 1 Parent(s): f77678c

Upload 15 files

Files changed (3):
  1. deployment_guide.md +26 -4
  2. handler.py +51 -11
  3. requirements.txt +10 -2
deployment_guide.md CHANGED
@@ -14,7 +14,16 @@
    pip install -r requirements.txt
    ```
 
-2. **Flash Attention (optional, for performance):**
+2. **LLaVA Architecture Support (critical for PULSE-7B):**
+   ```bash
+   # If you see the "llava_llama architecture not recognized" error:
+   pip install --upgrade transformers
+
+   # Or install the latest development version:
+   pip install git+https://github.com/huggingface/transformers.git
+   ```
+
+3. **Flash Attention (optional, for performance):**
    ```bash
    pip install flash-attn --no-build-isolation
    ```
@@ -103,19 +112,32 @@ print(f"GPU Memory: {torch.cuda.memory_allocated()/1024**3:.2f}GB")
 
 #### Common Issues:
 
-1. **CUDA Out of Memory**
+1. **"llava_llama architecture not recognized" Error**
+   ```bash
+   # Solution 1: Update transformers
+   pip install --upgrade "transformers>=4.44.0"
+
+   # Solution 2: Install from source
+   pip install git+https://github.com/huggingface/transformers.git
+
+   # Solution 3: Add to requirements.txt
+   git+https://github.com/huggingface/transformers.git
+   ```
+
+2. **CUDA Out of Memory**
    - Reduce the batch size
    - Lower the `max_new_tokens` value
    - Use gradient checkpointing
 
-2. **Slow Image Processing**
+3. **Slow Image Processing**
    - Increase the image timeout value
    - Adjust the image resize threshold
 
-3. **Model Loading Issues**
+4. **Model Loading Issues**
    - Check your HuggingFace token
    - Verify the network connection
    - Clear the cache directory
+   - Check the transformers version
 
 ### Security Best Practices
 
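The troubleshooting entries above all hinge on which transformers release is actually installed in the endpoint image. The sketch below is a minimal, illustrative check; the 4.44.0 threshold mirrors the pin added to requirements.txt in this commit, not an official compatibility table:

```python
# Illustrative version check: compares the installed transformers release
# against the >=4.44.0 pin used in requirements.txt.
import transformers
from packaging.version import Version  # packaging is pulled in by transformers itself

installed = Version(transformers.__version__)
pinned = Version("4.44.0")

if installed < pinned:
    print(f"transformers {installed} is older than {pinned}; "
          "run: pip install --upgrade transformers")
else:
    print(f"transformers {installed} satisfies the requirements.txt pin")
```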
handler.py CHANGED
@@ -144,18 +144,58 @@ class EndpointHandler:
                 print("✅ Model loaded manually with tokenizer!")
 
             except Exception as e4:
-                print(f"😓 All loading approaches failed!")
-                print(f"Error 1 (AutoModel): {e1}")
-                print(f"Error 2 (LLaVA): {e2}")
-                print(f"Error 3 (Pipeline): {e3}")
-                print(f"Error 4 (Manual): {e4}")
+                print(f"⚠️ Manual approach also failed: {e4}")
 
-                # Complete failure - set everything to None
-                self.model = None
-                self.processor = None
-                self.tokenizer = None
-                self.pipe = None
-                self.use_pipeline = None
+                # Final attempt: Try with custom architecture loading
+                try:
+                    print("📦 Final attempt: Loading with custom architecture support...")
+
+                    # This approach loads the model with full trust_remote_code
+                    # and lets the model define its own architecture
+                    from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+
+                    # Load config first to understand the model
+                    config = AutoConfig.from_pretrained("PULSE-ECG/PULSE-7B", trust_remote_code=True)
+                    print(f"🔧 Model config loaded: {config.model_type}")
+
+                    # Try to load with the config
+                    self.tokenizer = AutoTokenizer.from_pretrained("PULSE-ECG/PULSE-7B", trust_remote_code=True)
+                    self.model = AutoModelForCausalLM.from_pretrained(
+                        "PULSE-ECG/PULSE-7B",
+                        config=config,
+                        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                        device_map="auto",
+                        low_cpu_mem_usage=True,
+                        trust_remote_code=True
+                    )
+
+                    # Fix padding token if missing
+                    if self.tokenizer.pad_token is None:
+                        self.tokenizer.pad_token = self.tokenizer.eos_token
+                        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+
+                    self.model.eval()
+                    self.use_pipeline = False
+                    print("✅ Model loaded with custom architecture support!")
+
+                except Exception as e5:
+                    print(f"😓 All loading approaches failed!")
+                    print(f"Error 1 (AutoModel): {e1}")
+                    print(f"Error 2 (LLaVA): {e2}")
+                    print(f"Error 3 (Pipeline): {e3}")
+                    print(f"Error 4 (Manual): {e4}")
+                    print(f"Error 5 (Custom): {e5}")
+
+                    print("\n💡 SOLUTION: Update transformers to the latest version:")
+                    print("    pip install --upgrade transformers")
+                    print("    OR: pip install git+https://github.com/huggingface/transformers.git")
+
+                    # Complete failure - set everything to None
+                    self.model = None
+                    self.processor = None
+                    self.tokenizer = None
+                    self.pipe = None
+                    self.use_pipeline = None
 
         # Final status report
         print("\n🔍 Model Loading Status Report:")
requirements.txt CHANGED
@@ -1,4 +1,5 @@
-transformers>=4.40.0
+# Core ML dependencies - PULSE-7B requires latest transformers for llava_llama architecture
+transformers>=4.44.0
 torch>=2.1.0
 accelerate>=0.25.0
 sentencepiece
@@ -9,6 +10,9 @@ protobuf
 Pillow>=9.0.0
 requests>=2.28.0
 
+# LLaVA/Vision model dependencies
+timm>=0.9.0
+
 # Optional performance improvements
 flash-attn>=2.0.0; sys_platform != "darwin"
 bitsandbytes>=0.41.0; sys_platform != "darwin"
@@ -19,4 +23,8 @@ typing-extensions>=4.0.0
 psutil>=5.8.0
 
 # HuggingFace Inference specific
-huggingface-hub>=0.16.0
+huggingface-hub>=0.16.0
+
+# Alternative: Install from source if stable version doesn't work
+# Uncomment the line below if you get llava_llama architecture errors:
+# git+https://github.com/huggingface/transformers.git
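After installing the updated requirements, a quick import check such as the sketch below confirms that the dependencies touched by this commit resolve on the deployment image; the package names are taken from this file, and nothing here is PULSE-specific:

```python
# Illustrative post-install check: verifies the packages added or pinned in
# this commit import cleanly and reports their installed versions.
from importlib import import_module
from importlib.metadata import PackageNotFoundError, version

CHECKS = [
    ("transformers", "transformers"),
    ("timm", "timm"),
    ("huggingface-hub", "huggingface_hub"),
]

for dist_name, module_name in CHECKS:
    try:
        import_module(module_name)
        print(f"{dist_name} {version(dist_name)}: OK")
    except (ImportError, PackageNotFoundError) as err:
        print(f"{dist_name}: problem -> {err}")
```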