ubden committed on
Commit
d734fab
·
verified ·
1 Parent(s): 9313c24

Fix LLaVA vision capabilities

Browse files
Files changed (3) hide show
  1. README.md +18 -24
  2. handler.py +121 -102
  3. test_requests.json +1 -1
README.md CHANGED
@@ -150,7 +150,7 @@ curl -X POST "https://YOUR-ENDPOINT.endpoints.huggingface.cloud" \
150
  }'
151
  ```
152
 
153
- #### Text-only Request (DeepSeek Türkçe Yorum Aktif)
154
  ```bash
155
  curl -X POST "https://YOUR-ENDPOINT.endpoints.huggingface.cloud" \
156
  -H "Authorization: Bearer YOUR_HF_TOKEN" \
@@ -162,8 +162,7 @@ curl -X POST "https://YOUR-ENDPOINT.endpoints.huggingface.cloud" \
162
  "parameters": {
163
  "max_new_tokens": 256,
164
  "temperature": 0.7,
165
- "top_p": 0.95,
166
- "enable_turkish_commentary": true
167
  }
168
  }'
169
  ```
@@ -185,7 +184,7 @@ class PULSEEndpoint:
185
  "Content-Type": "application/json"
186
  }
187
 
188
- def analyze_text(self, text, max_new_tokens=256, temperature=0.7, enable_turkish_commentary=True):
189
  """
190
  Send text to PULSE-7B endpoint for analysis
191
 
@@ -223,7 +222,7 @@ class PULSEEndpoint:
223
  else:
224
  raise Exception(f"Request failed: {response.status_code} - {response.text}")
225
 
226
- def analyze_image_url(self, image_url, query, max_new_tokens=512, temperature=0.2, enable_turkish_commentary=True):
227
  """
228
  Analyze ECG image from URL with DeepSeek Turkish commentary
229
 
@@ -264,7 +263,7 @@ class PULSEEndpoint:
264
  else:
265
  raise Exception(f"Request failed: {response.status_code} - {response.text}")
266
 
267
- def analyze_image_base64(self, image_path, query, max_new_tokens=512, temperature=0.2, enable_turkish_commentary=True):
268
  """
269
  Analyze ECG image from local file with DeepSeek Turkish commentary
270
 
@@ -319,26 +318,20 @@ if __name__ == "__main__":
319
  hf_token="YOUR_HF_TOKEN"
320
  )
321
 
322
- # Example 1: Text analysis with Turkish commentary
323
  response = endpoint.analyze_text(
324
- "What are the characteristics of a normal sinus rhythm?",
325
- enable_turkish_commentary=True
326
  )
327
  print("English Response:", response["generated_text"])
328
- if "comment_text" in response:
329
- print("Turkish Commentary:", response["comment_text"])
330
 
331
- # Example 2: Image URL analysis with Turkish commentary
332
  response = endpoint.analyze_image_url(
333
  image_url="https://i.imgur.com/7uuejqO.jpeg",
334
- query="What are the main features and diagnosis in this ECG image?",
335
- enable_turkish_commentary=True
336
  )
337
  print("English Analysis:", response["generated_text"])
338
- if "comment_text" in response:
339
- print("Turkish Commentary:", response["comment_text"])
340
 
341
- # Example 3: Local image analysis with Turkish commentary
342
  response = endpoint.analyze_image_base64(
343
  image_path="./ecg_image.jpg",
344
  query="Analyze this ECG for any abnormalities",
@@ -348,13 +341,14 @@ if __name__ == "__main__":
348
  if "comment_text" in response:
349
  print("Turkish Commentary:", response["comment_text"])
350
 
351
- # Example 4: Analysis without Turkish commentary
352
- response = endpoint.analyze_image_url(
353
- image_url="https://i.imgur.com/7uuejqO.jpeg",
354
- query="Brief ECG analysis",
355
- enable_turkish_commentary=False
356
  )
357
- print("English Only Response:", response["generated_text"])
 
 
358
  ```
359
 
360
  ### JavaScript / Node.js
@@ -602,7 +596,7 @@ main();
602
  | `top_k` | int | 50 | Top-k sampling parameter |
603
  | `do_sample` | bool | true | Whether to use sampling or greedy decoding |
604
  | `repetition_penalty` | float | 1.05 | Penalty for repeating tokens (1.0-2.0) |
605
- | `enable_turkish_commentary` | bool | true | Enable/disable DeepSeek Turkish commentary |
606
  | `deepseek_timeout` | int | 30 | DeepSeek API timeout in seconds (10-60) |
607
  | `stop` | array | ["</s>"] | Stop sequences for generation |
608
  | `return_full_text` | bool | false | Return full text including input |
 
150
  }'
151
  ```
152
 
153
+ #### Text-only Request (DeepSeek Türkçe Yorum Deaktif - Default)
154
  ```bash
155
  curl -X POST "https://YOUR-ENDPOINT.endpoints.huggingface.cloud" \
156
  -H "Authorization: Bearer YOUR_HF_TOKEN" \
 
162
  "parameters": {
163
  "max_new_tokens": 256,
164
  "temperature": 0.7,
165
+ "top_p": 0.95
 
166
  }
167
  }'
168
  ```
 
184
  "Content-Type": "application/json"
185
  }
186
 
187
+ def analyze_text(self, text, max_new_tokens=256, temperature=0.7, enable_turkish_commentary=False):
188
  """
189
  Send text to PULSE-7B endpoint for analysis
190
 
 
222
  else:
223
  raise Exception(f"Request failed: {response.status_code} - {response.text}")
224
 
225
+ def analyze_image_url(self, image_url, query, max_new_tokens=512, temperature=0.2, enable_turkish_commentary=False):
226
  """
227
  Analyze ECG image from URL with DeepSeek Turkish commentary
228
 
 
263
  else:
264
  raise Exception(f"Request failed: {response.status_code} - {response.text}")
265
 
266
+ def analyze_image_base64(self, image_path, query, max_new_tokens=512, temperature=0.2, enable_turkish_commentary=False):
267
  """
268
  Analyze ECG image from local file with DeepSeek Turkish commentary
269
 
 
318
  hf_token="YOUR_HF_TOKEN"
319
  )
320
 
321
+ # Example 1: Text analysis (default - no Turkish commentary)
322
  response = endpoint.analyze_text(
323
+ "What are the characteristics of a normal sinus rhythm?"
 
324
  )
325
  print("English Response:", response["generated_text"])
 
 
326
 
327
+ # Example 2: Image URL analysis (default - no Turkish commentary)
328
  response = endpoint.analyze_image_url(
329
  image_url="https://i.imgur.com/7uuejqO.jpeg",
330
+ query="What are the main features and diagnosis in this ECG image?"
 
331
  )
332
  print("English Analysis:", response["generated_text"])
 
 
333
 
334
+ # Example 3: Local image analysis with Turkish commentary (explicitly enabled)
335
  response = endpoint.analyze_image_base64(
336
  image_path="./ecg_image.jpg",
337
  query="Analyze this ECG for any abnormalities",
 
341
  if "comment_text" in response:
342
  print("Turkish Commentary:", response["comment_text"])
343
 
344
+ # Example 4: Text analysis with Turkish commentary (explicitly enabled)
345
+ response = endpoint.analyze_text(
346
+ "What are the characteristics of atrial fibrillation?",
347
+ enable_turkish_commentary=True
 
348
  )
349
+ print("English Response:", response["generated_text"])
350
+ if "comment_text" in response:
351
+ print("Turkish Commentary:", response["comment_text"])
352
  ```
353
 
354
  ### JavaScript / Node.js
 
596
  | `top_k` | int | 50 | Top-k sampling parameter |
597
  | `do_sample` | bool | true | Whether to use sampling or greedy decoding |
598
  | `repetition_penalty` | float | 1.05 | Penalty for repeating tokens (1.0-2.0) |
599
+ | `enable_turkish_commentary` | bool | false | Enable/disable DeepSeek Turkish commentary |
600
  | `deepseek_timeout` | int | 30 | DeepSeek API timeout in seconds (10-60) |
601
  | `stop` | array | ["</s>"] | Stop sequences for generation |
602
  | `return_full_text` | bool | false | Return full text including input |
handler.py CHANGED
@@ -46,66 +46,53 @@ class EndpointHandler:
46
  print(f"🖥️ Running on: {self.device}")
47
 
48
  try:
49
- # First attempt - using pipeline (easiest and most stable way)
50
- from transformers import pipeline
51
 
52
- print("📦 Fetching model from HuggingFace Hub...")
53
- self.pipe = pipeline(
54
- "text-generation",
55
- model="PULSE-ECG/PULSE-7B",
56
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
57
- device=0 if torch.cuda.is_available() else -1,
58
- trust_remote_code=True,
59
- model_kwargs={
60
- "low_cpu_mem_usage": True,
61
- "use_safetensors": True
62
- }
63
  )
64
- print("✅ Model loaded successfully via pipeline!")
 
 
65
 
66
  except Exception as e:
67
- print(f"⚠️ Pipeline didn't work out: {e}")
68
- print("🔄 Let me try a different approach...")
69
 
70
  try:
71
- # Plan B - load model and tokenizer separately
72
- from transformers import AutoTokenizer, LlamaForCausalLM
73
-
74
- # Get the tokenizer ready
75
- print("📖 Setting up tokenizer...")
76
- self.tokenizer = AutoTokenizer.from_pretrained(
77
- "PULSE-ECG/PULSE-7B",
78
- trust_remote_code=True
79
- )
80
 
81
- # Load the model as Llama (it works, trust me!)
82
- print("🧠 Loading the model as Llama...")
83
- self.model = LlamaForCausalLM.from_pretrained(
84
- "PULSE-ECG/PULSE-7B",
85
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
86
- device_map="auto",
87
- low_cpu_mem_usage=True,
88
- trust_remote_code=True
 
 
 
89
  )
90
-
91
- # Quick fix for padding token if it's missing
92
- if self.tokenizer.pad_token is None:
93
- self.tokenizer.pad_token = self.tokenizer.eos_token
94
- self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
95
-
96
- self.model.eval()
97
- self.use_pipeline = False
98
- print("✅ Model loaded successfully via direct loading!")
99
 
100
  except Exception as e2:
101
- print(f"😓 That didn't work either: {e2}")
102
  # If all else fails, we'll handle it gracefully
103
  self.pipe = None
104
  self.model = None
105
- self.tokenizer = None
106
  self.use_pipeline = None
107
- else:
108
- self.use_pipeline = True
109
 
110
  def process_image_input(self, image_input):
111
  """
@@ -292,11 +279,9 @@ class EndpointHandler:
292
 
293
  if image:
294
  print(f"✅ Image processed successfully: {image.size[0]}x{image.size[1]} pixels")
295
- # Add image context to the prompt for better processing
296
- if text:
297
- text = f"<image>\nUser query: {text}"
298
- else:
299
- text = "<image>\nAnalyze this medical image."
300
  else:
301
  # Simple string input
302
  text = str(inputs)
@@ -308,7 +293,7 @@ class EndpointHandler:
308
  parameters = data.get("parameters", {})
309
 
310
  # Check if Turkish commentary is requested
311
- enable_turkish_commentary = parameters.get("enable_turkish_commentary", True) # Default true
312
  deepseek_timeout = parameters.get("deepseek_timeout", 30)
313
 
314
  # Use utils for parameter sanitization if available
@@ -395,67 +380,101 @@ class EndpointHandler:
395
 
396
  return [result_dict]
397
 
398
- # Manual generation mode
399
  else:
400
- # Tokenize the input
401
- encoded = self.tokenizer(
402
- text,
403
- return_tensors="pt",
404
- truncation=True,
405
- max_length=4096 # Increased context length
406
- )
407
-
408
- input_ids = encoded["input_ids"].to(self.device)
409
- attention_mask = encoded.get("attention_mask")
410
- if attention_mask is not None:
411
- attention_mask = attention_mask.to(self.device)
412
-
413
- # Prepare stop token IDs
414
- stop_token_ids = []
415
- if stop_sequences:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  for stop_seq in stop_sequences:
417
- stop_tokens = self.tokenizer.encode(stop_seq, add_special_tokens=False)
418
- if stop_tokens:
419
- stop_token_ids.extend(stop_tokens)
420
-
421
- # Generate the response
422
- with torch.no_grad():
423
- generation_kwargs = {
424
- "input_ids": input_ids,
425
- "attention_mask": attention_mask,
426
- "max_new_tokens": max_new_tokens,
427
- "temperature": temperature,
428
- "top_p": top_p,
429
- "do_sample": do_sample,
430
- "repetition_penalty": repetition_penalty,
431
- "pad_token_id": self.tokenizer.pad_token_id,
432
- "eos_token_id": self.tokenizer.eos_token_id
433
- }
434
 
435
- # Add stop token IDs if we have them
436
- if stop_token_ids:
437
- generation_kwargs["eos_token_id"] = stop_token_ids + [self.tokenizer.eos_token_id]
438
 
439
- outputs = self.model.generate(**generation_kwargs)
440
-
441
- # Decode only the new tokens (not the input)
442
- generated_ids = outputs[0][input_ids.shape[-1]:]
443
- generated_text = self.tokenizer.decode(
444
- generated_ids,
445
- skip_special_tokens=True,
446
- clean_up_tokenization_spaces=True
447
- )
448
-
449
- # Clean up any remaining stop sequences
450
- for stop_seq in stop_sequences:
451
- if generated_text.endswith(stop_seq):
452
- generated_text = generated_text[:-len(stop_seq)].rstrip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
 
454
  success = True
455
  result = {
456
  "generated_text": generated_text.strip(),
457
  "model": "PULSE-7B",
458
- "processing_method": "manual"
459
  }
460
 
461
  # Add Turkish commentary if requested
 
46
  print(f"🖥️ Running on: {self.device}")
47
 
48
  try:
49
+ # For LLaVA models, we need to load them properly with vision capabilities
50
+ from transformers import AutoProcessor, LlavaForConditionalGeneration
51
 
52
+ print("📦 Loading LLaVA model with vision capabilities...")
53
+ self.processor = AutoProcessor.from_pretrained("PULSE-ECG/PULSE-7B", trust_remote_code=True)
54
+ self.model = LlavaForConditionalGeneration.from_pretrained(
55
+ "PULSE-ECG/PULSE-7B",
56
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
57
+ device_map="auto",
58
+ low_cpu_mem_usage=True,
59
+ trust_remote_code=True
 
 
 
60
  )
61
+ self.model.eval()
62
+ self.use_pipeline = False
63
+ print("✅ LLaVA model loaded successfully with vision support!")
64
 
65
  except Exception as e:
66
+ print(f"⚠️ LLaVA loading failed: {e}")
67
+ print("🔄 Falling back to pipeline approach...")
68
 
69
  try:
70
+ # Fallback - using pipeline but aware it won't handle images properly
71
+ from transformers import pipeline
 
 
 
 
 
 
 
72
 
73
+ print("📦 Fetching model from HuggingFace Hub...")
74
+ self.pipe = pipeline(
75
+ "text-generation",
76
+ model="PULSE-ECG/PULSE-7B",
77
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
78
+ device=0 if torch.cuda.is_available() else -1,
79
+ trust_remote_code=True,
80
+ model_kwargs={
81
+ "low_cpu_mem_usage": True,
82
+ "use_safetensors": True
83
+ }
84
  )
85
+ self.use_pipeline = True
86
+ self.processor = None
87
+ print("✅ Model loaded via pipeline (text-only mode)!")
 
 
 
 
 
 
88
 
89
  except Exception as e2:
90
+ print(f"😓 Pipeline also failed: {e2}")
91
  # If all else fails, we'll handle it gracefully
92
  self.pipe = None
93
  self.model = None
94
+ self.processor = None
95
  self.use_pipeline = None
 
 
96
 
97
  def process_image_input(self, image_input):
98
  """
 
279
 
280
  if image:
281
  print(f"✅ Image processed successfully: {image.size[0]}x{image.size[1]} pixels")
282
+ # Store the image for later use with LLaVA model
283
+ # Don't modify the text prompt - let LLaVA handle the image-text combination
284
+ print(f"🖼️ Image will be passed to model: {image.size} pixels")
 
 
285
  else:
286
  # Simple string input
287
  text = str(inputs)
 
293
  parameters = data.get("parameters", {})
294
 
295
  # Check if Turkish commentary is requested
296
+ enable_turkish_commentary = parameters.get("enable_turkish_commentary", False) # Default false
297
  deepseek_timeout = parameters.get("deepseek_timeout", 30)
298
 
299
  # Use utils for parameter sanitization if available
 
380
 
381
  return [result_dict]
382
 
383
+ # Manual generation mode (LLaVA with vision support)
384
  else:
385
+ if hasattr(self, 'processor') and self.processor is not None:
386
+ # LLaVA model with vision support
387
+ print("🔥 Using LLaVA model with vision capabilities")
388
+
389
+ if image is not None:
390
+ # Process both image and text with LLaVA processor
391
+ inputs = self.processor(text, image, return_tensors="pt")
392
+ print(f"🖼️ LLaVA processing image + text: '{text[:50]}...'")
393
+ else:
394
+ # Text-only processing
395
+ inputs = self.processor(text, return_tensors="pt")
396
+ print(f"📝 LLaVA processing text-only: '{text[:50]}...'")
397
+
398
+ # Move inputs to device
399
+ inputs = {k: v.to(self.device) for k, v in inputs.items()}
400
+
401
+ # Generate response
402
+ with torch.no_grad():
403
+ generation_kwargs = {
404
+ **inputs,
405
+ "max_new_tokens": max_new_tokens,
406
+ "temperature": temperature,
407
+ "top_p": top_p,
408
+ "do_sample": do_sample,
409
+ "repetition_penalty": repetition_penalty,
410
+ }
411
+
412
+ outputs = self.model.generate(**generation_kwargs)
413
+
414
+ # Decode the response
415
+ generated_text = self.processor.decode(outputs[0], skip_special_tokens=True)
416
+
417
+ # Clean up the generated text (remove input prompt)
418
+ if text in generated_text:
419
+ generated_text = generated_text.replace(text, "").strip()
420
+
421
+ # Clean up any remaining stop sequences
422
  for stop_seq in stop_sequences:
423
+ if generated_text.endswith(stop_seq):
424
+ generated_text = generated_text[:-len(stop_seq)].rstrip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
 
426
+ print(f"✅ LLaVA generated response: {len(generated_text)} characters")
 
 
427
 
428
+ else:
429
+ # Fallback to text-only processing (shouldn't happen with proper LLaVA loading)
430
+ print("⚠️ No processor available, falling back to text-only mode")
431
+
432
+ # This is the old tokenizer-based approach (without image support)
433
+ if not hasattr(self, 'tokenizer') or self.tokenizer is None:
434
+ return [{
435
+ "generated_text": "",
436
+ "error": "No tokenizer available for text processing",
437
+ "model": "PULSE-7B",
438
+ "processing_method": "manual"
439
+ }]
440
+
441
+ encoded = self.tokenizer(
442
+ text,
443
+ return_tensors="pt",
444
+ truncation=True,
445
+ max_length=4096
446
+ )
447
+
448
+ input_ids = encoded["input_ids"].to(self.device)
449
+ attention_mask = encoded.get("attention_mask")
450
+ if attention_mask is not None:
451
+ attention_mask = attention_mask.to(self.device)
452
+
453
+ with torch.no_grad():
454
+ outputs = self.model.generate(
455
+ input_ids=input_ids,
456
+ attention_mask=attention_mask,
457
+ max_new_tokens=max_new_tokens,
458
+ temperature=temperature,
459
+ top_p=top_p,
460
+ do_sample=do_sample,
461
+ repetition_penalty=repetition_penalty,
462
+ pad_token_id=self.tokenizer.pad_token_id,
463
+ eos_token_id=self.tokenizer.eos_token_id
464
+ )
465
+
466
+ generated_ids = outputs[0][input_ids.shape[-1]:]
467
+ generated_text = self.tokenizer.decode(
468
+ generated_ids,
469
+ skip_special_tokens=True,
470
+ clean_up_tokenization_spaces=True
471
+ )
472
 
473
  success = True
474
  result = {
475
  "generated_text": generated_text.strip(),
476
  "model": "PULSE-7B",
477
+ "processing_method": "llava_vision" if (hasattr(self, 'processor') and self.processor is not None) else "manual"
478
  }
479
 
480
  # Add Turkish commentary if requested
test_requests.json CHANGED
@@ -202,7 +202,7 @@
202
  "query", "text", "prompt"
203
  ],
204
  "deepseek_integration": {
205
- "enable_turkish_commentary": "true/false (default: true)",
206
  "deepseek_timeout": "10-60 seconds (default: 30)",
207
  "environment_variable": "deep_key (DeepSeek API key)",
208
  "commentary_status_values": [
 
202
  "query", "text", "prompt"
203
  ],
204
  "deepseek_integration": {
205
+ "enable_turkish_commentary": "true/false (default: false)",
206
  "deepseek_timeout": "10-60 seconds (default: 30)",
207
  "environment_variable": "deep_key (DeepSeek API key)",
208
  "commentary_status_values": [