| import torch | |
| import numpy as np | |
| from PIL import Image | |
| import cv2 | |
| from typing import List, Dict | |
| import torchvision.transforms as transforms | |
| class SaliencyDetectionManager: | |
| """Visual saliency detection using U2-Net""" | |
| def __init__(self): | |
| print("Loading U2-Net model...") | |
| try: | |
| from torchvision.models.segmentation import deeplabv3_resnet50 | |
| self.model = deeplabv3_resnet50(pretrained=True) | |
| self.model.eval() | |
| if torch.cuda.is_available(): | |
| self.model = self.model.cuda() | |
| except Exception as e: | |
| print(f"Warning: Cannot load deep learning model, using fallback: {e}") | |
| self.model = None | |
| self.threshold = 0.5 | |
| self.min_area = 1600 | |
| self.min_saliency = 0.6 | |
| self.transform = transforms.Compose([ | |
| transforms.Resize((320, 320)), | |
| transforms.ToTensor(), | |
| transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) | |
| ]) | |
| print("✓ SaliencyDetectionManager initialized") | |
| def detect_salient_regions(self, image: Image.Image) -> List[Dict]: | |
| """Detect salient regions""" | |
| img_array = np.array(image) | |
| gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY) | |
| _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) | |
| contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) | |
| regions = [] | |
| height, width = img_array.shape[:2] | |
| for contour in contours: | |
| area = cv2.contourArea(contour) | |
| if area < self.min_area: | |
| continue | |
| x, y, w, h = cv2.boundingRect(contour) | |
| bbox = [float(x), float(y), float(x + w), float(y + h)] | |
| region_img = image.crop(bbox) | |
| regions.append({ | |
| 'bbox': bbox, | |
| 'area': area, | |
| 'saliency_score': min(area / (width * height), 1.0), | |
| 'image': region_img | |
| }) | |
| regions = sorted(regions, key=lambda x: x['saliency_score'], reverse=True) | |
| return regions[:10] | |
| def extract_unknown_regions(self, salient_regions: List[Dict], yolo_detections: List[Dict]) -> List[Dict]: | |
| """Extract salient regions not detected by YOLO""" | |
| unknown_regions = [] | |
| for region in salient_regions: | |
| max_iou = 0.0 | |
| for det in yolo_detections: | |
| iou = self._calculate_iou(region['bbox'], det['bbox']) | |
| max_iou = max(max_iou, iou) | |
| if max_iou < 0.3: | |
| unknown_regions.append(region) | |
| return unknown_regions | |
| def _calculate_iou(self, box1: List[float], box2: List[float]) -> float: | |
| """Calculate IoU (Intersection over Union)""" | |
| x1_min, y1_min, x1_max, y1_max = box1 | |
| x2_min, y2_min, x2_max, y2_max = box2 | |
| inter_xmin = max(x1_min, x2_min) | |
| inter_ymin = max(y1_min, y2_min) | |
| inter_xmax = min(x1_max, x2_max) | |
| inter_ymax = min(y1_max, y2_max) | |
| if inter_xmax < inter_xmin or inter_ymax < inter_ymin: | |
| return 0.0 | |
| inter_area = (inter_xmax - inter_xmin) * (inter_ymax - inter_ymin) | |
| box1_area = (x1_max - x1_min) * (y1_max - y1_min) | |
| box2_area = (x2_max - x2_min) * (y2_max - y2_min) | |
| union_area = box1_area + box2_area - inter_area | |
| return inter_area / union_area if union_area > 0 else 0.0 | |
| print("✓ SaliencyDetectionManager defined") | |