gloriforge committed
Commit dec6b55 · verified · 1 Parent(s): 1532889

Upload folder using huggingface_hub

Files changed (7)
  1. .gitattributes +1 -0
  2. SV_kp.engine +3 -0
  3. config.yml +38 -0
  4. miner.py +284 -0
  5. object-detection.onnx +3 -0
  6. pitch.py +679 -0
  7. player.py +388 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ SV_kp.engine filter=lfs diff=lfs merge=lfs -text
SV_kp.engine ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f99452eb79e064189e2758abd20a78845a5b639fc8b9c4bc650519c83e13e8db
+ size 368289641
config.yml ADDED
@@ -0,0 +1,38 @@
+ Image:
+   from_base: parachutes/python:3.12
+   run_command:
+     - pip install --upgrade setuptools wheel
+     - sudo apt install tensorrt python3-libnvinfer-dev
+     - pip install huggingface_hub==0.19.4 requests opencv-python-headless pydantic onnxruntime onnxruntime-gpu scikit-learn tensorflow torch-tensorrt==2.7 torch==2.7.1 torchvision==0.22.1 pyyaml
+
+   set_workdir: /app
+
+ NodeSelector:
+   gpu_count: 1
+   min_vram_gb_per_gpu: 24
+   # include:
+   #   - a100
+   exclude:
+     - h100
+     - a100
+     - l40s
+     - mi300x
+     - b200
+     - h200
+     - h20
+     - h800
+     - h100_sxm
+     - h100_nvl
+     - a100_sxm
+     - a100_40gb_sxm
+     - a100_40gb
+     - l40
+     - pro_6000
+     - a6000_ada
+     - '5090'
+
+ Chute:
+   timeout_seconds: 300
+   concurrency: 2 # Reduced concurrency to limit memory usage
+   max_instances: 3 # Reduced max instances to limit memory usage
+   scaling_threshold: 0.7 # Higher threshold to reduce scaling frequency
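As a quick check of the values above, here is a minimal sketch (not part of the uploaded files; it only assumes the config is plain YAML with the keys shown) for reading the resource limits locally before deploying:

import yaml

with open("config.yml") as fh:
    cfg = yaml.safe_load(fh)

node, chute = cfg["NodeSelector"], cfg["Chute"]
# Sanity-check the GPU request and print the scaling knobs defined above.
assert node["gpu_count"] == 1 and node["min_vram_gb_per_gpu"] >= 24
print(f"concurrency={chute['concurrency']} "
      f"max_instances={chute['max_instances']} "
      f"scaling_threshold={chute['scaling_threshold']}")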
miner.py ADDED
@@ -0,0 +1,284 @@
+ from pathlib import Path
+
+ from numpy import ndarray
+ import numpy as np
+ from pydantic import BaseModel
+ import sys, os
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+ os.environ["OMP_NUM_THREADS"] = "16"
+ os.environ["TF_NUM_INTRAOP_THREADS"] = "16"
+ os.environ["TF_NUM_INTEROP_THREADS"] = "2"
+ os.environ['CUDA_LAUNCH_BLOCKING'] = '0'
+ # Suppress ONNX Runtime warnings
+ os.environ['ORT_LOGGING_LEVEL'] = '3'
+
+ import logging
+ logging.getLogger('tensorflow').setLevel(logging.ERROR)
+
+ import tensorflow as tf
+ tf.config.threading.set_intra_op_parallelism_threads(16)
+ tf.config.threading.set_inter_op_parallelism_threads(2)
+ os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
+ tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+ tf.get_logger().setLevel('ERROR')
+ tf.autograph.set_verbosity(0)
+
+ from tensorflow.keras import mixed_precision
+ mixed_precision.set_global_policy('mixed_float16')
+ tf.config.optimizer.set_jit(True)
+
+ import torch._dynamo
+ torch._dynamo.config.suppress_errors = True
+ import onnxruntime as ort
+ import gc
+
+ import torch
+ import torch_tensorrt
+ import torchvision.transforms as T
+ import yaml
+ import cv2
+
+ from player import player_detection_result
+ from pitch import process_batch_input, get_cls_net, get_cls_net_l
+
+ class BoundingBox(BaseModel):
+     x1: int
+     y1: int
+     x2: int
+     y2: int
+     cls_id: int
+     conf: float
+
+
+ class TVFrameResult(BaseModel):
+     frame_id: int
+     boxes: list[BoundingBox]
+     keypoints: list[tuple[int, int]]
+
+ class Miner:
+     """
+     This class is responsible for:
+     - Loading ML models.
+     - Running batched predictions on images.
+     - Parsing ML model outputs into structured results (TVFrameResult).
+
+     This class can be modified, but it must have the following to be compatible with the chute:
+     - be named `Miner`
+     - have a `predict_batch` function with the inputs and outputs specified
+     - be stored in a file called `miner.py` which lives in the root of the HFHub repo
+     """
+
+     def __init__(self, path_hf_repo: Path) -> None:
+         """
+         Loads all ML models from the repository.
+         -----(Adjust as needed)----
+
+         Args:
+             path_hf_repo (Path):
+                 Path to the downloaded HuggingFace Hub repository
+
+         Returns:
+             None
+         """
+         global torch
+         device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+         providers = [
+             'CUDAExecutionProvider',
+             'CPUExecutionProvider'
+         ]
+         # providers = [ 'CPUExecutionProvider']
+         model_path = path_hf_repo / "object-detection.onnx"
+         session = ort.InferenceSession(model_path, providers=providers)
+         input_name = session.get_inputs()[0].name
+         height = width = 640
+         dummy = np.zeros((1, 3, height, width), dtype=np.float32)
+         session.run(None, {input_name: dummy})
+         model = session
+         self.bbox_model = model
+         print(f"✅ BBox Model Loaded")
+
+         self.kp_threshold = 0.1
+         # self.lp_threshold = 0.7
+
+         model_kp_path = path_hf_repo / 'SV_kp.engine'
+         model_kp = torch_tensorrt.load(model_kp_path)
+
+         @torch.inference_mode()
+         def run_inference(model, input_tensor: torch.Tensor):
+             input_tensor = input_tensor.to(device).to(memory_format=torch.channels_last)
+             output = model.module().forward(input_tensor)
+             return output
+
+         run_inference(model_kp, torch.randn(8, 3, 540, 960, device=device, dtype=torch.float32))
+         # model_kp_path = path_hf_repo / 'SV_kp'
+         # model_lp_path = path_hf_repo / 'SV_lines'
+         # config_kp_path = path_hf_repo / 'hrnetv2_w48.yaml'
+         # config_lp_path = path_hf_repo / 'hrnetv2_w48_l.yaml'
+         # cfg_kp = yaml.safe_load(open(config_kp_path, 'r'))
+         # cfg_lp = yaml.safe_load(open(config_lp_path, 'r'))
+
+         # loaded_state_kp = torch.load(model_kp_path, map_location=device)
+         # model_kp = get_cls_net(cfg_kp)
+         # model_kp.load_state_dict(loaded_state_kp)
+         # model_kp.to(device)
+         # model_kp.eval()
+
+         # loaded_state_lp = torch.load(model_lp_path, map_location=device)
+         # model_lp = get_cls_net_l(cfg_lp)
+         # model_lp.load_state_dict(loaded_state_lp)
+         # model_lp.to(device)
+         # model_lp.eval()
+
+         # self.transform = T.Resize((540, 960))
+
+         self.keypoints_model = model_kp
+         # self.lines_model = model_lp
+
+         # print("🔥 Warming up compiled models...")
+         # self._warmup_models(device)
+
+         # Batch sizes tuned for GPU utilization; predict_batch halves them on CUDA OOM
+         self.player_batch_size = 64
+         self.pitch_batch_size = 8
+         print(f"✅ Keypoints Model Loaded")
+
+     def __repr__(self) -> str:
+         return f"BBox Model: {type(self.bbox_model).__name__}\nKeypoints Model: {type(self.keypoints_model).__name__}"
+
+     def predict_batch(
+         self,
+         batch_images: list[ndarray],
+         offset: int,
+         n_keypoints: int,
+     ) -> list[TVFrameResult]:
+         player_batch_size = min(self.player_batch_size, len(batch_images))
+         bboxes: dict[int, list[BoundingBox]] = {}
+         while True:
+             try:
+                 gc.collect()
+                 if torch.cuda.is_available():
+                     tf.keras.backend.clear_session()
+                     torch.cuda.empty_cache()
+                     torch.cuda.synchronize()
+                 bbox_model_results, _, _, _ = player_detection_result(batch_images, player_batch_size, self.bbox_model)
+
+                 if bbox_model_results is not None:
+                     for frame_number_in_batch, detections in enumerate(bbox_model_results):
+                         boxes = []
+                         for detection in detections:
+                             # Detection format from player.py: {"id": int, "bbox": [x1, y1, x2, y2], "class_id": int, "conf": float}
+                             x1, y1, x2, y2 = detection["bbox"]
+                             cls_id = detection["class_id"]
+                             conf = detection["conf"]
+
+                             boxes.append(
+                                 BoundingBox(
+                                     x1=int(x1),
+                                     y1=int(y1),
+                                     x2=int(x2),
+                                     y2=int(y2),
+                                     cls_id=int(cls_id),
+                                     conf=float(conf),
+                                 )
+                             )
+                         bboxes[offset + frame_number_in_batch] = boxes
+                 print("✅ BBoxes predicted")
+                 break
+             except RuntimeError as e:
+                 print(self.player_batch_size)
+                 if 'out of memory' in str(e):
+                     if self.player_batch_size == 1:
+                         raise e
+                     self.player_batch_size = self.player_batch_size // 2 if self.player_batch_size > 1 else 1
+                     player_batch_size = min(self.player_batch_size, len(batch_images))
+                 else:
+                     raise e
+
+         pitch_batch_size = min(self.pitch_batch_size, len(batch_images))
+         keypoints: dict[int, list[tuple[int, int]]] = {}
+         while True:
+             try:
+                 gc.collect()
+                 if torch.cuda.is_available():
+                     tf.keras.backend.clear_session()
+                     torch.cuda.empty_cache()
+                     torch.cuda.synchronize()
+                 keypoints_result = process_batch_input(
+                     batch_images,
+                     self.keypoints_model,
+                     self.kp_threshold,
+                     'cuda' if torch.cuda.is_available() else 'cpu',
+                     batch_size=pitch_batch_size
+                 )
+
+                 if keypoints_result is not None:
+                     for frame_number_in_batch, kp_dict in enumerate(keypoints_result):
+                         frame_keypoints: list[tuple[int, int]] = []
+
+                         # Get image dimensions for conversion from normalized to pixel coordinates
+                         if frame_number_in_batch < len(batch_images):
+                             height, width = batch_images[frame_number_in_batch].shape[:2]
+
+                             for idx in range(32):
+                                 x, y = 0, 0
+                                 idx = idx + 1
+                                 if idx in kp_dict.keys():
+                                     kp_data = kp_dict[idx]
+                                     # Convert normalized coordinates to pixel coordinates
+                                     x = int(kp_data['x'] * width)
+                                     y = int(kp_data['y'] * height)
+                                 frame_keypoints.append((x, y))
+
+                         # Pad or truncate to match expected number of keypoints
+                         if len(frame_keypoints) < n_keypoints:
+                             frame_keypoints.extend([(0, 0)] * (n_keypoints - len(frame_keypoints)))
+                         else:
+                             frame_keypoints = frame_keypoints[:n_keypoints]
+
+                         keypoints[offset + frame_number_in_batch] = frame_keypoints
+
+                 print("✅ Keypoints predicted")
+                 break
+             except RuntimeError as e:
+                 print(self.pitch_batch_size)
+                 if 'out of memory' in str(e):
+                     if self.pitch_batch_size == 1:
+                         raise e
+                     self.pitch_batch_size = self.pitch_batch_size // 2 if self.pitch_batch_size > 1 else 1
+                     pitch_batch_size = min(self.pitch_batch_size, len(batch_images))
+                 else:
+                     raise e
+
+         # Combine results
+         results: list[TVFrameResult] = []
+         for i, frame_number in enumerate(range(offset, offset + len(batch_images))):
+             # Get the current frame
+             frame = batch_images[i]  # Use index i for batch_images
+
+             # Get detection results for this frame
+             frame_boxes = bboxes.get(frame_number, [])
+             frame_keypoints = keypoints.get(frame_number, [(0, 0) for _ in range(n_keypoints)])
+
+             # Create result object
+             result = TVFrameResult(
+                 frame_id=frame_number,
+                 boxes=frame_boxes,
+                 keypoints=frame_keypoints,
+             )
+             results.append(result)
+
+         print("✅ Combined results as TVFrameResult")
+
+         gc.collect()
+         if torch.cuda.is_available():
+             tf.keras.backend.clear_session()
+             torch.cuda.empty_cache()
+             torch.cuda.synchronize()
+
+         return results
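For reference, a hedged usage sketch of the Miner contract described in the class docstring (not part of the upload; the repo id is a placeholder and the frames are dummy images):

from pathlib import Path

import numpy as np
from huggingface_hub import snapshot_download

from miner import Miner

repo_dir = Path(snapshot_download("gloriforge/some-repo"))  # placeholder repo id, assumption
miner = Miner(repo_dir)                                     # loads the ONNX detector and TensorRT keypoint engine

# Dummy 1080p BGR frames standing in for decoded video frames.
frames = [np.zeros((1080, 1920, 3), dtype=np.uint8) for _ in range(4)]
results = miner.predict_batch(frames, offset=0, n_keypoints=32)
for res in results:
    print(res.frame_id, len(res.boxes), len(res.keypoints))

n_keypoints=32 here matches the fixed range(32) loop inside predict_batch.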
object-detection.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05112479be8cb59494e9ae23a57af43becd5aa1f448b0e5ed33fcb6b4c2bbbc3
+ size 273322667
pitch.py ADDED
@@ -0,0 +1,679 @@
1
+ from __future__ import absolute_import
2
+ from __future__ import division
3
+ from __future__ import print_function
4
+
5
+ import os
6
+ import sys
7
+ import time
8
+ from typing import List, Optional, Tuple
9
+
10
+ import cv2
11
+ import numpy as np
12
+ import torch
13
+ import torch.nn as nn
14
+ import torch.nn.functional as F
15
+ import torchvision.transforms as T
16
+ import torchvision.transforms.functional as f
17
+ from pydantic import BaseModel
18
+
19
+ import logging
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
+ class BoundingBox(BaseModel):
24
+ x1: int
25
+ y1: int
26
+ x2: int
27
+ y2: int
28
+ cls_id: int
29
+ conf: float
30
+
31
+
32
+ class TVFrameResult(BaseModel):
33
+ frame_id: int
34
+ boxes: list[BoundingBox]
35
+ keypoints: list[tuple[int, int]]
36
+
37
+ BatchNorm2d = nn.BatchNorm2d
38
+ BN_MOMENTUM = 0.1
39
+
40
+ def conv3x3(in_planes, out_planes, stride=1):
41
+ """3x3 convolution with padding"""
42
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3,
43
+ stride=stride, padding=1, bias=False)
44
+
45
+
46
+ class BasicBlock(nn.Module):
47
+ expansion = 1
48
+
49
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
50
+ super(BasicBlock, self).__init__()
51
+ self.conv1 = conv3x3(inplanes, planes, stride)
52
+ self.bn1 = BatchNorm2d(planes, momentum=BN_MOMENTUM)
53
+ self.relu = nn.ReLU(inplace=True)
54
+ self.conv2 = conv3x3(planes, planes)
55
+ self.bn2 = BatchNorm2d(planes, momentum=BN_MOMENTUM)
56
+ self.downsample = downsample
57
+ self.stride = stride
58
+
59
+ def forward(self, x):
60
+ residual = x
61
+
62
+ out = self.conv1(x)
63
+ out = self.bn1(out)
64
+ out = self.relu(out)
65
+
66
+ out = self.conv2(out)
67
+ out = self.bn2(out)
68
+
69
+ if self.downsample is not None:
70
+ residual = self.downsample(x)
71
+
72
+ out += residual
73
+ out = self.relu(out)
74
+
75
+ return out
76
+
77
+
78
+ class Bottleneck(nn.Module):
79
+ expansion = 4
80
+
81
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
82
+ super(Bottleneck, self).__init__()
83
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
84
+ self.bn1 = BatchNorm2d(planes, momentum=BN_MOMENTUM)
85
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
86
+ padding=1, bias=False)
87
+ self.bn2 = BatchNorm2d(planes, momentum=BN_MOMENTUM)
88
+ self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
89
+ bias=False)
90
+ self.bn3 = BatchNorm2d(planes * self.expansion,
91
+ momentum=BN_MOMENTUM)
92
+ self.relu = nn.ReLU(inplace=True)
93
+ self.downsample = downsample
94
+ self.stride = stride
95
+
96
+ def forward(self, x):
97
+ residual = x
98
+
99
+ out = self.conv1(x)
100
+ out = self.bn1(out)
101
+ out = self.relu(out)
102
+
103
+ out = self.conv2(out)
104
+ out = self.bn2(out)
105
+ out = self.relu(out)
106
+
107
+ out = self.conv3(out)
108
+ out = self.bn3(out)
109
+
110
+ if self.downsample is not None:
111
+ residual = self.downsample(x)
112
+
113
+ out += residual
114
+ out = self.relu(out)
115
+
116
+ return out
117
+
118
+
119
+ class HighResolutionModule(nn.Module):
120
+ def __init__(self, num_branches, blocks, num_blocks, num_inchannels,
121
+ num_channels, fuse_method, multi_scale_output=True):
122
+ super(HighResolutionModule, self).__init__()
123
+ self._check_branches(
124
+ num_branches, blocks, num_blocks, num_inchannels, num_channels)
125
+
126
+ self.num_inchannels = num_inchannels
127
+ self.fuse_method = fuse_method
128
+ self.num_branches = num_branches
129
+
130
+ self.multi_scale_output = multi_scale_output
131
+
132
+ self.branches = self._make_branches(
133
+ num_branches, blocks, num_blocks, num_channels)
134
+ self.fuse_layers = self._make_fuse_layers()
135
+ self.relu = nn.ReLU(inplace=True)
136
+
137
+ def _check_branches(self, num_branches, blocks, num_blocks,
138
+ num_inchannels, num_channels):
139
+ if num_branches != len(num_blocks):
140
+ error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format(
141
+ num_branches, len(num_blocks))
142
+ logger.error(error_msg)
143
+ raise ValueError(error_msg)
144
+
145
+ if num_branches != len(num_channels):
146
+ error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format(
147
+ num_branches, len(num_channels))
148
+ logger.error(error_msg)
149
+ raise ValueError(error_msg)
150
+
151
+ if num_branches != len(num_inchannels):
152
+ error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format(
153
+ num_branches, len(num_inchannels))
154
+ logger.error(error_msg)
155
+ raise ValueError(error_msg)
156
+
157
+ def _make_one_branch(self, branch_index, block, num_blocks, num_channels,
158
+ stride=1):
159
+ downsample = None
160
+ if stride != 1 or \
161
+ self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion:
162
+ downsample = nn.Sequential(
163
+ nn.Conv2d(self.num_inchannels[branch_index],
164
+ num_channels[branch_index] * block.expansion,
165
+ kernel_size=1, stride=stride, bias=False),
166
+ BatchNorm2d(num_channels[branch_index] * block.expansion,
167
+ momentum=BN_MOMENTUM),
168
+ )
169
+
170
+ layers = []
171
+ layers.append(block(self.num_inchannels[branch_index],
172
+ num_channels[branch_index], stride, downsample))
173
+ self.num_inchannels[branch_index] = \
174
+ num_channels[branch_index] * block.expansion
175
+ for i in range(1, num_blocks[branch_index]):
176
+ layers.append(block(self.num_inchannels[branch_index],
177
+ num_channels[branch_index]))
178
+
179
+ return nn.Sequential(*layers)
180
+
181
+ def _make_branches(self, num_branches, block, num_blocks, num_channels):
182
+ branches = []
183
+
184
+ for i in range(num_branches):
185
+ branches.append(
186
+ self._make_one_branch(i, block, num_blocks, num_channels))
187
+
188
+ return nn.ModuleList(branches)
189
+
190
+ def _make_fuse_layers(self):
191
+ if self.num_branches == 1:
192
+ return None
193
+
194
+ num_branches = self.num_branches
195
+ num_inchannels = self.num_inchannels
196
+ fuse_layers = []
197
+ for i in range(num_branches if self.multi_scale_output else 1):
198
+ fuse_layer = []
199
+ for j in range(num_branches):
200
+ if j > i:
201
+ fuse_layer.append(nn.Sequential(
202
+ nn.Conv2d(num_inchannels[j],
203
+ num_inchannels[i],
204
+ 1,
205
+ 1,
206
+ 0,
207
+ bias=False),
208
+ BatchNorm2d(num_inchannels[i], momentum=BN_MOMENTUM)))
209
+ # nn.Upsample(scale_factor=2**(j-i), mode='nearest')))
210
+ elif j == i:
211
+ fuse_layer.append(None)
212
+ else:
213
+ conv3x3s = []
214
+ for k in range(i - j):
215
+ if k == i - j - 1:
216
+ num_outchannels_conv3x3 = num_inchannels[i]
217
+ conv3x3s.append(nn.Sequential(
218
+ nn.Conv2d(num_inchannels[j],
219
+ num_outchannels_conv3x3,
220
+ 3, 2, 1, bias=False),
221
+ BatchNorm2d(num_outchannels_conv3x3, momentum=BN_MOMENTUM)))
222
+ else:
223
+ num_outchannels_conv3x3 = num_inchannels[j]
224
+ conv3x3s.append(nn.Sequential(
225
+ nn.Conv2d(num_inchannels[j],
226
+ num_outchannels_conv3x3,
227
+ 3, 2, 1, bias=False),
228
+ BatchNorm2d(num_outchannels_conv3x3,
229
+ momentum=BN_MOMENTUM),
230
+ nn.ReLU(inplace=True)))
231
+ fuse_layer.append(nn.Sequential(*conv3x3s))
232
+ fuse_layers.append(nn.ModuleList(fuse_layer))
233
+
234
+ return nn.ModuleList(fuse_layers)
235
+
236
+ def get_num_inchannels(self):
237
+ return self.num_inchannels
238
+
239
+ def forward(self, x):
240
+ if self.num_branches == 1:
241
+ return [self.branches[0](x[0])]
242
+
243
+ for i in range(self.num_branches):
244
+ x[i] = self.branches[i](x[i])
245
+
246
+ x_fuse = []
247
+ for i in range(len(self.fuse_layers)):
248
+ y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
249
+ for j in range(1, self.num_branches):
250
+ if i == j:
251
+ y = y + x[j]
252
+ elif j > i:
253
+ y = y + F.interpolate(
254
+ self.fuse_layers[i][j](x[j]),
255
+ size=[x[i].shape[2], x[i].shape[3]],
256
+ mode='bilinear')
257
+ else:
258
+ y = y + self.fuse_layers[i][j](x[j])
259
+ x_fuse.append(self.relu(y))
260
+
261
+ return x_fuse
262
+
263
+
264
+ blocks_dict = {
265
+ 'BASIC': BasicBlock,
266
+ 'BOTTLENECK': Bottleneck
267
+ }
268
+
269
+
270
+ class HighResolutionNet(nn.Module):
271
+
272
+ def __init__(self, config, **kwargs):
273
+ self.inplanes = 64
274
+ extra = config['MODEL']['EXTRA']
275
+ super(HighResolutionNet, self).__init__()
276
+
277
+ # stem net
278
+ self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=2, padding=1,
279
+ bias=False)
280
+ self.bn1 = BatchNorm2d(self.inplanes, momentum=BN_MOMENTUM)
281
+ self.conv2 = nn.Conv2d(self.inplanes, self.inplanes, kernel_size=3, stride=2, padding=1,
282
+ bias=False)
283
+ self.bn2 = BatchNorm2d(self.inplanes, momentum=BN_MOMENTUM)
284
+ self.relu = nn.ReLU(inplace=True)
285
+ self.sf = nn.Softmax(dim=1)
286
+ self.layer1 = self._make_layer(Bottleneck, 64, 64, 4)
287
+
288
+ self.stage2_cfg = extra['STAGE2']
289
+ num_channels = self.stage2_cfg['NUM_CHANNELS']
290
+ block = blocks_dict[self.stage2_cfg['BLOCK']]
291
+ num_channels = [
292
+ num_channels[i] * block.expansion for i in range(len(num_channels))]
293
+ self.transition1 = self._make_transition_layer(
294
+ [256], num_channels)
295
+ self.stage2, pre_stage_channels = self._make_stage(
296
+ self.stage2_cfg, num_channels)
297
+
298
+ self.stage3_cfg = extra['STAGE3']
299
+ num_channels = self.stage3_cfg['NUM_CHANNELS']
300
+ block = blocks_dict[self.stage3_cfg['BLOCK']]
301
+ num_channels = [
302
+ num_channels[i] * block.expansion for i in range(len(num_channels))]
303
+ self.transition2 = self._make_transition_layer(
304
+ pre_stage_channels, num_channels)
305
+ self.stage3, pre_stage_channels = self._make_stage(
306
+ self.stage3_cfg, num_channels)
307
+
308
+ self.stage4_cfg = extra['STAGE4']
309
+ num_channels = self.stage4_cfg['NUM_CHANNELS']
310
+ block = blocks_dict[self.stage4_cfg['BLOCK']]
311
+ num_channels = [
312
+ num_channels[i] * block.expansion for i in range(len(num_channels))]
313
+ self.transition3 = self._make_transition_layer(
314
+ pre_stage_channels, num_channels)
315
+ self.stage4, pre_stage_channels = self._make_stage(
316
+ self.stage4_cfg, num_channels, multi_scale_output=True)
317
+
318
+ self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
319
+ final_inp_channels = sum(pre_stage_channels) + self.inplanes
320
+
321
+ self.head = nn.Sequential(nn.Sequential(
322
+ nn.Conv2d(
323
+ in_channels=final_inp_channels,
324
+ out_channels=final_inp_channels,
325
+ kernel_size=1),
326
+ BatchNorm2d(final_inp_channels, momentum=BN_MOMENTUM),
327
+ nn.ReLU(inplace=True),
328
+ nn.Conv2d(
329
+ in_channels=final_inp_channels,
330
+ out_channels=config['MODEL']['NUM_JOINTS'],
331
+ kernel_size=extra['FINAL_CONV_KERNEL']),
332
+ nn.Softmax(dim=1)))
333
+
334
+
335
+
336
+ def _make_head(self, x, x_skip):
337
+ x = self.upsample(x)
338
+ x = torch.cat([x, x_skip], dim=1)
339
+ x = self.head(x)
340
+
341
+ return x
342
+
343
+ def _make_transition_layer(
344
+ self, num_channels_pre_layer, num_channels_cur_layer):
345
+ num_branches_cur = len(num_channels_cur_layer)
346
+ num_branches_pre = len(num_channels_pre_layer)
347
+
348
+ transition_layers = []
349
+ for i in range(num_branches_cur):
350
+ if i < num_branches_pre:
351
+ if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
352
+ transition_layers.append(nn.Sequential(
353
+ nn.Conv2d(num_channels_pre_layer[i],
354
+ num_channels_cur_layer[i],
355
+ 3,
356
+ 1,
357
+ 1,
358
+ bias=False),
359
+ BatchNorm2d(
360
+ num_channels_cur_layer[i], momentum=BN_MOMENTUM),
361
+ nn.ReLU(inplace=True)))
362
+ else:
363
+ transition_layers.append(None)
364
+ else:
365
+ conv3x3s = []
366
+ for j in range(i + 1 - num_branches_pre):
367
+ inchannels = num_channels_pre_layer[-1]
368
+ outchannels = num_channels_cur_layer[i] \
369
+ if j == i - num_branches_pre else inchannels
370
+ conv3x3s.append(nn.Sequential(
371
+ nn.Conv2d(
372
+ inchannels, outchannels, 3, 2, 1, bias=False),
373
+ BatchNorm2d(outchannels, momentum=BN_MOMENTUM),
374
+ nn.ReLU(inplace=True)))
375
+ transition_layers.append(nn.Sequential(*conv3x3s))
376
+
377
+ return nn.ModuleList(transition_layers)
378
+
379
+ def _make_layer(self, block, inplanes, planes, blocks, stride=1):
380
+ downsample = None
381
+ if stride != 1 or inplanes != planes * block.expansion:
382
+ downsample = nn.Sequential(
383
+ nn.Conv2d(inplanes, planes * block.expansion,
384
+ kernel_size=1, stride=stride, bias=False),
385
+ BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
386
+ )
387
+
388
+ layers = []
389
+ layers.append(block(inplanes, planes, stride, downsample))
390
+ inplanes = planes * block.expansion
391
+ for i in range(1, blocks):
392
+ layers.append(block(inplanes, planes))
393
+
394
+ return nn.Sequential(*layers)
395
+
396
+ def _make_stage(self, layer_config, num_inchannels,
397
+ multi_scale_output=True):
398
+ num_modules = layer_config['NUM_MODULES']
399
+ num_branches = layer_config['NUM_BRANCHES']
400
+ num_blocks = layer_config['NUM_BLOCKS']
401
+ num_channels = layer_config['NUM_CHANNELS']
402
+ block = blocks_dict[layer_config['BLOCK']]
403
+ fuse_method = layer_config['FUSE_METHOD']
404
+
405
+ modules = []
406
+ for i in range(num_modules):
407
+ # multi_scale_output is only used last module
408
+ if not multi_scale_output and i == num_modules - 1:
409
+ reset_multi_scale_output = False
410
+ else:
411
+ reset_multi_scale_output = True
412
+ modules.append(
413
+ HighResolutionModule(num_branches,
414
+ block,
415
+ num_blocks,
416
+ num_inchannels,
417
+ num_channels,
418
+ fuse_method,
419
+ reset_multi_scale_output)
420
+ )
421
+ num_inchannels = modules[-1].get_num_inchannels()
422
+
423
+ return nn.Sequential(*modules), num_inchannels
424
+
425
+ def forward(self, x):
426
+ # h, w = x.size(2), x.size(3)
427
+ x = self.conv1(x)
428
+ x_skip = x.clone()
429
+ x = self.bn1(x)
430
+ x = self.relu(x)
431
+ x = self.conv2(x)
432
+ x = self.bn2(x)
433
+ x = self.relu(x)
434
+ x = self.layer1(x)
435
+
436
+ x_list = []
437
+ for i in range(self.stage2_cfg['NUM_BRANCHES']):
438
+ if self.transition1[i] is not None:
439
+ x_list.append(self.transition1[i](x))
440
+ else:
441
+ x_list.append(x)
442
+ y_list = self.stage2(x_list)
443
+
444
+ x_list = []
445
+ for i in range(self.stage3_cfg['NUM_BRANCHES']):
446
+ if self.transition2[i] is not None:
447
+ x_list.append(self.transition2[i](y_list[-1]))
448
+ else:
449
+ x_list.append(y_list[i])
450
+ y_list = self.stage3(x_list)
451
+
452
+ x_list = []
453
+ for i in range(self.stage4_cfg['NUM_BRANCHES']):
454
+ if self.transition3[i] is not None:
455
+ x_list.append(self.transition3[i](y_list[-1]))
456
+ else:
457
+ x_list.append(y_list[i])
458
+ x = self.stage4(x_list)
459
+
460
+ # Head Part
461
+ height, width = x[0].size(2), x[0].size(3)
462
+ x1 = F.interpolate(x[1], size=(height, width), mode='bilinear', align_corners=False)
463
+ x2 = F.interpolate(x[2], size=(height, width), mode='bilinear', align_corners=False)
464
+ x3 = F.interpolate(x[3], size=(height, width), mode='bilinear', align_corners=False)
465
+ x = torch.cat([x[0], x1, x2, x3], 1)
466
+ x = self._make_head(x, x_skip)
467
+
468
+ return x
469
+
470
+ def init_weights(self, pretrained=''):
471
+ for m in self.modules():
472
+ if isinstance(m, nn.Conv2d):
473
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
474
+ #nn.init.normal_(m.weight, std=0.001)
475
+ #nn.init.constant_(m.bias, 0)
476
+ elif isinstance(m, nn.BatchNorm2d):
477
+ nn.init.constant_(m.weight, 1)
478
+ nn.init.constant_(m.bias, 0)
479
+ if pretrained != '':
480
+ if os.path.isfile(pretrained):
481
+ pretrained_dict = torch.load(pretrained)
482
+ model_dict = self.state_dict()
483
+ pretrained_dict = {k: v for k, v in pretrained_dict.items()
484
+ if k in model_dict.keys()}
485
+ model_dict.update(pretrained_dict)
486
+ self.load_state_dict(model_dict)
487
+ else:
488
+ sys.exit(f'Weights {pretrained} not found.')
489
+
490
+
491
+ def get_cls_net(config, pretrained='', **kwargs):
492
+ """Create keypoint detection model with softmax activation"""
493
+ model = HighResolutionNet(config, **kwargs)
494
+ model.init_weights(pretrained)
495
+ return model
496
+
497
+
498
+ def get_cls_net_l(config, pretrained='', **kwargs):
499
+ """Create line detection model with sigmoid activation"""
500
+ model = HighResolutionNet(config, **kwargs)
501
+ model.init_weights(pretrained)
502
+
503
+ # After loading weights, replace just the activation function
504
+ # The saved model expects the nested Sequential structure
506
+ # Replace softmax (index 4) with sigmoid
507
+ model.head[0][4] = nn.Sigmoid()
508
+
509
+ return model
510
+
511
+ # Simplified utility functions - removed complex Gaussian generation functions
512
+ # These were mainly used for training data generation, not inference
513
+
514
+
515
+
516
+ # generate_gaussian_array_vectorized_dist_l function removed - not used in current implementation
517
+ @torch.inference_mode()
518
+ def run_inference(model, input_tensor: torch.Tensor, device):
519
+ input_tensor = input_tensor.to(device).to(memory_format=torch.channels_last)
520
+ output = model.module().forward(input_tensor)
521
+ return output
522
+
523
+ def preprocess_batch_fast(frames, device):
524
+ """Ultra-fast batch preprocessing using optimized tensor operations"""
525
+ target_size = (540, 960) # H, W format for model input
526
+ batch = []
527
+ for i, frame in enumerate(frames):
528
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
529
+ img = cv2.resize(frame_rgb, (target_size[1], target_size[0]))
530
+ img = img.astype(np.float32) / 255.0
531
+ img = np.transpose(img, (2, 0, 1)) # HWC -> CHW
532
+ batch.append(img)
533
+ batch = torch.tensor(np.stack(batch), dtype=torch.float32)
534
+
535
+ return batch
536
+
537
+ def extract_keypoints_from_heatmap(heatmap: torch.Tensor, scale: int = 2, max_keypoints: int = 1):
538
+ """Optimized keypoint extraction from heatmaps"""
539
+ batch_size, n_channels, height, width = heatmap.shape
540
+
541
+ # Find local maxima using max pooling (keep on GPU)
542
+ kernel = 3
543
+ pad = 1
544
+ max_pooled = F.max_pool2d(heatmap, kernel, stride=1, padding=pad)
545
+ local_maxima = (max_pooled == heatmap)
546
+ heatmap = heatmap * local_maxima
547
+
548
+ # Get top keypoints (keep on GPU longer)
549
+ scores, indices = torch.topk(heatmap.view(batch_size, n_channels, -1), max_keypoints, sorted=False)
550
+ y_coords = torch.div(indices, width, rounding_mode="floor")
551
+ x_coords = indices % width
552
+
553
+ # Optimized tensor operations
554
+ x_coords = x_coords * scale
555
+ y_coords = y_coords * scale
556
+
557
+ # Create result tensor directly on GPU
558
+ results = torch.stack([x_coords.float(), y_coords.float(), scores], dim=-1)
559
+
560
+ return results
561
+
562
+
563
+ def extract_keypoints_from_heatmap_fast(heatmap: torch.Tensor, scale: int = 2, max_keypoints: int = 1):
564
+ """Ultra-fast keypoint extraction optimized for speed"""
565
+ batch_size, n_channels, height, width = heatmap.shape
566
+
567
+ # Simplified local maxima detection (faster but slightly less accurate)
568
+ max_pooled = F.max_pool2d(heatmap, 3, stride=1, padding=1)
569
+ local_maxima = (max_pooled == heatmap)
570
+
571
+ # Apply mask and get top keypoints in one go
572
+ masked_heatmap = heatmap * local_maxima
573
+ flat_heatmap = masked_heatmap.view(batch_size, n_channels, -1)
574
+ scores, indices = torch.topk(flat_heatmap, max_keypoints, dim=-1, sorted=False)
575
+
576
+ # Vectorized coordinate calculation
577
+ y_coords = torch.div(indices, width, rounding_mode="floor") * scale
578
+ x_coords = (indices % width) * scale
579
+
580
+ # Stack results efficiently
581
+ results = torch.stack([x_coords.float(), y_coords.float(), scores], dim=-1)
582
+ return results
583
+
584
+
585
+ def process_keypoints_vectorized(kp_coords, kp_threshold, w, h, batch_size):
586
+ """Ultra-fast vectorized keypoint processing"""
587
+ batch_results = []
588
+
589
+ # Convert to numpy once for faster CPU operations
590
+ kp_np = kp_coords.cpu().numpy()
591
+
592
+ for batch_idx in range(batch_size):
593
+ kp_dict = {}
594
+ # Vectorized threshold check
595
+ valid_kps = kp_np[batch_idx, :, 0, 2] > kp_threshold
596
+ valid_indices = np.where(valid_kps)[0]
597
+
598
+ for ch_idx in valid_indices:
599
+ x = float(kp_np[batch_idx, ch_idx, 0, 0]) / w
600
+ y = float(kp_np[batch_idx, ch_idx, 0, 1]) / h
601
+ p = float(kp_np[batch_idx, ch_idx, 0, 2])
602
+ kp_dict[ch_idx + 1] = {'x': x, 'y': y, 'p': p}
603
+
604
+ batch_results.append(kp_dict)
605
+
606
+ return batch_results
607
+
608
+ def inference_batch(frames, model, kp_threshold, device, batch_size=8):
609
+ """Optimized batch inference for multiple frames"""
610
+ results = []
611
+ num_frames = len(frames)
612
+
613
+ # Process all frames in optimally-sized batches
614
+ for i in range(0, num_frames, batch_size):
615
+ current_batch_size = min(batch_size, num_frames - i)
616
+ batch_frames = frames[i:i + current_batch_size]
617
+
618
+ # Fast preprocessing
619
+ batch = preprocess_batch_fast(batch_frames, device)
620
+
621
+ heatmaps = run_inference(model, batch, device)
622
+
623
+ # Ultra-fast keypoint extraction
624
+ kp_coords = extract_keypoints_from_heatmap_fast(heatmaps[:,:-1,:,:], scale=2, max_keypoints=1)
625
+
626
+ # Vectorized batch processing - no loops
627
+ batch_results = process_keypoints_vectorized(kp_coords, kp_threshold, 960, 540, current_batch_size)
628
+ results.extend(batch_results)
629
+
630
+ # Minimal cleanup
631
+ del heatmaps, kp_coords, batch
632
+
633
+ return results
634
+
635
+ # Keypoint mapping from detection indices to standard football pitch keypoint IDs
636
+ map_keypoints = {
637
+ 1: 1, 2: 14, 3: 25, 4: 2, 5: 10, 6: 18, 7: 26, 8: 3, 9: 7, 10: 23,
638
+ 11: 27, 20: 4, 21: 8, 22: 24, 23: 28, 24: 5, 25: 13, 26: 21, 27: 29,
639
+ 28: 6, 29: 17, 30: 30, 31: 11, 32: 15, 33: 19, 34: 12, 35: 16, 36: 20,
640
+ 45: 9, 50: 31, 52: 32, 57: 22
641
+ }
642
+
643
+ def get_mapped_keypoints(kp_points):
644
+ """Apply keypoint mapping to detection results"""
645
+ mapped_points = {}
646
+ for key, value in kp_points.items():
647
+ if key in map_keypoints:
648
+ mapped_key = map_keypoints[key]
649
+ mapped_points[mapped_key] = value
650
+ # else:
651
+ # Keep unmapped keypoints with original key
652
+ # mapped_points[key] = value
653
+ return mapped_points
654
+
655
+ def process_batch_input(frames, model, kp_threshold, device, batch_size=8):
656
+ """Process multiple input images in batch"""
657
+ # Batch inference
658
+ kp_results = inference_batch(frames, model, kp_threshold, device, batch_size)
659
+ kp_results = [get_mapped_keypoints(kp) for kp in kp_results]
660
+ # Draw results and save
661
+ # for i, (frame, kp_points, input_path) in enumerate(zip(frames, kp_results, valid_paths)):
662
+ # height, width = frame.shape[:2]
663
+
664
+ # # Apply mapping to get standard keypoint IDs
665
+ # mapped_kp_points = get_mapped_keypoints(kp_points)
666
+
667
+ # for key, value in mapped_kp_points.items():
668
+ # x = int(value['x'] * width)
669
+ # y = int(value['y'] * height)
670
+ # cv2.circle(frame, (x, y), 5, (0, 255, 0), -1) # Green circles
671
+ # cv2.putText(frame, str(key), (x+10, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
672
+
673
+ # # Save result
674
+ # output_path = input_path.replace('.png', '_result.png').replace('.jpg', '_result.jpg')
675
+ # cv2.imwrite(output_path, frame)
676
+
677
+ # print(f"Batch processing complete. Processed {len(frames)} images.")
678
+
679
+ return kp_results
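A small synthetic check of extract_keypoints_from_heatmap_fast (illustration only, assuming pitch.py is importable from the repo root): plant one peak per channel and confirm the recovered pixel coordinates at scale 2:

import torch
from pitch import extract_keypoints_from_heatmap_fast

heatmap = torch.zeros(1, 3, 270, 480)   # (B, C, H, W), half the 960x540 model input resolution
heatmap[0, 0, 100, 200] = 1.0           # channel-0 peak at row 100, col 200
heatmap[0, 1, 50, 10] = 0.8
heatmap[0, 2, 260, 470] = 0.5

kp = extract_keypoints_from_heatmap_fast(heatmap, scale=2, max_keypoints=1)
print(kp.shape)     # torch.Size([1, 3, 1, 3]) -> (x, y, score) per channel
print(kp[0, 0, 0])  # tensor([400., 200., 1.]) after scaling back toward 960x540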
player.py ADDED
@@ -0,0 +1,388 @@
1
+ import cv2
2
+ import numpy as np
3
+ from sklearn.cluster import KMeans
4
+ import warnings
5
+ import time
6
+
7
+ import torch
8
+ from torchvision.ops import batched_nms
9
+ from numpy import ndarray
10
+ # Suppress ALL runtime and sklearn warnings
11
+ warnings.filterwarnings('ignore', category=RuntimeWarning)
12
+ warnings.filterwarnings('ignore', category=FutureWarning)
13
+ warnings.filterwarnings('ignore', category=UserWarning)
14
+
15
+ # Suppress sklearn warnings specifically
16
+ import logging
17
+ logging.getLogger('sklearn').setLevel(logging.ERROR)
18
+
19
+ def get_grass_color(img):
20
+ # Convert image to HSV color space
21
+ hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
22
+
23
+ # Define range of green color in HSV
24
+ lower_green = np.array([30, 40, 40])
25
+ upper_green = np.array([80, 255, 255])
26
+
27
+ # Threshold the HSV image to get only green colors
28
+ mask = cv2.inRange(hsv, lower_green, upper_green)
29
+
30
+ # Mean BGR color of the pixels selected by the green mask (i.e. the grass)
+ grass_color = cv2.mean(img, mask=mask)
33
+ return grass_color[:3]
34
+
35
+ def get_players_boxes(frame, result):
36
+ players_imgs = []
37
+ players_boxes = []
38
+ for (box, score, cls) in result:
39
+ label = int(cls)
40
+ if label == 0:
41
+ x1, y1, x2, y2 = box.astype(int)
42
+ player_img = frame[y1: y2, x1: x2]
43
+ players_imgs.append(player_img)
44
+ players_boxes.append([box, score, cls])
45
+ return players_imgs, players_boxes
46
+
47
+ def get_kits_colors(players, grass_hsv=None, frame=None):
48
+ kits_colors = []
49
+ if grass_hsv is None:
50
+ grass_color = get_grass_color(frame)
51
+ grass_hsv = cv2.cvtColor(np.uint8([[list(grass_color)]]), cv2.COLOR_BGR2HSV)
52
+
53
+ for player_img in players:
54
+ # Skip empty or invalid images
55
+ if player_img is None or player_img.size == 0 or len(player_img.shape) != 3:
56
+ continue
57
+
58
+ # Convert image to HSV color space
59
+ hsv = cv2.cvtColor(player_img, cv2.COLOR_BGR2HSV)
60
+
61
+ # Define range of green color in HSV
62
+ lower_green = np.array([grass_hsv[0, 0, 0] - 10, 40, 40])
63
+ upper_green = np.array([grass_hsv[0, 0, 0] + 10, 255, 255])
64
+
65
+ # Threshold the HSV image to get only green colors
66
+ mask = cv2.inRange(hsv, lower_green, upper_green)
67
+
68
+ # Bitwise-AND mask and original image
69
+ mask = cv2.bitwise_not(mask)
70
+ upper_mask = np.zeros(player_img.shape[:2], np.uint8)
71
+ upper_mask[0:player_img.shape[0]//2, 0:player_img.shape[1]] = 255
72
+ mask = cv2.bitwise_and(mask, upper_mask)
73
+
74
+ kit_color = np.array(cv2.mean(player_img, mask=mask)[:3])
75
+
76
+ kits_colors.append(kit_color)
77
+ return kits_colors
78
+
79
+ def get_kits_classifier(kits_colors):
80
+ if len(kits_colors) == 0:
81
+ return None
82
+ if len(kits_colors) == 1:
83
+ # Only one kit color detected: not enough samples to fit the 2-cluster KMeans below
84
+ return None
85
+ kits_kmeans = KMeans(n_clusters=2)
86
+ kits_kmeans.fit(kits_colors)
87
+ return kits_kmeans
88
+
89
+ def classify_kits(kits_classifer, kits_colors):
90
+ if kits_classifer is None or len(kits_colors) == 0:
91
+ return np.array([0]) # Default to team 0
92
+ team = kits_classifer.predict(kits_colors)
93
+ return team
94
+
95
+ def get_left_team_label(players_boxes, kits_colors, kits_clf):
96
+ left_team_label = 0
97
+ team_0 = []
98
+ team_1 = []
99
+
100
+ for i in range(len(players_boxes)):
101
+ x1, y1, x2, y2 = players_boxes[i][0].astype(int)
102
+ team = classify_kits(kits_clf, [kits_colors[i]]).item()
103
+ if team == 0:
104
+ team_0.append(np.array([x1]))
105
+ else:
106
+ team_1.append(np.array([x1]))
107
+
108
+ team_0 = np.array(team_0)
109
+ team_1 = np.array(team_1)
110
+
111
+ # Safely calculate averages with fallback for empty arrays
112
+ avg_team_0 = np.average(team_0) if len(team_0) > 0 else 0
113
+ avg_team_1 = np.average(team_1) if len(team_1) > 0 else 0
114
+
115
+ if avg_team_0 - avg_team_1 > 0:
116
+ left_team_label = 1
117
+
118
+ return left_team_label
119
+
120
+ def check_box_boundaries(boxes, img_height, img_width):
121
+ """
122
+ Check if bounding boxes are within image boundaries and clip them if necessary.
123
+
124
+ Args:
125
+ boxes: numpy array of shape (N, 4) with [x1, y1, x2, y2] format
126
+ img_height: height of the image
127
+ img_width: width of the image
128
+
129
+ Returns:
130
+ valid_boxes: numpy array of valid boxes within boundaries
131
+ valid_indices: indices of valid boxes
132
+ """
133
+ x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
134
+
135
+ # Check if boxes are within boundaries
136
+ valid_mask = (x1 >= 0) & (y1 >= 0) & (x2 < img_width) & (y2 < img_height) & (x1 < x2) & (y1 < y2)
137
+
138
+ if not np.any(valid_mask):
139
+ return np.array([]), np.array([])
140
+
141
+ valid_boxes = boxes[valid_mask]
142
+ valid_indices = np.where(valid_mask)[0]
143
+
144
+ # Clip boxes to image boundaries
145
+ valid_boxes[:, 0] = np.clip(valid_boxes[:, 0], 0, img_width - 1) # x1
146
+ valid_boxes[:, 1] = np.clip(valid_boxes[:, 1], 0, img_height - 1) # y1
147
+ valid_boxes[:, 2] = np.clip(valid_boxes[:, 2], 0, img_width - 1) # x2
148
+ valid_boxes[:, 3] = np.clip(valid_boxes[:, 3], 0, img_height - 1) # y2
149
+
150
+ return valid_boxes, valid_indices
151
+
152
+ def process_team_identification_batch(frames, results, kits_clf, left_team_label, grass_hsv):
153
+ """
154
+ Process team identification and label formatting for batch results.
155
+
156
+ Args:
157
+ frames: list of frames
158
+ results: list of detection results for each frame
159
+ kits_clf: trained kit classifier
160
+ left_team_label: label for left team
161
+ grass_hsv: grass color in HSV format
162
+
163
+ Returns:
164
+ processed_results: list of processed results with team identification
165
+ """
166
+ processed_results = []
167
+
168
+ for frame_idx, frame in enumerate(frames):
169
+ frame_results = []
170
+ frame_detections = results[frame_idx]
171
+
172
+ if not frame_detections:
173
+ processed_results.append([])
174
+ continue
175
+
176
+ # Extract player boxes and images
177
+ players_imgs = []
178
+ players_boxes = []
179
+ player_indices = []
180
+
181
+ for idx, (box, score, cls) in enumerate(frame_detections):
182
+ label = int(cls)
183
+ if label == 0: # Player detection
184
+ x1, y1, x2, y2 = box.astype(int)
185
+
186
+ # Check boundaries
187
+ if (x1 >= 0 and y1 >= 0 and x2 < frame.shape[1] and y2 < frame.shape[0] and x1 < x2 and y1 < y2):
188
+ player_img = frame[y1:y2, x1:x2]
189
+ if player_img.size > 0: # Ensure valid image
190
+ players_imgs.append(player_img)
191
+ players_boxes.append([box, score, cls])
192
+ player_indices.append(idx)
193
+
194
+ # Initialize player team mapping
195
+ player_team_map = {}
196
+
197
+ # Process team identification if we have players
198
+ if players_imgs and kits_clf is not None:
199
+ kits_colors = get_kits_colors(players_imgs, grass_hsv)
200
+ teams = classify_kits(kits_clf, kits_colors)
201
+
202
+ # Create mapping from player index to team
203
+ for i, team in enumerate(teams):
204
+ player_team_map[player_indices[i]] = team.item()
205
+
206
+ id = 0
207
+ # Process all detections with team identification
208
+ for idx, (box, score, cls) in enumerate(frame_detections):
209
+ label = int(cls)
210
+ x1, y1, x2, y2 = box.astype(int)
211
+
212
+ # Check boundaries
213
+ valid_boxes, valid_indices = check_box_boundaries(
214
+ np.array([[x1, y1, x2, y2]]), frame.shape[0], frame.shape[1]
215
+ )
216
+
217
+ if len(valid_boxes) == 0:
218
+ continue
219
+
220
+ x1, y1, x2, y2 = valid_boxes[0].astype(int)
221
+
222
+ # Apply team identification logic
223
+ if label == 0: # Player
224
+ if players_imgs and kits_clf is not None and idx in player_team_map:
225
+ team = player_team_map[idx]
226
+ if team == left_team_label:
227
+ final_label = 6 # Player-L (Left team)
228
+ else:
229
+ final_label = 7 # Player-R (Right team)
230
+ else:
231
+ final_label = 6 # Default player label
232
+
233
+ elif label == 1: # Goalkeeper
234
+ final_label = 1 # GK
235
+
236
+ elif label == 2: # Ball
237
+ final_label = 0 # Ball
238
+
239
+ elif label == 3 or label == 4: # Referee or other
240
+ final_label = 3 # Referee
241
+
242
+ else:
243
+ final_label = int(label) # Keep original label, ensure it's int
244
+
245
+ frame_results.append({
246
+ "id": int(id),
247
+ "bbox": [int(x1), int(y1), int(x2), int(y2)],
248
+ "class_id": int(final_label),
249
+ "conf": float(score)
250
+ })
251
+ id = id + 1
252
+
253
+ processed_results.append(frame_results)
254
+
255
+ return processed_results
256
+
257
+ def convert_numpy_types(obj):
258
+ """Convert numpy types to native Python types for JSON serialization."""
259
+ if isinstance(obj, np.integer):
260
+ return int(obj)
261
+ elif isinstance(obj, np.floating):
262
+ return float(obj)
263
+ elif isinstance(obj, np.ndarray):
264
+ return obj.tolist()
265
+ elif isinstance(obj, dict):
266
+ return {key: convert_numpy_types(value) for key, value in obj.items()}
267
+ elif isinstance(obj, list):
268
+ return [convert_numpy_types(item) for item in obj]
269
+ else:
270
+ return obj
271
+
272
+ def pre_process_img(frames, scale):
273
+ imgs = np.stack([cv2.resize(frame, (int(scale), int(scale))) for frame in frames])
274
+ imgs = imgs.transpose(0, 3, 1, 2)
275
+ imgs = imgs.astype(np.float32) / 255.0 # Normalize
276
+ return imgs
277
+
278
+ def post_process_output(outputs, x_scale, y_scale, conf_thresh=0.6, nms_thresh=0.75):
279
+ B, C, N = outputs.shape
280
+ outputs = torch.from_numpy(outputs)
281
+ outputs = outputs.permute(0, 2, 1)
282
+ boxes = outputs[..., :4]
283
+ class_scores = 1 / (1 + torch.exp(-outputs[..., 4:]))
284
+ conf, class_id = class_scores.max(dim=2)
285
+
286
+ mask = conf > conf_thresh
287
+
288
+ for i in range(class_id.shape[0]): # loop over batch
289
+ # Find detections that are balls
290
+ ball_idx = np.where(class_id[i] == 2)[0]
291
+ if ball_idx.size > 0:
292
+ # Pick the one with the highest confidence
293
+ top = ball_idx[np.argmax(conf[i, ball_idx])]
294
+ if conf[i, top] > 0.55: # apply confidence threshold
295
+ mask[i, top] = True
296
+
297
+ # ball_mask = (class_id == 2) & (conf > 0.51)
298
+ # mask = mask | ball_mask
299
+
300
+ batch_idx, pred_idx = mask.nonzero(as_tuple=True)
301
+
302
+ if len(batch_idx) == 0:
303
+ return [[] for _ in range(B)]
304
+
305
+ boxes = boxes[batch_idx, pred_idx]
306
+ conf = conf[batch_idx, pred_idx]
307
+ class_id = class_id[batch_idx, pred_idx]
308
+
309
+ x, y, w, h = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
310
+ x1 = (x - w / 2) * x_scale
311
+ y1 = (y - h / 2) * y_scale
312
+ x2 = (x + w / 2) * x_scale
313
+ y2 = (y + h / 2) * y_scale
314
+ boxes_xyxy = torch.stack([x1, y1, x2, y2], dim=1)
315
+
316
+ max_coord = 1e4
317
+ offset = batch_idx.to(boxes_xyxy) * max_coord
318
+ boxes_for_nms = boxes_xyxy + offset[:, None]
319
+
320
+ keep = batched_nms(boxes_for_nms, conf, batch_idx, nms_thresh)
321
+
322
+ boxes_final = boxes_xyxy[keep]
323
+ conf_final = conf[keep]
324
+ class_final = class_id[keep]
325
+ batch_final = batch_idx[keep]
326
+
327
+ results = [[] for _ in range(B)]
328
+ for b in range(B):
329
+ mask_b = batch_final == b
330
+ if mask_b.sum() == 0:
331
+ continue
332
+ results[b] = list(zip(boxes_final[mask_b].numpy(),
333
+ conf_final[mask_b].numpy(),
334
+ class_final[mask_b].numpy()))
335
+ return results
336
+
337
+ def player_detection_result(frames: list[ndarray], batch_size, model, kits_clf=None, left_team_label=None, grass_hsv=None):
338
+ start_time = time.time()
339
+ # input_layer = model.input(0)
340
+ # output_layer = model.output(0)
341
+ height, width = frames[0].shape[:2]
342
+ scale = 640.0
343
+ x_scale = width / scale
344
+ y_scale = height / scale
345
+
346
+ # infer_queue = AsyncInferQueue(model, len(frames))
347
+
348
+ infer_time = time.time()
349
+ kits_clf = kits_clf
350
+ left_team_label = left_team_label
351
+ grass_hsv = grass_hsv
352
+ results = []
353
+ for i in range(0, len(frames), batch_size):
354
+ if i + batch_size > len(frames):
355
+ batch_size = len(frames) - i
356
+ batch_frames = frames[i:i + batch_size]
357
+ imgs = pre_process_img(batch_frames, scale)
358
+
359
+ input_name = model.get_inputs()[0].name
360
+ outputs = model.run(None, {input_name: imgs})[0]
361
+ raw_results = post_process_output(np.array(outputs), x_scale, y_scale)
362
+
363
+ if kits_clf is None or left_team_label is None or grass_hsv is None:
364
+ # Use first frame to initialize team classification
365
+ first_frame = batch_frames[0]
366
+ first_frame_results = raw_results[0] if raw_results else []
367
+
368
+ if first_frame_results:
369
+ players_imgs, players_boxes = get_players_boxes(first_frame, first_frame_results)
370
+ if players_imgs:
371
+ grass_color = get_grass_color(first_frame)
372
+ grass_hsv = cv2.cvtColor(np.uint8([[list(grass_color)]]), cv2.COLOR_BGR2HSV)
373
+ kits_colors = get_kits_colors(players_imgs, grass_hsv)
374
+ if kits_colors: # Only proceed if we have valid kit colors
375
+ kits_clf = get_kits_classifier(kits_colors)
376
+ if kits_clf is not None:
377
+ left_team_label = int(get_left_team_label(players_boxes, kits_colors, kits_clf))
378
+
379
+ # Process team identification and boundary checking
380
+ processed_results = process_team_identification_batch(
381
+ batch_frames, raw_results, kits_clf, left_team_label, grass_hsv
382
+ )
383
+
384
+ processed_results = convert_numpy_types(processed_results)
385
+ results.extend(processed_results)
386
+
387
+ # Return the same format as before for compatibility
388
+ return results, kits_clf, left_team_label, grass_hsv
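Finally, a minimal smoke test of post_process_output (illustration only; the synthetic array assumes the YOLO-style layout used above, 4 box terms followed by 5 class logits per prediction, in 640x640 input space):

import numpy as np
from player import post_process_output

B, num_classes, N = 1, 5, 5
outputs = np.full((B, 4 + num_classes, N), -10.0, dtype=np.float32)  # low logits everywhere
outputs[0, :4, 0] = [320.0, 320.0, 40.0, 80.0]  # one box: cx, cy, w, h in the 640x640 model space
outputs[0, 4, 0] = 6.0                          # class-0 (player) logit -> sigmoid ~ 0.998

# Scale factors for a 1920x1080 source frame, as computed in player_detection_result.
results = post_process_output(outputs, x_scale=1920 / 640, y_scale=1080 / 640)
for box, conf, cls in results[0]:
    print(box.astype(int), round(float(conf), 3), int(cls))  # e.g. [900 472 1020 607] 0.998 0

The single confident prediction survives the 0.6 confidence threshold and NMS, and its center-size box is converted back to corner coordinates in the original frame resolution.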