Merge branch 'main' into size-measurement-docs-change

PawelPeczek-Roboflow · web-flow · commit b90120880e64 · 2025-10-21T19:04:57.000+02:00
diff --git a/inference/core/interfaces/http/http_api.py b/inference/core/interfaces/http/http_api.py
@@ -250,6 +250,11 @@
     EXECUTION_ID_HEADER = None
 
 
+def get_content_type(request: Request) -> str:
+    """Return the bare media type from the request's ``Content-Type`` header.
+
+    Everything after the first ``;`` (e.g. ``charset=utf-8``) is dropped and
+    surrounding whitespace is stripped. The result is lower-cased because media
+    types are case-insensitive, so it can be compared directly against literals
+    such as ``"application/json"``.
+
+    Args:
+        request: Incoming request; only ``request.headers`` is read.
+
+    Returns:
+        The normalised media type, or an empty string when the header is absent.
+    """
+    content_type = request.headers.get("content-type", "")
+    return content_type.split(";")[0].strip().lower()
+
+
 class LambdaMiddleware(BaseHTTPMiddleware):
     async def dispatch(self, request, call_next):
         response = await call_next(request)
@@ -457,7 +462,7 @@ async def check_authorization_serverless(request: Request, call_next):
                         skip_check = True
 
                     elif (
-                        request.headers.get("content-type", None) == "application/json"
+                        get_content_type(request) == "application/json"
                         and int(request.headers.get("content-length", 0)) > 0
                     ):
                         json_params = await request.json()
@@ -484,7 +489,7 @@ def _unauthorized_response(msg):
                 api_key = req_params.get("api_key", None)
                 if (
                     api_key is None
-                    and request.headers.get("content-type", None) == "application/json"
+                    and get_content_type(request) == "application/json"
                     and int(request.headers.get("content-length", 0)) > 0
                 ):
                     # have to try catch here, because some legacy endpoints that abuse Content-Type header but dont actually receive json
@@ -544,7 +549,7 @@ def _unauthorized_response(msg):
                 api_key = req_params.get("api_key", None)
                 if (
                     api_key is None
-                    and request.headers.get("content-type", None) == "application/json"
+                    and get_content_type(request) == "application/json"
                     and int(request.headers.get("content-length", 0)) > 0
                 ):
                     # have to try catch here, because some legacy endpoints that abuse Content-Type header but dont actually receive json
diff --git a/inference_experimental/inference_exp/models/auto_loaders/models_registry.py b/inference_experimental/inference_exp/models/auto_loaders/models_registry.py
@@ -57,6 +57,12 @@ class RegistryEntry:
         module_name="inference_exp.models.yolov7.yolov7_instance_segmentation_trt",
         class_name="YOLOv7ForInstanceSegmentationTRT",
     ),
+    ("yolov8", CLASSIFICATION_TASK, BackendType.ONNX): RegistryEntry(
+        model_class=LazyClass(
+            module_name="inference_exp.models.yolov8.yolov8_classification_onnx",
+            class_name="YOLOv8ForClassificationOnnx",
+        ),
+    ),
     ("yolov8", OBJECT_DETECTION_TASK, BackendType.ONNX): RegistryEntry(
         model_class=LazyClass(
             module_name="inference_exp.models.yolov8.yolov8_object_detection_onnx",
@@ -137,6 +143,12 @@ class RegistryEntry:
         module_name="inference_exp.models.yolov10.yolov10_object_detection_trt",
         class_name="YOLOv10ForObjectDetectionTRT",
     ),
+    ("yolov11", CLASSIFICATION_TASK, BackendType.ONNX): RegistryEntry(
+        model_class=LazyClass(
+            module_name="inference_exp.models.yolov11.yolov11_onnx",
+            class_name="YOLOv11ForClassificationOnnx",
+        ),
+    ),
     ("yolov11", OBJECT_DETECTION_TASK, BackendType.ONNX): RegistryEntry(
         model_class=LazyClass(
             module_name="inference_exp.models.yolov11.yolov11_onnx",
diff --git a/inference_experimental/inference_exp/models/common/roboflow/post_processing.py b/inference_experimental/inference_exp/models/common/roboflow/post_processing.py
@@ -263,20 +263,18 @@ def rescale_key_points_detections(
             dtype=image_detections.dtype,
             device=image_detections.device,
         ).repeat(key_points_slots_in_prediction)
-        image_detections[:, 5 + num_classes :].sub_(key_points_offsets)
+        image_detections[:, 6:].sub_(key_points_offsets)
         key_points_scale = torch.as_tensor(
             [metadata.scale_width, metadata.scale_height, 1.0],
             dtype=image_detections.dtype,
             device=image_detections.device,
         ).repeat(key_points_slots_in_prediction)
-        image_detections[:, 5 + num_classes :].div_(key_points_scale)
+        image_detections[:, 6:].div_(key_points_scale)
         if (
             metadata.static_crop_offset.offset_x != 0
             or metadata.static_crop_offset.offset_y != 0
         ):
-            static_crop_offset_length = (
-                image_detections.shape[1] - 5 - num_classes
-            ) // 3
+            static_crop_offset_length = (image_detections.shape[1] - 6) // 3
             static_crop_offsets = torch.as_tensor(
                 [
                     metadata.static_crop_offset.offset_x,
@@ -287,7 +285,7 @@ def rescale_key_points_detections(
                 dtype=image_detections.dtype,
                 device=image_detections.device,
             )
-            image_detections[:, 5 + num_classes :].add_(static_crop_offsets)
+            image_detections[:, 6:].add_(static_crop_offsets)
             static_crop_offsets = torch.as_tensor(
                 [
                     metadata.static_crop_offset.offset_x,
diff --git a/inference_experimental/inference_exp/models/yolov11/yolov11_onnx.py b/inference_experimental/inference_exp/models/yolov11/yolov11_onnx.py
@@ -1,3 +1,6 @@
+from inference_exp.models.yolov8.yolov8_classification_onnx import (
+    YOLOv8ForClassificationOnnx,
+)
 from inference_exp.models.yolov8.yolov8_instance_segmentation_onnx import (
     YOLOv8ForInstanceSegmentationOnnx,
 )
@@ -19,3 +22,7 @@ class YOLOv11ForInstanceSegmentationOnnx(YOLOv8ForInstanceSegmentationOnnx):
 
 class YOLOv11ForForKeyPointsDetectionOnnx(YOLOv8ForKeyPointsDetectionOnnx):
     pass
+
+
+class YOLOv11ForClassificationOnnx(YOLOv8ForClassificationOnnx):
+    """YOLOv11 classification model (ONNX backend).
+
+    Adds nothing on top of ``YOLOv8ForClassificationOnnx``; the subclass only
+    provides a distinct class name for YOLOv11.
+    """
+
+    pass
diff --git a/inference_experimental/inference_exp/models/yolov8/yolov8_classification_onnx.py b/inference_experimental/inference_exp/models/yolov8/yolov8_classification_onnx.py
@@ -0,0 +1,178 @@
+from threading import Lock
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+from inference_exp import ClassificationModel, ClassificationPrediction
+from inference_exp.configuration import DEFAULT_DEVICE
+from inference_exp.entities import ColorFormat
+from inference_exp.errors import (
+    CorruptedModelPackageError,
+    EnvironmentConfigurationError,
+    MissingDependencyError,
+)
+from inference_exp.models.base.types import PreprocessedInputs
+from inference_exp.models.common.model_packages import get_model_package_contents
+from inference_exp.models.common.onnx import (
+    run_session_with_batch_size_limit,
+    set_execution_provider_defaults,
+)
+from inference_exp.models.common.roboflow.model_packages import (
+    InferenceConfig,
+    ResizeMode,
+    parse_class_names_file,
+    parse_inference_config,
+)
+from inference_exp.models.common.roboflow.pre_processing import (
+    pre_process_network_input,
+)
+from inference_exp.utils.onnx_introspection import get_selected_onnx_execution_providers
+
+# `onnxruntime` is required by this module; when it is missing, re-raise the
+# ImportError as MissingDependencyError with a message listing the package
+# extras that provide it.
+try:
+    import onnxruntime
+except ImportError as import_error:
+    raise MissingDependencyError(
+        message=f"Could not import YOLOv8 model with ONNX backend - this error means that some additional dependencies "
+        f"are not installed in the environment. If you run the `inference-exp` library directly in your Python "
+        f"program, make sure the following extras of the package are installed: \n"
+        f"\t* `onnx-cpu` - when you wish to use library with CPU support only\n"
+        f"\t* `onnx-cu12` - for running on GPU with Cuda 12 installed\n"
+        f"\t* `onnx-cu118` - for running on GPU with Cuda 11.8 installed\n"
+        f"\t* `onnx-jp6-cu126` - for running on Jetson with Jetpack 6\n"
+        f"If you see this error using Roboflow infrastructure, make sure the service you use does support the model. "
+        f"You can also contact Roboflow to get support.",
+        help_url="https://todo",
+    ) from import_error
+
+
+class YOLOv8ForClassificationOnnx(ClassificationModel[torch.Tensor, torch.Tensor]):
+    """YOLOv8 classification model executed through an ONNX Runtime session."""
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        model_name_or_path: str,
+        onnx_execution_providers: Optional[List[Union[str, tuple]]] = None,
+        default_onnx_trt_options: bool = True,
+        device: torch.device = DEFAULT_DEVICE,
+        **kwargs,
+    ) -> "YOLOv8ForClassificationOnnx":
+        """Build the model from a model package directory.
+
+        Reads ``class_names.txt``, ``inference_config.json`` and ``weights.onnx``
+        from the package and creates an ONNX Runtime session for the weights.
+
+        Args:
+            model_name_or_path: Directory containing the model package.
+            onnx_execution_providers: Execution providers to use; when ``None``
+                they are taken from ``get_selected_onnx_execution_providers()``.
+            default_onnx_trt_options: Forwarded to
+                ``set_execution_provider_defaults``.
+            device: Device forwarded to the provider defaults and used as the
+                pre-processing target device.
+            **kwargs: Accepted but not used by this method.
+
+        Raises:
+            EnvironmentConfigurationError: No execution provider is available.
+            CorruptedModelPackageError: The package's post-processing type is
+                not ``"softmax"``.
+        """
+        providers = (
+            onnx_execution_providers
+            if onnx_execution_providers is not None
+            else get_selected_onnx_execution_providers()
+        )
+        if not providers:
+            raise EnvironmentConfigurationError(
+                message=f"Could not initialize model - selected backend is ONNX which requires execution provider to "
+                f"be specified - explicitly in `from_pretrained(...)` method or via env variable "
+                f"`ONNXRUNTIME_EXECUTION_PROVIDERS`. If you run model locally - adjust your setup, otherwise "
+                f"contact the platform support.",
+                help_url="https://todo",
+            )
+        providers = set_execution_provider_defaults(
+            providers=providers,
+            model_package_path=model_name_or_path,
+            device=device,
+            default_onnx_trt_options=default_onnx_trt_options,
+        )
+        package_files = get_model_package_contents(
+            model_package_dir=model_name_or_path,
+            elements=[
+                "class_names.txt",
+                "inference_config.json",
+                "weights.onnx",
+            ],
+        )
+        class_names = parse_class_names_file(
+            class_names_path=package_files["class_names.txt"]
+        )
+        inference_config = parse_inference_config(
+            config_path=package_files["inference_config.json"],
+            allowed_resize_modes={
+                ResizeMode.STRETCH_TO,
+                ResizeMode.LETTERBOX,
+                ResizeMode.CENTER_CROP,
+                ResizeMode.LETTERBOX_REFLECT_EDGES,
+            },
+        )
+        # Only softmax post-processing is handled by `post_process`.
+        if inference_config.post_processing.type != "softmax":
+            raise CorruptedModelPackageError(
+                message="Expected Softmax to be the post-processing",
+                help_url="https://todo",
+            )
+        session = onnxruntime.InferenceSession(
+            path_or_bytes=package_files["weights.onnx"],
+            providers=providers,
+        )
+        first_input = session.get_inputs()[0]
+        declared_batch_size = first_input.shape[0]
+        # A string in the batch dimension is treated as "no fixed batch size".
+        fixed_batch_size = (
+            None if isinstance(declared_batch_size, str) else declared_batch_size
+        )
+        return cls(
+            session=session,
+            input_name=first_input.name,
+            inference_config=inference_config,
+            class_names=class_names,
+            device=device,
+            input_batch_size=fixed_batch_size,
+        )
+
+    def __init__(
+        self,
+        session: onnxruntime.InferenceSession,
+        input_name: str,
+        inference_config: InferenceConfig,
+        class_names: List[str],
+        device: torch.device,
+        input_batch_size: Optional[int],
+    ):
+        """Store the session, its input name and the package configuration.
+
+        A dedicated lock is created here; `forward` holds it while the session
+        runs.
+        """
+        self._session_thread_lock = Lock()
+        self._session = session
+        self._input_name = input_name
+        self._input_batch_size = input_batch_size
+        self._inference_config = inference_config
+        self._class_names = class_names
+        self._device = device
+
+    @property
+    def class_names(self) -> List[str]:
+        """Class names loaded from the model package."""
+        return self._class_names
+
+    def pre_process(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        input_color_format: Optional[ColorFormat] = None,
+        image_size: Optional[Tuple[int, int]] = None,
+        **kwargs,
+    ) -> torch.Tensor:
+        """Pre-process images according to the package's inference config.
+
+        Returns the first element of the ``pre_process_network_input`` result;
+        the remaining elements are discarded. ``**kwargs`` is not used.
+        """
+        pre_processing_result = pre_process_network_input(
+            images=images,
+            image_pre_processing=self._inference_config.image_pre_processing,
+            network_input=self._inference_config.network_input,
+            target_device=self._device,
+            input_color_format=input_color_format,
+            image_size_wh=image_size,
+        )
+        return pre_processing_result[0]
+
+    def forward(
+        self, pre_processed_images: PreprocessedInputs, **kwargs
+    ) -> torch.Tensor:
+        """Run the ONNX session on pre-processed images and return its first output.
+
+        The session call is made while holding the instance lock; the stored
+        input batch size is passed as both the minimum and maximum batch size.
+        """
+        session_inputs = {self._input_name: pre_processed_images}
+        with self._session_thread_lock:
+            outputs = run_session_with_batch_size_limit(
+                session=self._session,
+                inputs=session_inputs,
+                min_batch_size=self._input_batch_size,
+                max_batch_size=self._input_batch_size,
+            )
+            return outputs[0]
+
+    def post_process(
+        self,
+        model_results: torch.Tensor,
+        **kwargs,
+    ) -> ClassificationPrediction:
+        """Turn raw model outputs into a classification prediction.
+
+        When the config marks post-processing as fused, the outputs are used as
+        confidences as-is; otherwise softmax is applied over the last dimension.
+        ``class_id`` is the argmax of the confidences over the last dimension.
+        """
+        softmax_already_applied = self._inference_config.post_processing.fused
+        confidence = (
+            model_results
+            if softmax_already_applied
+            else torch.nn.functional.softmax(model_results, dim=-1)
+        )
+        predicted_class = confidence.argmax(dim=-1)
+        return ClassificationPrediction(
+            class_id=predicted_class,
+            confidence=confidence,
+        )
diff --git a/inference_experimental/inference_exp/models/yolov8/yolov8_key_points_detection_onnx.py b/inference_experimental/inference_exp/models/yolov8/yolov8_key_points_detection_onnx.py
@@ -233,7 +233,9 @@ def post_process(
                     confidence=result[:, 4],
                 )
             )
-            key_points_reshaped = result[:, 6:].view(result.shape[0], -1, 3)
+            key_points_reshaped = result[:, 6:].view(
+                result.shape[0], self._key_points_slots_in_prediction, 3
+            )
             xy = key_points_reshaped[:, :, :2]
             confidence = key_points_reshaped[:, :, 2]
             key_points_classes_for_instance_class = (
diff --git a/inference_experimental/inference_exp/models/yolov8/yolov8_key_points_detection_torch_script.py b/inference_experimental/inference_exp/models/yolov8/yolov8_key_points_detection_torch_script.py
@@ -191,7 +191,9 @@ def post_process(
                     confidence=result[:, 4],
                 )
             )
-            key_points_reshaped = result[:, 6:].view(result.shape[0], -1, 3)
+            key_points_reshaped = result[:, 6:].view(
+                result.shape[0], self._key_points_slots_in_prediction, 3
+            )
             xy = key_points_reshaped[:, :, :2]
             confidence = key_points_reshaped[:, :, 2]
             key_points_classes_for_instance_class = (
diff --git a/inference_experimental/inference_exp/models/yolov8/yolov8_key_points_detection_trt.py b/inference_experimental/inference_exp/models/yolov8/yolov8_key_points_detection_trt.py
@@ -257,7 +257,9 @@ def post_process(
                     confidence=result[:, 4],
                 )
             )
-            key_points_reshaped = result[:, 6:].view(result.shape[0], -1, 3)
+            key_points_reshaped = result[:, 6:].view(
+                result.shape[0], self._key_points_slots_in_prediction, 3
+            )
             xy = key_points_reshaped[:, :, :2]
             confidence = key_points_reshaped[:, :, 2]
             key_points_classes_for_instance_class = (
diff --git a/inference_experimental/tests/integration_tests/models/conftest.py b/inference_experimental/tests/integration_tests/models/conftest.py
@@ -118,6 +118,8 @@
 YOLOV8N_POSE_TORCHSCRIPT_STATIC_NMS_FUSED_CENTER_CROP_PACKAGE_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/yolov8n-pose-torchscript-static-nms-fused-center-crop.zip"
 YOLOV8N_POSE_TORCHSCRIPT_STATIC_NMS_FUSED_STATIC_CROP_CENTER_CROP_PACKAGE_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/yolov8n-pose-torchscript-static-nms-fused-static-crop-center-crop.zip"
 
+# Zip archive downloaded by the `yolov8_cls_static_bs_onnx_package` fixture.
+YOLOV8_CLS_ONNX_PACKAGE_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/yolov8-cls-onnx-static-bs.zip"
+
 
 @pytest.fixture(scope="module")
 def original_clip_download_dir() -> str:
@@ -942,3 +944,11 @@ def yolov8n_pose_torchscript_static_nms_fused_static_crop_center_crop_package()
         model_package_zip_url=YOLOV8N_POSE_TORCHSCRIPT_STATIC_NMS_FUSED_STATIC_CROP_CENTER_CROP_PACKAGE_URL,
         package_name="yolov8n-pose-torchscript-static-nms-fused-static-crop-center-crop",
     )
+
+
+@pytest.fixture(scope="module")
+def yolov8_cls_static_bs_onnx_package() -> str:
+    """Download the YOLOv8 classification ONNX test package once per module.
+
+    Returns whatever ``download_model_package`` returns for the package.
+    """
+    package_location = download_model_package(
+        package_name="yolov8-cls-static-onnx",
+        model_package_zip_url=YOLOV8_CLS_ONNX_PACKAGE_URL,
+    )
+    return package_location
diff --git a/inference_experimental/tests/integration_tests/models/test_yolov8_cls_predictions_onnx.py b/inference_experimental/tests/integration_tests/models/test_yolov8_cls_predictions_onnx.py

Original file line number	Diff line number	Diff line change
`@@ -233,7 +233,9 @@ def post_process(`
`233`	`233`	`confidence=result[:, 4],`
`234`	`234`	`)`
`235`	`235`	`)`
`236`		`- key_points_reshaped = result[:, 6:].view(result.shape[0], -1, 3)`
	`236`	`+ key_points_reshaped = result[:, 6:].view(`
	`237`	`+ result.shape[0], self._key_points_slots_in_prediction, 3`
	`238`	`+ )`
`237`	`239`	`xy = key_points_reshaped[:, :, :2]`
`238`	`240`	`confidence = key_points_reshaped[:, :, 2]`
`239`	`241`	`key_points_classes_for_instance_class = (`
Original file line number	Diff line number	Diff line change
`@@ -191,7 +191,9 @@ def post_process(`
`191`	`191`	`confidence=result[:, 4],`
`192`	`192`	`)`
`193`	`193`	`)`
`194`		`- key_points_reshaped = result[:, 6:].view(result.shape[0], -1, 3)`
	`194`	`+ key_points_reshaped = result[:, 6:].view(`
	`195`	`+ result.shape[0], self._key_points_slots_in_prediction, 3`
	`196`	`+ )`
`195`	`197`	`xy = key_points_reshaped[:, :, :2]`
`196`	`198`	`confidence = key_points_reshaped[:, :, 2]`
`197`	`199`	`key_points_classes_for_instance_class = (`
Original file line number	Diff line number	Diff line change
`@@ -257,7 +257,9 @@ def post_process(`
`257`	`257`	`confidence=result[:, 4],`
`258`	`258`	`)`
`259`	`259`	`)`
`260`		`- key_points_reshaped = result[:, 6:].view(result.shape[0], -1, 3)`
	`260`	`+ key_points_reshaped = result[:, 6:].view(`
	`261`	`+ result.shape[0], self._key_points_slots_in_prediction, 3`
	`262`	`+ )`
`261`	`263`	`xy = key_points_reshaped[:, :, :2]`
`262`	`264`	`confidence = key_points_reshaped[:, :, 2]`
`263`	`265`	`key_points_classes_for_instance_class = (`