No public description

tensorflower-gardener · tensorflower-gardener · commit 6aed712d5040 · 2024-01-11T20:23:37.000-08:00
PiperOrigin-RevId: 597723134
diff --git a/official/vision/modeling/layers/detection_generator.py b/official/vision/modeling/layers/detection_generator.py
@@ -794,6 +794,7 @@ def _generate_detections_tflite(
     raw_scores: Mapping[str, tf.Tensor],
     anchor_boxes: Mapping[str, tf.Tensor],
     config: Dict[str, Any],
+    box_coder_weights: List[float] | None = None,
 ) -> Sequence[Any]:
   """Generate detections for conversion to TFLite.
 
@@ -817,7 +818,10 @@ def _generate_detections_tflite(
       features and value is a tensor denoting a level of anchors with shape
       [num_anchors, 4].
     config: A dictionary of configs defining parameters for TFLite NMS op.
+    box_coder_weights: An optional `list` of 4 positive floats to scale y, x, h,
+      and w when encoding box coordinates. If None, does not scale. For Faster
+      RCNN, the open-source implementation recommends [10.0, 10.0, 5.0, 5.0].
 
   Returns:
     A (dummy) tuple of (boxes, scores, classess, num_detections).
 
@@ -839,15 +843,18 @@ def _generate_detections_tflite(
     raise ValueError(
         'The last dimension of predicted boxes should be divisible by 4.'
     )
+
   num_anchors_per_locations = num_anchors_per_locations_times_4 // 4
-  if num_anchors_per_locations_times_4 % 4 != 0:
+  num_classes_times_anchors_per_location = (
+      raw_scores[str(min_level)].get_shape().as_list()[-1]
+  )
+  if num_classes_times_anchors_per_location % num_anchors_per_locations != 0:
     raise ValueError(
         'The last dimension of predicted scores should be divisible by'
         f' {num_anchors_per_locations}.'
     )
   num_classes = (
-      raw_scores[str(min_level)].get_shape().as_list()[-1]
-      // num_anchors_per_locations
+      num_classes_times_anchors_per_location // num_anchors_per_locations
   )
   config.update({'num_classes': num_classes})
 
@@ -865,6 +872,14 @@ def _generate_detections_tflite(
   wa = anchors[..., 3] - anchors[..., 1]
   anchors = tf.stack([ycenter_a, xcenter_a, ha, wa], axis=-1)
 
+  if box_coder_weights:
+    config.update({
+        'y_scale': box_coder_weights[0],
+        'x_scale': box_coder_weights[1],
+        'h_scale': box_coder_weights[2],
+        'w_scale': box_coder_weights[3],
+    })
+
   if config.get('normalize_anchor_coordinates', False):
     # TFLite's object detection APIs require normalized anchors.
     height, width = config['input_image_size']
@@ -1463,6 +1478,7 @@ def __call__(
           raw_scores,
           anchor_boxes,
           self.get_config()['tflite_post_processing_config'],
+          self._config_dict['box_coder_weights'],
       )
       return {
           'num_detections': num_detections,
diff --git a/official/vision/modeling/layers/detection_generator_test.py b/official/vision/modeling/layers/detection_generator_test.py
@@ -13,12 +13,15 @@
 # limitations under the License.
 
 """Tests for detection_generator.py."""
+from unittest import mock
+
 # Import libraries
 
 from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf, tf_keras
 
+from official.vision.configs import common
 from official.vision.modeling.layers import detection_generator
 from official.vision.ops import anchor
 
@@ -327,6 +330,107 @@ def test_decode_multilevel_outputs_and_pre_nms_top_k(self):
             ]),
         ]))
 
+  def test_decode_multilevel_with_tflite_nms(self):  # Checks TFLite op attrs.
+    config = common.TFLitePostProcessingConfig().as_dict()  # Default settings.
+    generator = detection_generator.MultilevelDetectionGenerator(
+        apply_nms=True,
+        nms_version='tflite',
+        box_coder_weights=[9, 8, 7, 6],  # Expected as y, x, h, w scales.
+        tflite_post_processing_config=config,
+    )
+    raw_scores = {  # Last dim: 3 classes * 2 anchors per location.
+        '4': tf.zeros(shape=[1, 8, 8, 3 * 2], dtype=tf.float32),
+        '5': tf.zeros(shape=[1, 4, 4, 3 * 2], dtype=tf.float32),
+    }
+    raw_boxes = {  # Last dim: 4 coordinates * 2 anchors per location.
+        '4': tf.zeros(shape=[1, 8, 8, 4 * 2], dtype=tf.float32),
+        '5': tf.zeros(shape=[1, 4, 4, 4 * 2], dtype=tf.float32),
+    }
+    anchor_boxes = {  # Same shapes as raw_boxes.
+        '4': tf.zeros(shape=[1, 8, 8, 4 * 2], dtype=tf.float32),
+        '5': tf.zeros(shape=[1, 4, 4, 4 * 2], dtype=tf.float32),
+    }
+
+    expected_signature = (  # Expected `experimental_implements` value.
+        'name: "TFLite_Detection_PostProcess" attr { key: "max_detections"'
+        ' value { i: 200 } } attr { key: "max_classes_per_detection" value { i:'
+        ' 5 } } attr { key: "detections_per_class" value { i: 5 } } attr { key:'
+        ' "use_regular_nms" value { b: false } } attr { key:'
+        ' "nms_score_threshold" value { f: 0.100000 } } attr { key:'
+        ' "nms_iou_threshold" value { f: 0.500000 } } attr { key: "y_scale"'
+        ' value { f: 9.000000 } } attr { key: "x_scale" value { f: 8.000000 } }'
+        ' attr { key: "h_scale" value { f: 7.000000 } } attr { key: "w_scale"'
+        ' value { f: 6.000000 } } attr { key: "num_classes" value { i: 3 } }'
+    )
+
+    with mock.patch.object(  # Spy on tf.function; calls still run.
+        tf, 'function', wraps=tf.function
+    ) as mock_tf_function:
+      test_output = generator(
+          raw_boxes=raw_boxes,
+          raw_scores=raw_scores,
+          anchor_boxes=anchor_boxes,
+          image_shape=tf.constant([], dtype=tf.int32),
+      )
+      mock_tf_function.assert_called_once_with(
+          experimental_implements=expected_signature
+      )
+
+    self.assertEqual(  # Every output is expected to be scalar 0.0.
+        test_output['num_detections'], tf.constant(0.0, dtype=tf.float32)
+    )
+    self.assertEqual(
+        test_output['detection_boxes'], tf.constant(0.0, dtype=tf.float32)
+    )
+    self.assertEqual(
+        test_output['detection_classes'], tf.constant(0.0, dtype=tf.float32)
+    )
+    self.assertEqual(
+        test_output['detection_scores'], tf.constant(0.0, dtype=tf.float32)
+    )
+
+  def test_decode_multilevel_tflite_nms_error_on_wrong_boxes_shape(self):
+    config = common.TFLitePostProcessingConfig().as_dict()  # Default settings.
+    generator = detection_generator.MultilevelDetectionGenerator(
+        apply_nms=True,
+        nms_version='tflite',
+        tflite_post_processing_config=config,
+    )
+    raw_scores = {'4': tf.zeros(shape=[1, 4, 4, 3 * 2], dtype=tf.float32)}
+    raw_boxes = {'4': tf.zeros(shape=[1, 4, 4, 3], dtype=tf.float32)}
+    anchor_boxes = {'4': tf.zeros(shape=[1, 4, 4, 4 * 2], dtype=tf.float32)}
+    with self.assertRaisesRegex(  # raw_boxes last dim (3) % 4 != 0.
+        ValueError,
+        'The last dimension of predicted boxes should be divisible by 4.',
+    ):
+      generator(
+          raw_boxes=raw_boxes,
+          raw_scores=raw_scores,
+          anchor_boxes=anchor_boxes,
+          image_shape=tf.constant([], dtype=tf.int32),
+      )
+
+  def test_decode_multilevel_tflite_nms_error_on_wrong_scores_shape(self):
+    config = common.TFLitePostProcessingConfig().as_dict()  # Default settings.
+    generator = detection_generator.MultilevelDetectionGenerator(
+        apply_nms=True,
+        nms_version='tflite',
+        tflite_post_processing_config=config,
+    )
+    raw_scores = {'4': tf.zeros(shape=[1, 4, 4, 7 * 3], dtype=tf.float32)}
+    raw_boxes = {'4': tf.zeros(shape=[1, 4, 4, 4 * 5], dtype=tf.float32)}
+    anchor_boxes = {'4': tf.zeros(shape=[1, 4, 4, 4 * 5], dtype=tf.float32)}
+    with self.assertRaisesRegex(  # 21 scores per location % 5 anchors != 0.
+        ValueError,
+        'The last dimension of predicted scores should be divisible by',
+    ):
+      generator(
+          raw_boxes=raw_boxes,
+          raw_scores=raw_scores,
+          anchor_boxes=anchor_boxes,
+          image_shape=tf.constant([], dtype=tf.int32),
+      )
+
   def test_serialize_deserialize(self):
     tflite_post_processing_config = {
         'max_detections': 100,