Add int8 and int16 support for SELECT op to AEQ

marialyu · copybara-github · commit 2205e48cd3f4 · 2025-08-14T10:59:45.000-07:00
PiperOrigin-RevId: 795111628
diff --git a/README.md b/README.md
@@ -139,6 +139,7 @@ The table below outlines the allowed configurations for available recipes.
 |SPLIT            |     |     |<div align="center"> &check; </div>|     |<div align="center"> &check; </div>|    |    |    |
 |LOGISTIC         |     |     |<div align="center"> &check; </div>|     |<div align="center"> &check; </div>|    |    |    |
 |SLICE            |     |     |<div align="center"> &check; </div>|     |<div align="center"> &check; </div>|    |    |    |
+|SELECT           |     |     |<div align="center"> &check; </div>|     |<div align="center"> &check; </div>|    |    |    |
 |SELECT_V2        |     |     |<div align="center"> &check; </div>|     |<div align="center"> &check; </div>|    |    |    |
 |SUM              |     |     |<div align="center"> &check; </div>|     |<div align="center"> &check; </div>|    |    |    |
 |PAD              |     |     |<div align="center"> &check; </div>|     |<div align="center"> &check; </div>|    |    |    |
diff --git a/ai_edge_quantizer/algorithm_manager.py b/ai_edge_quantizer/algorithm_manager.py
@@ -102,6 +102,7 @@ class AlgorithmName(str, enum.Enum):
     _TFLOpName.LOGISTIC: common_quantize.materialize_softmax_and_logistic,
     _TFLOpName.SLICE: common_quantize.materialize_slice,
     _TFLOpName.SUM: common_quantize.materialize_sum,
+    _TFLOpName.SELECT: common_quantize.materialize_select,
     _TFLOpName.SELECT_V2: common_quantize.materialize_select_v2,
     _TFLOpName.DYNAMIC_UPDATE_SLICE: (
         common_quantize.materialize_dynamic_update_slice
@@ -250,6 +251,7 @@ class AlgorithmName(str, enum.Enum):
     _TFLOpName.LOGISTIC: common_quantize.materialize_softmax_and_logistic,
     _TFLOpName.SLICE: common_quantize.materialize_slice,
     _TFLOpName.SUM: common_quantize.materialize_sum,
+    _TFLOpName.SELECT: common_quantize.materialize_select,
     _TFLOpName.SELECT_V2: common_quantize.materialize_select_v2,
     _TFLOpName.DYNAMIC_UPDATE_SLICE: (
         common_quantize.materialize_dynamic_update_slice
diff --git a/ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py b/ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py
@@ -371,6 +371,25 @@ def materialize_slice(
   )
 
 
+def materialize_select(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.select."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+      inputs_to_ignore=[
+          0,
+      ],  # Input 0 is the condition tensor; it does not need to be quantized.
+  )
+
+
 def materialize_select_v2(
     get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
     op_info: qtyping.OpInfo,
diff --git a/ai_edge_quantizer/algorithms/uniform_quantize/op_architecture_tests/select_test.py b/ai_edge_quantizer/algorithms/uniform_quantize/op_architecture_tests/select_test.py
@@ -0,0 +1,102 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.platform import googletest
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.algorithms.uniform_quantize import common_quantize
+from ai_edge_quantizer.algorithms.uniform_quantize import naive_min_max_quantize
+from ai_edge_quantizer.algorithms.uniform_quantize import octav
+from ai_edge_quantizer.algorithms.uniform_quantize.op_architecture_tests import test_utils as op_test_utils
+from ai_edge_quantizer.utils import test_utils
+from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+
+
+_TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile(
+    "../../../tests/models"
+)
+
+
+class SelectTest(op_test_utils.BaseQuantizeTest):
+
+  def setUp(self):
+    super().setUp()
+    np.random.seed(666)  # Fixed seed so NumPy randomness is repeatable.
+    self._test_model_path = os.path.join(
+        _TEST_DATA_PREFIX_PATH, "single_select.tflite"
+    )
+    self._op_test_info = op_test_utils.OpTestInfo(
+        test_model=tfl_flatbuffer_utils.read_model(self._test_model_path),
+        op_tensor_names={},  # Filled in by the test method.
+        input_range=(np.array([[-10]]), np.array([[10]])),
+        output_range=(np.array([[-10]]), np.array([[10]])),
+    )
+    # Only subgraph 0 is used: the test model has one subgraph for now.
+    self._graph_info = qtyping.GraphInfo(
+        subgraph_tensors=self._op_test_info.test_model.subgraphs[0].tensors,
+        buffers=self._op_test_info.test_model.buffers,
+    )
+
+  @parameterized.parameters(
+      # Columns: get_tensor_quant_params_func, activations_num_bits, symmetric.
+      (naive_min_max_quantize.get_tensor_quant_params, 8, True),
+      (naive_min_max_quantize.get_tensor_quant_params, 8, False),
+      (naive_min_max_quantize.get_tensor_quant_params, 16, True),
+      (octav.get_tensor_quant_params, 8, True),
+      (octav.get_tensor_quant_params, 16, True),
+  )
+  def test_materialize_select_succeeds(
+      self, get_tensor_quant_params_func, activations_num_bits, symmetric
+  ):
+    activation_config = test_utils.get_static_activation_quant_setting(
+        activations_num_bits, symmetric
+    )
+    op_quant_config = test_utils.get_static_op_quant_config(activation_config)
+
+    # Values below were read from Model Explorer.
+    subgraph0 = self._op_test_info.test_model.subgraphs[0]
+    subgraph_op_id = 0
+    op = subgraph0.operators[subgraph_op_id]
+    op_info = qtyping.OpInfo(
+        op=op,
+        op_name=qtyping.TFLOperationName.SELECT,
+        subgraph_op_index=subgraph_op_id,
+        op_quant_config=op_quant_config,
+    )
+
+    # Test settings: tensor names for the op's inputs and output.
+    op_tensor_names = {}
+    op_tensor_names["input"] = "serving_default_condition:0"
+    op_tensor_names["input2"] = "serving_default_x:0"
+    op_tensor_names["input3"] = "serving_default_y:0"
+    op_tensor_names["output"] = "PartitionedCall:0"
+    self._op_test_info.op_tensor_names = op_tensor_names
+    self._test_no_weights_op(
+        op_info,
+        self._graph_info,
+        self._op_test_info,
+        common_quantize.materialize_select,
+        get_tensor_quant_params_func,
+        same_input_output_params=True,
+        inputs_to_ignore=[0],  # Input 0 (condition) need not be quantized.
+    )
+
+
+if __name__ == "__main__":
+  googletest.main()
diff --git a/ai_edge_quantizer/calibrator_test.py b/ai_edge_quantizer/calibrator_test.py
@@ -302,7 +302,7 @@ def test_toy_gemma2_calibration_success(self):
         self._toy_gemma2_calibration_dataset,
         model_recipe_manager=recipe_mngr,
     )
-    self.assertLen(calib.get_model_qsvs(), 288)
+    self.assertLen(calib.get_model_qsvs(), 290)
 
 
 if __name__ == "__main__":
diff --git a/ai_edge_quantizer/default_policy.py b/ai_edge_quantizer/default_policy.py
@@ -180,6 +180,7 @@
       "SLICE",
       "EMBEDDING_LOOKUP",
       "SUM",
+      "SELECT",
       "SELECT_V2",
       "DYNAMIC_UPDATE_SLICE",
       "SELECT_V2",
@@ -222,6 +223,7 @@
       "SLICE",
       "EMBEDDING_LOOKUP",
       "SUM",
+      "SELECT",
       "SELECT_V2",
       "DYNAMIC_UPDATE_SLICE",
       "SELECT_V2",
diff --git a/ai_edge_quantizer/qtyping.py b/ai_edge_quantizer/qtyping.py
@@ -59,6 +59,7 @@ class TFLOperationName(str, enum.Enum):
   LOGISTIC = 'LOGISTIC'
   SLICE = 'SLICE'
   SUM = 'SUM'
+  SELECT = 'SELECT'
   SELECT_V2 = 'SELECT_V2'
   DYNAMIC_UPDATE_SLICE = 'DYNAMIC_UPDATE_SLICE'
   STABLEHLO_COMPOSITE = 'STABLEHLO_COMPOSITE'
diff --git a/ai_edge_quantizer/tests/end_to_end_tests/select_test.py b/ai_edge_quantizer/tests/end_to_end_tests/select_test.py
@@ -0,0 +1,69 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""E2E tests for the quantizer for model with select op."""
+
+import os
+
+from absl.testing import parameterized
+
+from tensorflow.python.platform import googletest
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer import quantizer
+from ai_edge_quantizer.utils import test_utils
+
+
+_TEST_MODEL_FOLDER = test_utils.get_path_to_datafile('../models/')
+_QuantAlgo = quantizer.AlgorithmName
+
+
+class SelectTest(test_utils.BaseOpTestCase):
+
+  def setUp(self):
+    super().setUp()
+    self._op_name = qtyping.TFLOperationName.SELECT  # Op type under test.
+
+  @parameterized.parameters(
+      # Columns: algorithm_key, activations_num_bits, symmetric.
+      (_QuantAlgo.MIN_MAX_UNIFORM_QUANT, 8, True),
+      (_QuantAlgo.MIN_MAX_UNIFORM_QUANT, 8, False),
+      (_QuantAlgo.MIN_MAX_UNIFORM_QUANT, 16, True),
+      (_QuantAlgo.OCTAV, 8, True),
+      (_QuantAlgo.OCTAV, 16, True),
+  )
+  def test_select_static_quantization_accuracy_and_size_within_tolerance(
+      self, algorithm_key, activations_num_bits, symmetric
+  ):
+    output_tolerance = 5e-4
+    model_filename = 'single_select.tflite'
+    model_path = os.path.join(_TEST_MODEL_FOLDER, model_filename)
+
+    activation_config = test_utils.get_static_activation_quant_setting(
+        activations_num_bits, symmetric
+    )
+    op_config = test_utils.get_static_op_quant_config(activation_config)
+    self.assert_quantization_accuracy(
+        algorithm_key=algorithm_key,
+        model_path=model_path,
+        op_name=self._op_name,
+        op_config=op_config,
+        output_tolerance=output_tolerance,
+        num_calibration_samples=1,
+        num_validation_samples=1,
+    )
+
+
+if __name__ == '__main__':
+  googletest.main()
diff --git a/ai_edge_quantizer/tests/models/single_select.tflite b/ai_edge_quantizer/tests/models/single_select.tflite
diff --git a/ai_edge_quantizer/utils/tfl_flatbuffer_utils.py b/ai_edge_quantizer/utils/tfl_flatbuffer_utils.py
@@ -51,6 +51,7 @@
     _TFLOpName.LOGISTIC: schema.BuiltinOperator.LOGISTIC,
     _TFLOpName.SLICE: schema.BuiltinOperator.SLICE,
     _TFLOpName.SUM: schema.BuiltinOperator.SUM,
+    _TFLOpName.SELECT: schema.BuiltinOperator.SELECT,
     _TFLOpName.SELECT_V2: schema.BuiltinOperator.SELECT_V2,
     _TFLOpName.STABLEHLO_COMPOSITE: schema.BuiltinOperator.STABLEHLO_COMPOSITE,
     _TFLOpName.DYNAMIC_UPDATE_SLICE: (

Original file line number	Diff line number	Diff line change
`@@ -302,7 +302,7 @@ def test_toy_gemma2_calibration_success(self):`
`302`	`302`	`self._toy_gemma2_calibration_dataset,`
`303`	`303`	`model_recipe_manager=recipe_mngr,`
`304`	`304`	`)`
`305`		`- self.assertLen(calib.get_model_qsvs(), 288)`
	`305`	`+ self.assertLen(calib.get_model_qsvs(), 290)`
`306`	`306`
`307`	`307`
`308`	`308`	`if __name__ == "__main__":`