From 190f9d5384542d13bc9e01f52aed9fbd06ebe403 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 22 Sep 2025 17:22:50 +0400 Subject: [PATCH 01/91] init --- .../algorithms/weight_compression/__init__.py | 10 ++ .../torch/fx/quantization/quantize_pt2e.py | 42 ++++++++ .../quantizer/openvino_adapter.py | 9 +- .../weight_compression/algorithm.py | 101 ++++++++++++------ 4 files changed, 130 insertions(+), 32 deletions(-) create mode 100644 src/nncf/experimental/quantization/algorithms/weight_compression/__init__.py diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/__init__.py b/src/nncf/experimental/quantization/algorithms/weight_compression/__init__.py new file mode 100644 index 00000000000..e5a42efc0ef --- /dev/null +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index 3f0b3186310..3c7a32be4c1 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -157,3 +157,45 @@ def _quant_node_constraint(n: torch.fx.Node) -> bool: related to quantization """ return n.op == "call_function" and n.target in QUANTIZE_NODE_TARGETS + +@api(canonical_alias="nncf.experimental.torch.fx.compress_pt2e") +def compress_pt2e( + model: torch.fx.GraphModule, + quantizer: Quantizer, + dataset: Optional[nncf.Dataset] = None, + awq: bool = False, + scale_estimation: bool = False, + gptq: bool = False, + lora_correction: bool = False, + subset_size: int = 128, # Dataset size to use + sensitivity_metric: nncf.SensitivityMetric = nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, + advanced_parameters: nncf.AdvancedCompressionParameters = None, + ) -> torch.fx.GraphModule: + + if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_weight_compression_setup"): + quantizer = OpenVINOQuantizerAdapter(quantizer) + compression_format = nncf.CompressionFormat.DQ # since OVQUantizer has a defined decompression subgraph which we want, this is a minimally invasive way to do it + else: + #TODO Path has issues with constant foldign and the QDQ subgraph + # Group size will explicitly have to be passed for other quantizers again with compress pt2e api. + quantizer = TorchAOQuantizerAdapter(quantizer) + compression_format = nncf.CompressionFormat.FQ # Insert QDQ nodes instead of Openvino decompression subgraph. 
The code for this is in torch fx backend + + quantization_algorithm = WeightsCompressionPT2E( + quantizer=quantizer, + awq=awq, + subset_size=subset_size, + scale_estimation=scale_estimation, + gptq=gptq, + lora_correction=lora_correction, + sensitivity_metric=sensitivity_metric, + compression_format=compression_format, + advanced_parameters=advanced_parameters, + ) + + # Here the model is annotated + transformed_model = quantizer.transform_prior_quantization(model) + nncf_graph = NNCFGraphFactory.create(transformed_model) + quantized_model = quantization_algorithm.apply(transformed_model, nncf_graph, dataset=dataset) + quantized_model = torch.fx.GraphModule(quantized_model, graph=quantized_model.graph) + return quantized_model diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py index 2283d9d9dbb..abc40d2d096 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py @@ -14,7 +14,7 @@ from nncf.common.graph.graph import NNCFGraph from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup from nncf.experimental.quantization.quantizer import Quantizer -from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer +from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer class OpenVINOQuantizerAdapter(Quantizer): @@ -24,9 +24,16 @@ class OpenVINOQuantizerAdapter(Quantizer): def __init__(self, quantizer: OpenVINOQuantizer): self._quantizer = quantizer + self._weight_compression_configuration = self._quantizer.weight_compression_configuration def transform_prior_quantization(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: return self._quantizer.transform_for_annotation(model) def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> 
SingleConfigQuantizerSetup: return self._quantizer.get_nncf_quantization_setup(model, nncf_graph) + + def get_weight_compression_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup: + return self._quantizer.get_nncf_weight_compression_setup(model, nncf_graph) + + def get_nodes_to_compress(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph): + return self._quantizer.get_nodes_to_compress(model, nncf_graph) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index d0407a1eff4..f93b85a9e13 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -769,10 +769,51 @@ def is_weight_compression_supported( return is_supported_dtype and not no_bit_reduction + def collect_weight_compression_statistics( + self, + model: TModel, + graph: NNCFGraph, + dataset: Dataset, + weight_params: list[WeightCompressionParameters], + statistic_points: Optional[StatisticPointsContainer] = None, + ) -> Optional[dict[str, Any]]: + """ + Collects statistics for weight compression if data-aware compression or + mixed-precision is enabled. + + :param model: Backend-specific input model. + :param graph: NNCFGraph instance. + :param dataset: Dataset for statistics collection. + :param weight_params: Weight parameters for which to collect statistics. + :param statistic_points: Optional pre-collected statistic points. + :return: A dictionary of collected statistics, or None if not applicable. 
+ """ + statistics = None + if not (self._data_aware_mixed_precision or self._data_aware_compression) and not dataset: + return statistics, statistic_points + matmul_nodes_to_compress = [ + wp.node_with_weight + for wp in weight_params + if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes + ] + matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map( + matmul_nodes_to_compress, graph + ) + + if statistic_points is None: + statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) + statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) + + statistics = self._get_statistics_for_weights_compression( + matmul_input_to_output_nodes_map, statistic_points + ) + return statistics, statistic_points + def get_weight_compression_parameters( self, model: TModel, graph: NNCFGraph, + nodes_to_compress: list[NNCFNode], statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> tuple[list[WeightCompressionParameters], Optional[dict[str, WCTensorStatistic]]]: @@ -791,8 +832,6 @@ def get_weight_compression_parameters( Compression algorithm configuration, and a mapping of target node names to the collected statistics. 
""" - nodes_to_compress = self.get_nodes_to_compress(graph) - all_weight_params: list[WeightCompressionParameters] = [] skipped_weight_params: list[WeightCompressionParameters] = [] @@ -870,23 +909,8 @@ def get_weight_compression_parameters( group_size_values = {w_params.weight_name: self._group_size for w_params in ratio_defining_params} # Collect statistics for the weights compression - statistics = None - if (self._data_aware_mixed_precision or self._data_aware_compression) and dataset: - weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params - matmul_nodes_to_compress = [ - wp.node_with_weight - for wp in weight_params - if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes - ] - matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map( - matmul_nodes_to_compress, graph - ) - if statistic_points is None: - statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) - statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) - statistics = self._get_statistics_for_weights_compression( - matmul_input_to_output_nodes_map, statistic_points - ) + weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params + statistics, statistic_points = self.collect_weight_compression_statistics(model, graph, dataset, weight_params, statistic_points) # Set weight compression configuration self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values) @@ -901,18 +925,14 @@ def get_weight_compression_parameters( return all_weight_params, statistics - def apply( - self, - model: TModel, - graph: NNCFGraph, - statistic_points: Optional[StatisticPointsContainer] = None, - dataset: Optional[Dataset] = None, + def apply_wc_algos( + self, + model: TModel, + graph: NNCFGraph, + all_weight_params: list[WeightCompressionParameters], + statistics: dict[str, 
Any], + dataset: Optional[Dataset] = None, ) -> TModel: - self.set_backend_entity(model) - - # Get processed weight compression parameters ready for compression - all_weight_params, statistics = self.get_weight_compression_parameters(model, graph, statistic_points, dataset) - if self._awq: model = self.awq_algo.apply(model, graph, all_weight_params, statistics, self._backend_entity) # After applying AWQ we need to update statistics since AWQ alters the activations @@ -967,7 +987,7 @@ def apply( self._backend_entity.dump_parameters( model, parameters={ - "mode": self._mode.value, + "mode": self._mode.value if isinstance(self._mode, str) else self._mode, "group_size": self._group_size, "ratio": self._ratio, "all_layers": self._all_layers, @@ -983,6 +1003,25 @@ def apply( }, algo_name="weight_compression", ) + + return transformed_model + + + def apply( + self, + model: TModel, + graph: NNCFGraph, + statistic_points: Optional[StatisticPointsContainer] = None, + dataset: Optional[Dataset] = None, + ) -> TModel: + self.set_backend_entity(model) + nodes_to_compress = self.get_nodes_to_compress(graph) + # Get processed weight compression parameters ready for compression + all_weight_params, statistics = self.get_weight_compression_parameters( + model, graph, nodes_to_compress, statistic_points, dataset + ) + transformed_model = self.apply_wc_algos(model, graph, all_weight_params, statistics, dataset) + return transformed_model def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> tuple[NNCFNode, int]: From c52fccaa020d5689a8ba656d94647561c9d79eb6 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 22 Sep 2025 18:33:30 +0400 Subject: [PATCH 02/91] fixes --- .../torch/fx/quantization/quantize_pt2e.py | 20 ++++++++++++++++++- .../weight_compression/algorithm.py | 2 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py 
b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index 3c7a32be4c1..b6406011d15 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -27,6 +27,7 @@ from nncf.common.logging import nncf_logger from nncf.common.utils.api_marker import api from nncf.experimental.quantization.algorithms.post_training.algorithm import ExperimentalPostTrainingQuantization +from nncf.experimental.quantization.algorithms.weight_compression.algorithm import WeightsCompressionPT2E from nncf.experimental.torch.fx.constant_folding import constant_fold from nncf.experimental.torch.fx.quantization.quantizer.openvino_adapter import OpenVINOQuantizerAdapter from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer @@ -38,7 +39,6 @@ from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters from nncf.quantization.range_estimator import RangeEstimatorParameters - @api(canonical_alias="nncf.experimental.torch.fx.quantize_pt2e") def quantize_pt2e( model: torch.fx.GraphModule, @@ -171,7 +171,25 @@ def compress_pt2e( sensitivity_metric: nncf.SensitivityMetric = nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, advanced_parameters: nncf.AdvancedCompressionParameters = None, ) -> torch.fx.GraphModule: + """ + Applies Weight Compression to the torch.fx.GraphModule provided model + using provided torch.ao quantizer. + :param model: A torch.fx.GraphModule instance to be quantized. + :param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups + to convey the desired way of quantization. + :param dataset: A representative dataset for the + calibration process. + :param awq: Determines whether to use or not the modified AWQ algorithm. + :param scale_estimation: Determines whether to use or not scale estimation for 4-bit layers. + :param gptq: Determines whether to use or not GPTQ algorithm. 
+ :param lora_correction: Determines whether to use or not LoRA Correction algorithm. + :param subset_size: Number of data samples to calculate activation statistics used for assigning different + quantization precision. + :param sensitivity_metric: The sensitivity metric for assigning quantization precision to layers. In order to + preserve the accuracy of the model, the more sensitive layers receive a higher precision. + :param advanced_parameters: Advanced parameters for algorithms in the compression pipeline. + """ if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_weight_compression_setup"): quantizer = OpenVINOQuantizerAdapter(quantizer) compression_format = nncf.CompressionFormat.DQ # since OVQUantizer has a defined decompression subgraph which we want, this is a minimally invasive way to do it diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index f93b85a9e13..8390594571c 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -987,7 +987,7 @@ def apply_wc_algos( self._backend_entity.dump_parameters( model, parameters={ - "mode": self._mode.value if isinstance(self._mode, str) else self._mode, + "mode": self._mode.value if not isinstance(self._mode, str) else self._mode, "group_size": self._group_size, "ratio": self._ratio, "all_layers": self._all_layers, From 4e56cb5639b56c543d442273243356b846d0f8c0 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 22 Sep 2025 18:38:44 +0400 Subject: [PATCH 03/91] add message for unsupported external quantizers --- .../experimental/torch/fx/quantization/quantize_pt2e.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index b6406011d15..d24f9568420 100644 --- 
a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -194,10 +194,9 @@ def compress_pt2e( quantizer = OpenVINOQuantizerAdapter(quantizer) compression_format = nncf.CompressionFormat.DQ # since OVQUantizer has a defined decompression subgraph which we want, this is a minimally invasive way to do it else: - #TODO Path has issues with constant foldign and the QDQ subgraph - # Group size will explicitly have to be passed for other quantizers again with compress pt2e api. - quantizer = TorchAOQuantizerAdapter(quantizer) - compression_format = nncf.CompressionFormat.FQ # Insert QDQ nodes instead of Openvino decompression subgraph. The code for this is in torch fx backend + #TODO Support Third party quantizers here. + msg = 'Only OpenVINO Quantizer is supported currently.' + raise nncf.InternalError(msg) quantization_algorithm = WeightsCompressionPT2E( quantizer=quantizer, From 9651ceb5076c7eddfdf5e742c80828567947b7bd Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 22 Sep 2025 19:27:47 +0400 Subject: [PATCH 04/91] add algorithm --- .../weight_compression/algorithm.py | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py new file mode 100644 index 00000000000..9679a1ca85b --- /dev/null +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -0,0 +1,93 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import nncf # type: ignore[import-untyped] +import torch + +from nncf.common.graph.graph import NNCFGraph # type: ignore[import-untyped] +from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression + +import nncf +from nncf.quantization.algorithms.algorithm import Algorithm +from nncf.common.graph.graph import NNCFGraph +from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters +from nncf.quantization.algorithms.weight_compression.weight_lowering import WeightCompressionConfig +from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer +from nncf.quantization.algorithms.weight_compression.weight_lowering import get_reduction_channel_size +from nncf.common.utils.backend import BackendType +from typing import Optional + +class WeightsCompressionPT2E(Algorithm): + def __init__(self, + quantizer, + subset_size: int = 128, + awq: bool = False, + scale_estimation: bool = False, + gptq: bool = False, + lora_correction: bool = False, + sensitivity_metric: nncf.SensitivityMetric = nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, + compression_format: nncf.CompressionFormat = nncf.CompressionFormat.DQ, + advanced_parameters: nncf.AdvancedCompressionParameters = None, + ) -> torch.fx.GraphModule: + self._quantizer = quantizer + + wc_config = quantizer._weight_compression_configuration + + mode=wc_config.get("mode", None) + ratio = wc_config.get("ratio", 1) # TODO Discuss if ratio should be passed in quantizer or in the compress_pt2e api + group_size = wc_config.get("group_size", 
128) + all_layers=wc_config.get("all_layers", False) + backup_mode=wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM) + self._sensitivity_metric = sensitivity_metric + + self._algo = WeightCompression( + mode=mode, + ratio=ratio, + group_size=group_size, + ignored_scope=nncf.IgnoredScope(), # Ignored scope is useless in the case of External Quantizers because we onyl compress "nodes_to_compress" + all_layers=all_layers, + sensitivity_metric=self._sensitivity_metric, + awq=awq, + subset_size=subset_size, + scale_estimation=scale_estimation, + gptq=gptq, + lora_correction=lora_correction, + backup_mode=backup_mode, + compression_format=compression_format, + advanced_parameters=advanced_parameters, + ) + + def available_backends(self) -> list[BackendType]: + return self._algo.available_backends() + + def apply(self, + model: torch.fx.GraphModule, + graph: NNCFGraph, + statistic_points=None, + dataset=None + ): + self._algo.set_backend_entity(model) #Set algo backend + + if(self._sensitivity_metric == nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR): + # Default case, _sensitivity_metric == nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR means that it is not defined by the user in the API + # Hence, the annotation(Quantization parameters for all layers) from the quantizer will be used. + all_weight_params = self._quantizer.get_weight_compression_setup(model, graph) # Get weight compression params FROM QUANTIZER + statistics, statistic_points = self._algo.collect_weight_compression_statistics(model, graph, dataset, all_weight_params, statistic_points) + else: + # Data Aware mixed precision is used. 
In this case, only nodes_to_compress is obtained from the quantizer + nodes_to_compress = self._quantizer.get_nodes_to_compress(model, graph) # Get nodes to compress FROM QUANTIZER + all_weight_params, statistics = self._algo.get_weight_compression_parameters(model, graph, nodes_to_compress, statistic_points, dataset) + + transformed_model = self._algo.apply_wc_algos(model, graph, all_weight_params, statistics, dataset) # Apply the wc algos FROM ALGO + return transformed_model + + def get_statistic_points(self, model, graph: NNCFGraph) -> StatisticPointsContainer: + return self._algo.get_statistic_points(model, graph) From 14daeb581df157b7866042bdca4106a7e4adfb52 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 22 Sep 2025 21:03:03 +0400 Subject: [PATCH 05/91] impotr openvino quantizer from nncf instead of executorch --- .../torch/fx/quantization/quantizer/openvino_adapter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py index abc40d2d096..6a39cc7fcdf 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py @@ -14,8 +14,7 @@ from nncf.common.graph.graph import NNCFGraph from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup from nncf.experimental.quantization.quantizer import Quantizer -from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer - +from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer class OpenVINOQuantizerAdapter(Quantizer): """ From 374681501cf0a427e261efd9fc5c6dc7c7184d5e Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 22 Sep 2025 23:26:49 +0400 Subject: [PATCH 06/91] Add observers and openvino quantizer to nncf --- .../fx/quantization/quantizer/observers.py | 190 +++++++++ 
.../quantizer/openvino_quantizer.py | 392 +++++++++++------- 2 files changed, 443 insertions(+), 139 deletions(-) create mode 100644 src/nncf/experimental/torch/fx/quantization/quantizer/observers.py diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/observers.py b/src/nncf/experimental/torch/fx/quantization/quantizer/observers.py new file mode 100644 index 00000000000..22f296e3e6b --- /dev/null +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/observers.py @@ -0,0 +1,190 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from abc import ABC, abstractmethod +from typing import Optional, Tuple + +import torch + +from nncf.experimental.torch.fx.node_utils import ( # type: ignore[import-untyped] + get_tensor_constant_from_node, +) +from nncf.experimental.torch.fx.transformations import ( # type: ignore[import-untyped] + constant_update, + module_insertion, + node_removal, +) +from nncf.quantization.algorithms.weight_compression.config import ( # type: ignore[import-untyped] + WeightCompressionParameters, +) +from nncf.quantization.algorithms.weight_compression.weight_lowering import ( # type: ignore[import-untyped] + do_integer_quantization, +) +from nncf.tensor.tensor import Tensor as NNCFTensor # type: ignore[import-untyped] +from nncf.torch.graph.transformations.commands import ( # type: ignore[import-untyped] + PTTargetPoint, + TargetType, +) +from nncf.torch.quantization.layers import ( # type: ignore[import-untyped] + BaseWeightsDecompressor, + INT4AsymmetricWeightsDecompressor, + INT4SymmetricWeightsDecompressor, + INT8AsymmetricWeightsDecompressor, + INT8SymmetricWeightsDecompressor, +) +from torchao.quantization.pt2e import ObserverBase + + +class WeightObserverBase(ObserverBase, ABC): + """ + Base implementation of an NNCF observer that defines the rules for compressing layer weights into the OpenVINO representation. + """ + + def __init__( + self, + wc_param: WeightCompressionParameters, + dtype: torch.dtype, + **kwargs, + ) -> None: + """ + :param wc_param: Weight compression parameters container. + :param dtype: target dtype for the quantization. + """ + super().__init__(dtype=dtype, is_dynamic=False) + self._wc_param = wc_param + + def calculate_qparams( # type: ignore[override] + self, + weight: torch.Tensor, + ) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: + """ + Calculates quantization parameters: quantized weight, quantization scale and quantization zero point. + + :param weight: FP weight to be used for calculating qparams. 
+ :return: A tuple containing the quantized weight, quantization scale and quantization zero point. + """ + wc_param = self._wc_param + wc_config = wc_param.compression_config + reduction_axes = wc_param.reduction_axes + q_weight, scale, zp = do_integer_quantization( + NNCFTensor(weight), wc_config, reduction_axes=reduction_axes + ) + zp = zp.data if zp is not None else None + return q_weight.data, scale.data, zp + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x + + def convert( + self, model: torch.fx.GraphModule, observer_node: torch.fx.Node + ) -> None: + """ + Replaces the given observer node from the given model with a quantized + weight and a OpenVINO specific decompression module. + + :param model: A `torch.fx.GraphModule` representing the statically traced model + with observer nodes attached and calibrated. + :param observer_node: The `torch.fx.Node` corresponding to the observer module for + the weight that is being transformed into a compressed representation. + """ + weight_node = observer_node.args[0] + original_weight = get_tensor_constant_from_node(weight_node, model) + q_weight, scale, zero_point = self.calculate_qparams(original_weight) + + decompressor = self._create_decompressor( + scale, zero_point, q_weight, original_weight + ) + packed_q_weight = decompressor.pack_weight(q_weight) + + # Weight port id is 0 since observer is inserted for a single weight only. 
+ constant_update(model, observer_node, packed_q_weight, input_port_id=0) + + compressed_weight_name = observer_node.all_input_nodes[0].name + decompressor_suffix = "_".join( + compressed_weight_name.replace(".", "_").split("_")[:-2] + ) + decompressor_name = f"{decompressor.quantization_mode}_weights_decompressor_{decompressor_suffix}" + + module_insertion( + model, + decompressor, + [ + PTTargetPoint( + TargetType.OPERATOR_POST_HOOK, + target_node_name=compressed_weight_name, + ) + ], + decompressor_name, + ) + node_removal(model, observer_node, 0) + + @abstractmethod + def _create_decompressor( + self, + scale: torch.Tensor, + zero_point: Optional[torch.Tensor], + q_weight: torch.Tensor, + original_weight: torch.Tensor, + ) -> BaseWeightsDecompressor: + """ + Returns a respective NNCF decompressor for different types of quantization. + + :param scale: Calculated scale quantization parameter. + :param zero_point: Calculated zero_point quantization parameter. + :param q_weight: Calculated quantized weight. + :param original_weight: FP weight. + :return: NNCF observer according to the qmode which creates the decompression subgraph supported by OpenVINO. + """ + + +class INT4WeightObserver(WeightObserverBase): + """ + OpenVINO INT4 Weight Compression observer. + """ + + def _create_decompressor( + self, + scale: torch.Tensor, + zero_point: Optional[torch.Tensor], + q_weight: torch.Tensor, + original_weight: torch.Tensor, + ) -> BaseWeightsDecompressor: + if zero_point is None: + return INT4SymmetricWeightsDecompressor( + scale, q_weight.shape, original_weight.shape, original_weight.dtype + ) + return INT4AsymmetricWeightsDecompressor( + scale, + zero_point, + q_weight.shape, + original_weight.shape, + original_weight.dtype, + ) + + +class INT8WeightObserver(WeightObserverBase): + """ + OpenVINO INT8 Weight Compression per channel observer. 
+ """ + + def _create_decompressor( + self, + scale: torch.Tensor, + zero_point: Optional[torch.Tensor], + q_weight: torch.Tensor, + original_weight: torch.Tensor, + ) -> BaseWeightsDecompressor: + if zero_point is None: + return INT8SymmetricWeightsDecompressor(scale, original_weight.dtype) + return INT8AsymmetricWeightsDecompressor( + scale, zero_point, original_weight.dtype + ) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py index 55611a7d095..45a917cc3c8 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py @@ -10,99 +10,127 @@ # limitations under the License. from collections import defaultdict -from typing import Optional, Union +from enum import Enum +from typing import ( + Any, + Optional, +) import torch.fx +from observers import INT4WeightObserver +from observers import INT8WeightObserver from torch.ao.quantization.observer import HistogramObserver from torch.ao.quantization.observer import PerChannelMinMaxObserver +from torch.ao.quantization.observer import UniformQuantizationObserverBase from torch.ao.quantization.quantizer.quantizer import EdgeOrNode -from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation as TorchAOQuantizationAnnotation -from torch.ao.quantization.quantizer.quantizer import QuantizationSpec as TorchAOQuantizationSpec -from torch.ao.quantization.quantizer.quantizer import QuantizationSpecBase as TorchAOQuantizationSpecBase from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer -from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec as TorchAOSharedQuantizationSpec +from torchao.quantization.pt2e.quantizer import ( + QuantizationAnnotation as TorchAOQuantizationAnnotation, +) +from torchao.quantization.pt2e.quantizer import ( + 
QuantizationSpec as TorchAOQuantizationSpec, +) +from torchao.quantization.pt2e.quantizer import ( + QuantizationSpecBase as TorchAOQuantizationSpecBase, +) import nncf -from nncf import IgnoredScope -from nncf import ModelType -from nncf import OverflowFix -from nncf import QuantizationMode -from nncf import QuantizationPreset -from nncf import TargetDevice +import nncf.common.quantization as quantization +import nncf.experimental.torch.fx as nncf_fx from nncf.common.graph.graph import NNCFGraph -from nncf.common.logging import nncf_logger -from nncf.common.quantization.quantizer_propagation.structs import QuantizerPropagationRule -from nncf.common.quantization.quantizer_setup import QuantizationPointBase +from nncf.common.graph.graph import NNCFNode from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup -from nncf.common.quantization.structs import QuantizationScheme from nncf.common.utils.api_marker import api -from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter -from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name -from nncf.quantization.advanced_parameters import FP8QuantizationParameters -from nncf.quantization.advanced_parameters import QuantizationParameters -from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization -from nncf.torch.model_graph_manager import get_weight_tensor_port_ids +from nncf.quantization.algorithms.weight_compression.config import ( + WeightCompressionParameters, +) +from nncf.quantization.quantize_model import get_weight_compression_configuration QUANT_ANNOTATION_KEY = "quantization_annotation" +class QuantizationMode(Enum): + """ + Defines special quantization modes. + + - INT8_SYM: INT8 symmetric quantization for both activations and weights. + - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. 
+ - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models + - INT8WO_SYM: INT8 symmetric quantization for weights only. + - INT8WO_ASYM: INT8 asymmetric quantization for weights only. + - INT4WO_SYM: INT4 symmetric quantization for weights only. + - INT4WO_ASYM: INT4 asymmetric quantization for weights only + """ + + INT8_SYM = "int8_sym" + INT8_MIXED = "int8_mixed" + INT8_TRANSFORMER = "int8_transformer" + INT8WO_SYM = "int8wo_sym" + INT8WO_ASYM = "int8wo_asym" + INT4WO_SYM = "int4wo_sym" + INT4WO_ASYM = "int4wo_asym" + + @api(canonical_alias="nncf.experimental.torch.fx.OpenVINOQuantizer") class OpenVINOQuantizer(TorchAOQuantizer): """ Implementation of the Torch AO quantizer which annotates models with quantization annotations optimally for the inference via OpenVINO. - - :param mode: Defines optimization mode for the algorithm. None by default. - :param preset: A preset controls the quantization mode (symmetric and asymmetric). - It can take the following values: - - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. - Default value is None. In this case, `mixed` preset is used for `transformer` - model type otherwise `performance`. - :param target_device: A target device the specificity of which will be taken - into account while compressing in order to obtain the best performance - for this type of device, defaults to TargetDevice.ANY. - :param model_type: Model type is needed to specify additional patterns - in the model. Supported only `transformer` now. - :param ignored_scope: An ignored scope that defined the list of model control - flow graph nodes to be ignored during quantization. - :param overflow_fix: This option controls whether to apply the overflow issue - fix for the 8-bit quantization. - :param quantize_outputs: Whether to insert additional quantizers right before - each of the model outputs. 
- :param activations_quantization_params: Quantization parameters for model - activations. - :param weights_quantization_params: Quantization parameters for model weights. - :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers. - MERGE_ALL_IN_ONE by default. """ + WEIGHTS_ONLY_COMPRESSION_MODES = ( + QuantizationMode.INT4WO_SYM, + QuantizationMode.INT4WO_ASYM, + QuantizationMode.INT8WO_SYM, + QuantizationMode.INT8WO_ASYM, + ) + def __init__( self, *, - mode: Optional[QuantizationMode] = None, - preset: Optional[QuantizationPreset] = None, - target_device: TargetDevice = TargetDevice.ANY, - model_type: Optional[ModelType] = None, - ignored_scope: Optional[IgnoredScope] = None, - overflow_fix: Optional[OverflowFix] = None, - quantize_outputs: bool = False, - activations_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, - weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, - quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE, + mode: QuantizationMode = QuantizationMode.INT8_SYM, + **kwargs, ): - self._min_max_algo = MinMaxQuantization( - mode=mode, - preset=preset, - target_device=target_device, - model_type=model_type, - ignored_scope=ignored_scope, - overflow_fix=overflow_fix, - quantize_outputs=quantize_outputs, - activations_quantization_params=activations_quantization_params, - weights_quantization_params=weights_quantization_params, - quantizer_propagation_rule=quantizer_propagation_rule, - ) + """ + :param mode: Defines special quantization modes. + - INT8_SYM: INT8 symmetric quantization for both activations and weights. + - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. + - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models + Default value is INT8_SYM. 
+ - INT4_SYM: Symmetric INT4 Weights-Only Compression + - INT4_ASYM: Asymmetric INT4 Weights-Only Compression + :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm. + """ + self.mode = mode + if self.mode not in OpenVINOQuantizer.WEIGHTS_ONLY_COMPRESSION_MODES: + if mode == QuantizationMode.INT8_SYM: + preset = nncf.quantization.structs.QuantizationPreset.PERFORMANCE + model_type = None + elif mode == QuantizationMode.INT8_MIXED: + preset = quantization.structs.QuantizationPreset.MIXED + model_type = None + else: + preset = None + model_type = nncf.parameters.ModelType.TRANSFORMER + self._algo = ( + nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( + preset=preset, model_type=model_type, **kwargs + ) + ) + else: + self.weight_compression_configuration = ( + get_weight_compression_configuration( + mode.value.replace( + "wo", "" + ), # Mode value has to match NNCF CompressWeightsMode + **kwargs, + ) + ) + _weight_compression_configuration = self.weight_compression_configuration + subset_size = 1 # Doesn't really matter in this case since it is data-free. Should just be +ve + self._algo = nncf.quantization.algorithms.weight_compression.algorithm.WeightCompression( + subset_size=subset_size, **_weight_compression_configuration + ) def set_ignored_scope( self, @@ -123,7 +151,7 @@ def set_ignored_scope( :param validate: If set to True, then a RuntimeError will be raised if any ignored scope does not match in the model graph. """ - self._min_max_algo.set_ignored_scope( + self._algo.set_ignored_scope( nncf.IgnoredScope( names=names or [], patterns=patterns or [], @@ -139,62 +167,45 @@ def get_nncf_quantization_setup( self._min_max_algo._set_backend_entity(model) return self._min_max_algo.find_quantization_setup(model, nncf_graph) - def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: - """ - Adds quantization annotations to the nodes in the model graph in-place. 
+ def get_nodes_to_compress(self, model, nncf_graph) -> list[NNCFNode]: + self._algo.set_backend_entity(model) + return self._algo.get_nodes_to_compress(nncf_graph) - :param model: A torch.fx.GraphModule to annotate. - :return: The torch.fx.GraphModule with updated annotations. - """ - nncf_graph = GraphConverter.create_nncf_graph(model) - quantization_setup = self.get_nncf_quantization_setup(model, nncf_graph) + def get_nncf_weight_compression_setup( + self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph + ) -> quantization.quantizer_setup.SingleConfigQuantizerSetup: + nodes_to_compress = self.get_nodes_to_compress(model, nncf_graph) + return self._algo.get_weight_compression_parameters( + model, nncf_graph, nodes_to_compress + )[0] + def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: + nncf_graph = nncf_fx.nncf_graph_builder.GraphConverter.create_nncf_graph(model) graph = model.graph - node_vs_torch_annotation = defaultdict(TorchAOQuantizationAnnotation) + node_vs_torch_annotation: defaultdict[ + torch.fx.Node, TorchAOQuantizationAnnotation + ] = defaultdict(TorchAOQuantizationAnnotation) - for qp in quantization_setup.quantization_points.values(): - edge_or_node, annotation = self._get_edge_or_node_and_annotation( - graph, nncf_graph, qp, node_vs_torch_annotation + if self.mode in OpenVINOQuantizer.WEIGHTS_ONLY_COMPRESSION_MODES: + node_vs_torch_annotation = self._annotate_weight_compression( + model, graph, nncf_graph, node_vs_torch_annotation ) - qspec = self._get_torch_ao_qspec_from_qp(qp) - self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) - - for quantizer_ids in quantization_setup.unified_scale_groups.values(): - root_quantizer_id = self._get_unified_scales_root_quantizer_id( - nncf_graph, quantizer_ids, quantization_setup + else: + node_vs_torch_annotation = self._annotate_post_training_quantization( + model, graph, nncf_graph, node_vs_torch_annotation ) - root_qp = 
quantization_setup.quantization_points[root_quantizer_id] - - if any(root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig for q_id in quantizer_ids): - qps = [quantization_setup.quantization_points[q_id] for q_id in quantizer_ids] - msg = ( - "Different quantization configs are set to one unified scale group:" - f"{[(qp.insertion_point.__dict__, str(qp.qconfig)) for qp in qps]}" - ) - raise nncf.InternalError(msg) - - root_target_node = get_graph_node_by_name(graph, root_qp.insertion_point.target_node_name) - root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph) - - for quantizer_id in quantizer_ids: - if quantizer_id == root_quantizer_id: - continue - - qspec = TorchAOSharedQuantizationSpec(root_edge_or_node) - qp = quantization_setup.quantization_points[quantizer_id] - edge_or_node, annotation = self._get_edge_or_node_and_annotation( - graph, nncf_graph, qp, node_vs_torch_annotation - ) - self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) for node, annotation in node_vs_torch_annotation.items(): assert QUANT_ANNOTATION_KEY not in node.meta node.meta[QUANT_ANNOTATION_KEY] = annotation + return model @staticmethod def _get_unified_scales_root_quantizer_id( - nncf_graph: NNCFGraph, quantizer_ids: list[int], quantizer_setup: SingleConfigQuantizerSetup + nncf_graph: NNCFGraph, + quantizer_ids: list[int], + quantizer_setup: quantization.quantizer_setup.SingleConfigQuantizerSetup, ) -> int: """ Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` @@ -209,18 +220,26 @@ def _get_unified_scales_root_quantizer_id( nncf_node_quantizer_id = None root_quantizer_id = None for quantizer_id in quantizer_ids: - target_node_name = quantizer_setup.quantization_points[quantizer_id].insertion_point.target_node_name + target_node_name = quantizer_setup.quantization_points[ + quantizer_id + ].insertion_point.target_node_name nncf_node = nncf_graph.get_node_by_name(target_node_name) - if 
nncf_node_quantizer_id is None or nncf_node.node_id < nncf_node_quantizer_id: + if ( + nncf_node_quantizer_id is None + or nncf_node.node_id < nncf_node_quantizer_id + ): root_quantizer_id = quantizer_id nncf_node_quantizer_id = nncf_node.node_id + if root_quantizer_id is None: + msg = "Root quantizer ids can't be None" + raise nncf.InternalError(msg) return root_quantizer_id @staticmethod def _get_edge_or_node_and_annotation( graph: torch.fx.Graph, nncf_graph: NNCFGraph, - qp: QuantizationPointBase, + qp: quantization.quantizer_setup.QuantizationPointBase, node_vs_torch_annotation: dict[torch.fx.Node, TorchAOQuantizationAnnotation], ) -> tuple[EdgeOrNode, TorchAOQuantizationAnnotation]: """ @@ -231,16 +250,52 @@ def _get_edge_or_node_and_annotation( :param nncf_graph: NNCFGraph instance. :param qp: QuantizationPointBase instance. :param node_vs_torch_annotation: A dictionary mapping torch.fx.GraphNode objects to their respective - TorchAOQuantizationAnnotations. + QuantizationAnnotations. :return: A tuple containing the EdgeOrNode and its associated TorchAOQuantizationAnnotation. """ - target_node = get_graph_node_by_name(graph, qp.insertion_point.target_node_name) + target_node = nncf_fx.node_utils.get_graph_node_by_name( + graph, qp.insertion_point.target_node_name + ) annotation = node_vs_torch_annotation[target_node] edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) return edge_or_node, annotation @staticmethod - def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nncf_graph: NNCFGraph) -> EdgeOrNode: + def _get_weight_edge( + target_node: torch.fx.Node, + nncf_graph: NNCFGraph, + ) -> tuple[torch.fx.Node, torch.fx.Node]: + """ + Returns the FX node corresponding to the weight tensor input of a given operator node. + Uses the NNCF graph to identify which input port of the target node holds the weight. + If multiple weight ports are present, a warning is issued and only the first one is used. 
+ + :param target_node: FX node representing a weighted operation (e.g., Linear, Conv). + :param nncf_graph: NNCFGraph used to determine weight port indices. + :return: Edge represented by a Tuple of (weight_node, target_node), where weight_node is the FX node supplying + the weight. + """ + nncf_node = nncf_graph.get_node_by_name(target_node.name) + weights_ports_ids = nncf.torch.model_graph_manager.get_weight_tensor_port_ids( + nncf_node, nncf_graph + ) + if len(weights_ports_ids) > 1: + # TODO(dlyakhov): support quantization for nodes with several weights + nncf.common.logging.nncf_logger.warning( + f"Quantization of the weighted node {target_node.name}" + " is not yet supported by the OpenVINOQuantizer." + f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." + f" Quantizable weights are located on ports: {weights_ports_ids}." + ) + weight_node = target_node.all_input_nodes[weights_ports_ids[0]] + return (weight_node, target_node) + + @staticmethod + def _get_edge_or_node( + target_node: torch.fx.Node, + qp: quantization.quantizer_setup.QuantizationPointBase, + nncf_graph: NNCFGraph, + ) -> EdgeOrNode: """ Returns the edge or node based on the given target node and quantization point. @@ -251,18 +306,7 @@ def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nnc """ ip = qp.insertion_point if qp.is_weight_quantization_point(): - nncf_node = nncf_graph.get_node_by_name(target_node.name) - weights_ports_ids = get_weight_tensor_port_ids(nncf_node, nncf_graph) - if len(weights_ports_ids) > 1: - # TODO(dlyakhov): support quantization for nodes with several weights - nncf_logger.warning( - f"Quantization of the weighted node {target_node.name}" - " is not yet supported by the OpenVINOQuantizer." - f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." - f" Quantizable weights are located on ports: {weights_ports_ids}." 
- ) - weight_node = target_node.all_input_nodes[weights_ports_ids[0]] - return (weight_node, target_node) + OpenVINOQuantizer._get_weight_edge(target_node, nncf_graph) if ip.input_port_id is None: return target_node @@ -289,28 +333,90 @@ def _fill_torch_ao_annotation( annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec @staticmethod - def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> TorchAOQuantizationSpec: + def _get_torch_ao_qspec_from_nncf_config_for_wc( + wc_param: WeightCompressionParameters, + ) -> TorchAOQuantizationSpec: """ - Retrieves the quantization configuration from the given quantization point and - converts it into a TorchAOQuantizationSpec. + Returns a TorchAO TorchAOQuantizationSpec based on NNCF weight compression parameter. - :param qp: An instance of QuantizationPointBase. - :return: A TorchAOQuantizationSpec retrieved and converted from the quantization point. + :param wc_param: NNCF Weight compression parameters for the node. + :return: A TorchAO TorchAOQuantizationSpec. 
""" + observer: type[UniformQuantizationObserverBase] + + extra_args: dict[str, Any] = {} + + qmode = wc_param.compression_config.mode + extra_args["wc_param"] = wc_param + is_asym_mode = wc_param.compression_config.is_asym_mode + if qmode in [ + nncf.CompressWeightsMode.INT4_ASYM, + nncf.CompressWeightsMode.INT4_SYM, + ]: + observer = INT4WeightObserver # type: ignore[type-abstract] + quant_min = -8 if not is_asym_mode else 0 + quant_max = 7 if not is_asym_mode else 15 + dtype = torch.int8 + channel_axis = 0 + torch_qscheme = torch_qscheme = ( + torch.per_channel_symmetric + if not is_asym_mode + else torch.per_channel_affine + ) + else: + observer = INT8WeightObserver # type: ignore[type-abstract] + quant_min = -128 if not is_asym_mode else 0 + quant_max = 127 if not is_asym_mode else 255 + dtype = torch.int8 + channel_axis = 0 + torch_qscheme = ( + torch.per_channel_symmetric + if not is_asym_mode + else torch.per_channel_affine + ) + return TorchAOQuantizationSpec( + dtype=dtype, + observer_or_fake_quant_ctr=observer.with_args(**extra_args), + quant_min=quant_min, + quant_max=quant_max, + qscheme=torch_qscheme, + ch_axis=channel_axis, + is_dynamic=False, + ) + + @staticmethod + def _get_torch_ao_qspec_from_nncf_config_for_ptq( + qp: quantization.quantizer_setup.QuantizationPointBase, + ) -> TorchAOQuantizationSpec: + """ + Returns a TorchAO TorchAOQuantizationSpec based on NNCF quantization point. + + :param qp: Quantization point from NNCF. + :return: A TorchAO TorchAOQuantizationSpec. 
+ """ + observer: type[UniformQuantizationObserverBase] + # Eps value is copied from nncf/torch/quantization/layers.py - extra_args = {"eps": 1e-16} - qconfig = qp.qconfig + extra_args: dict[str, Any] = {"eps": 1e-16} + is_weight = qp.is_weight_quantization_point() + qconfig = qp.qconfig + dtype = torch.int8 + quant_min = None + quant_max = None + channel_axis = None if qconfig.per_channel: torch_qscheme = ( torch.per_channel_symmetric - if qconfig.mode is QuantizationScheme.SYMMETRIC + if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC else torch.per_channel_affine ) else: torch_qscheme = ( - torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_tensor_affine + torch.per_tensor_symmetric + if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC + else torch.per_tensor_affine ) if is_weight: observer = PerChannelMinMaxObserver @@ -318,10 +424,16 @@ def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> TorchAOQuantizatio quant_max = 127 dtype = torch.int8 channel_axis = 0 + torch_qscheme = ( + torch.per_channel_symmetric + if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC + else torch.per_channel_affine + ) else: observer = ( HistogramObserver - if torch_qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine] + if torch_qscheme + in [torch.per_tensor_symmetric, torch.per_tensor_affine] else PerChannelMinMaxObserver ) quant_min = 0 @@ -346,7 +458,9 @@ def validate(self, model: torch.fx.GraphModule) -> None: """ pass - def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: + def transform_for_annotation( + self, model: torch.fx.GraphModule + ) -> torch.fx.GraphModule: """ Allows for user defined transforms to run before annotating the graph. This allows quantizer to allow quantizing part of the model that are otherwise not quantizable. 
From 0815dc596c071b8a9df5236d3dd10bfd1d588a2c Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 22 Sep 2025 23:47:45 +0400 Subject: [PATCH 07/91] fix --- .../fx/quantization/quantizer/__init__.py | 4 + .../quantizer/openvino_quantizer.py | 104 +++++++++++++++++- 2 files changed, 106 insertions(+), 2 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py b/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py index e5a42efc0ef..c422301608c 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py @@ -8,3 +8,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from .openvino_quantizer import OpenVINOQuantizer, QuantizationMode + +__all__ = ["OpenVINOQuantizer", "quantize_model", "QuantizationMode"] diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py index 45a917cc3c8..e9e3344c641 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py @@ -17,8 +17,8 @@ ) import torch.fx -from observers import INT4WeightObserver -from observers import INT8WeightObserver +from .observers import INT4WeightObserver +from .observers import INT8WeightObserver from torch.ao.quantization.observer import HistogramObserver from torch.ao.quantization.observer import PerChannelMinMaxObserver from torch.ao.quantization.observer import UniformQuantizationObserverBase @@ -178,6 +178,106 @@ def get_nncf_weight_compression_setup( return self._algo.get_weight_compression_parameters( model, nncf_graph, nodes_to_compress )[0] + + def _annotate_weight_compression( + self, + model: torch.fx.GraphModule, + 
graph: torch.fx.Graph, + nncf_graph: NNCFGraph, + node_vs_torch_annotation: defaultdict[torch.fx.Node, TorchAOQuantizationAnnotation], + ) -> defaultdict[torch.fx.Node, TorchAOQuantizationAnnotation]: + """ + Annotates the model graph with weight-only quantization specs. + + Identifies compressible nodes in the NNCF graph and attaches the corresponding + TorchAO quantization specifications to their weight edges for later transformation. + + :param model: The FX GraphModule to annotate. + :param graph: The underlying FX graph. + :param nncf_graph: The corresponding NNCF graph. + :param node_vs_torch_annotation: A mapping of FX nodes to quantization annotations. + :return: Updated mapping of FX nodes with weight compression annotations. + """ + all_wc_params = self.get_nncf_weight_compression_setup( + model, nncf_graph + ) + + for wc_param in all_wc_params: + node_with_weight = wc_param.node_with_weight + target_node = nncf_fx.node_utils.get_graph_node_by_name( + graph, node_with_weight.node_name + ) + annotation = node_vs_torch_annotation[target_node] + edge_or_node = self._get_weight_edge(target_node, nncf_graph) + qspec = self._get_torch_ao_qspec_from_nncf_config_for_wc(wc_param=wc_param) + self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) + + return node_vs_torch_annotation + + def _annotate_post_training_quantization( + self, + model: torch.fx.GraphModule, + graph: torch.fx.Graph, + nncf_graph: NNCFGraph, + node_vs_torch_annotation: defaultdict[torch.fx.Node, TorchAOQuantizationAnnotation], + ) -> defaultdict[torch.fx.Node, TorchAOQuantizationAnnotation]: + """ + Annotates the model graph with post-training quantization configurations. + + :param model: The FX GraphModule to annotate. + :param graph: The underlying FX graph. + :param nncf_graph: The corresponding NNCF graph. + :param node_vs_torch_annotation: A mapping of FX nodes to quantization annotations. + :return: Updated mapping of FX nodes with post-training quantization annotations. 
+ """ + quantization_setup = self.get_nncf_quantization_setup(model, nncf_graph) + + for qp in quantization_setup.quantization_points.values(): + edge_or_node, annotation = self._get_edge_or_node_and_annotation( + graph, nncf_graph, qp, node_vs_torch_annotation + ) + qspec: TorchAOQuantizationSpecBase = ( + self._get_torch_ao_qspec_from_nncf_config_for_ptq(qp) + ) + self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) + + for quantizer_ids in quantization_setup.unified_scale_groups.values(): + root_quantizer_id = self._get_unified_scales_root_quantizer_id( + nncf_graph, quantizer_ids, quantization_setup + ) + root_qp = quantization_setup.quantization_points[root_quantizer_id] + + if any( + root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig + for q_id in quantizer_ids + ): + qps = [ + quantization_setup.quantization_points[qid] for qid in quantizer_ids + ] + raise nncf.InternalError( + "Different quantization configs are set to one unified scale group:" + f"{[(qp.insertion_point.__dict__, str(qp.qconfig)) for qp in qps]}" + ) + + root_target_node = nncf_fx.node_utils.get_graph_node_by_name( + graph, root_qp.insertion_point.target_node_name + ) + root_edge_or_node = self._get_edge_or_node( + root_target_node, root_qp, nncf_graph + ) + + for quantizer_id in quantizer_ids: + if quantizer_id == root_quantizer_id: + continue + + qspec = SharedQuantizationSpec(root_edge_or_node) # type: ignore[assignment] + qp = quantization_setup.quantization_points[quantizer_id] + edge_or_node, annotation = self._get_edge_or_node_and_annotation( + graph, nncf_graph, qp, node_vs_torch_annotation + ) + self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) + + return node_vs_torch_annotation def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: nncf_graph = nncf_fx.nncf_graph_builder.GraphConverter.create_nncf_graph(model) From 1b8d9409d10c0de35660dd0c9413ab5e4c0c3b3d Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 22 Sep 2025 
23:58:30 +0400 Subject: [PATCH 08/91] minor fix --- .../fx/quantization/quantizer/openvino_quantizer.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py index e9e3344c641..d57f3ae9d54 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py @@ -24,15 +24,18 @@ from torch.ao.quantization.observer import UniformQuantizationObserverBase from torch.ao.quantization.quantizer.quantizer import EdgeOrNode from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer -from torchao.quantization.pt2e.quantizer import ( +from torch.ao.quantization.quantizer import ( QuantizationAnnotation as TorchAOQuantizationAnnotation, ) -from torchao.quantization.pt2e.quantizer import ( +from torch.ao.quantization.quantizer import ( QuantizationSpec as TorchAOQuantizationSpec, ) -from torchao.quantization.pt2e.quantizer import ( +from torch.ao.quantization.quantizer import ( QuantizationSpecBase as TorchAOQuantizationSpecBase, ) +from torch.ao.quantization.quantizer import ( + SharedQuantizationSpec as TorchAOSharedQuantizationSpec, +) import nncf import nncf.common.quantization as quantization @@ -270,7 +273,7 @@ def _annotate_post_training_quantization( if quantizer_id == root_quantizer_id: continue - qspec = SharedQuantizationSpec(root_edge_or_node) # type: ignore[assignment] + qspec = TorchAOSharedQuantizationSpec(root_edge_or_node) # type: ignore[assignment] qp = quantization_setup.quantization_points[quantizer_id] edge_or_node, annotation = self._get_edge_or_node_and_annotation( graph, nncf_graph, qp, node_vs_torch_annotation From 7d35374698f8f5ce59c5eb3c52d0bc8e46dbcd99 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 00:11:04 +0400 Subject: [PATCH 09/91] 
fix --- .../torch/fx/quantization/quantizer/openvino_quantizer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py index d57f3ae9d54..b1f8c8d57f0 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py @@ -273,7 +273,7 @@ def _annotate_post_training_quantization( if quantizer_id == root_quantizer_id: continue - qspec = TorchAOSharedQuantizationSpec(root_edge_or_node) # type: ignore[assignment] + qspec = TorchAOSharedQuantizationSpec(root_edge_or_node) qp = quantization_setup.quantization_points[quantizer_id] edge_or_node, annotation = self._get_edge_or_node_and_annotation( graph, nncf_graph, qp, node_vs_torch_annotation @@ -409,7 +409,7 @@ def _get_edge_or_node( """ ip = qp.insertion_point if qp.is_weight_quantization_point(): - OpenVINOQuantizer._get_weight_edge(target_node, nncf_graph) + return OpenVINOQuantizer._get_weight_edge(target_node, nncf_graph) if ip.input_port_id is None: return target_node @@ -456,7 +456,7 @@ def _get_torch_ao_qspec_from_nncf_config_for_wc( nncf.CompressWeightsMode.INT4_ASYM, nncf.CompressWeightsMode.INT4_SYM, ]: - observer = INT4WeightObserver # type: ignore[type-abstract] + observer = INT4WeightObserver quant_min = -8 if not is_asym_mode else 0 quant_max = 7 if not is_asym_mode else 15 dtype = torch.int8 @@ -467,7 +467,7 @@ def _get_torch_ao_qspec_from_nncf_config_for_wc( else torch.per_channel_affine ) else: - observer = INT8WeightObserver # type: ignore[type-abstract] + observer = INT8WeightObserver quant_min = -128 if not is_asym_mode else 0 quant_max = 127 if not is_asym_mode else 255 dtype = torch.int8 From 427ebc29182edeef1a8a067b803efc6ea236a70a Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 00:18:16 +0400 Subject: [PATCH 
10/91] fix some more bugs; observers was importing from torchao. causing mismatch in signatures in prepare_pt2e. --- .../torch/fx/quantization/quantizer/observers.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/observers.py b/src/nncf/experimental/torch/fx/quantization/quantizer/observers.py index 22f296e3e6b..40e79713ae6 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/observers.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/observers.py @@ -15,33 +15,33 @@ import torch -from nncf.experimental.torch.fx.node_utils import ( # type: ignore[import-untyped] +from nncf.experimental.torch.fx.node_utils import ( get_tensor_constant_from_node, ) -from nncf.experimental.torch.fx.transformations import ( # type: ignore[import-untyped] +from nncf.experimental.torch.fx.transformations import ( constant_update, module_insertion, node_removal, ) -from nncf.quantization.algorithms.weight_compression.config import ( # type: ignore[import-untyped] +from nncf.quantization.algorithms.weight_compression.config import ( WeightCompressionParameters, ) -from nncf.quantization.algorithms.weight_compression.weight_lowering import ( # type: ignore[import-untyped] +from nncf.quantization.algorithms.weight_compression.weight_lowering import ( do_integer_quantization, ) -from nncf.tensor.tensor import Tensor as NNCFTensor # type: ignore[import-untyped] -from nncf.torch.graph.transformations.commands import ( # type: ignore[import-untyped] +from nncf.tensor.tensor import Tensor as NNCFTensor +from nncf.torch.graph.transformations.commands import ( PTTargetPoint, TargetType, ) -from nncf.torch.quantization.layers import ( # type: ignore[import-untyped] +from nncf.torch.quantization.layers import ( BaseWeightsDecompressor, INT4AsymmetricWeightsDecompressor, INT4SymmetricWeightsDecompressor, INT8AsymmetricWeightsDecompressor, INT8SymmetricWeightsDecompressor, ) -from 
torchao.quantization.pt2e import ObserverBase +from torch.ao.quantization.observer import ObserverBase class WeightObserverBase(ObserverBase, ABC): From 24dbfb668d62c30fca62cdde296607913778fc87 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 00:21:29 +0400 Subject: [PATCH 11/91] add compress pt2e to init --- src/nncf/experimental/torch/fx/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nncf/experimental/torch/fx/__init__.py b/src/nncf/experimental/torch/fx/__init__.py index 2ecdde60840..866fbaad4b3 100644 --- a/src/nncf/experimental/torch/fx/__init__.py +++ b/src/nncf/experimental/torch/fx/__init__.py @@ -10,4 +10,5 @@ # limitations under the License. from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e as quantize_pt2e +from nncf.experimental.torch.fx.quantization.quantize_pt2e import compress_pt2e as compress_pt2e from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer as OpenVINOQuantizer From 4bb8c1acc83311c8bb4df9126dc7f301ef4928f0 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 00:23:06 +0400 Subject: [PATCH 12/91] fix quantizer init file. Remove extra code. --- .../experimental/torch/fx/quantization/quantizer/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py b/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py index c422301608c..617f6642d73 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py @@ -10,5 +10,3 @@ # limitations under the License. 
from .openvino_quantizer import OpenVINOQuantizer, QuantizationMode - -__all__ = ["OpenVINOQuantizer", "quantize_model", "QuantizationMode"] From 89028423a79707b097a81565de547e5a9572e3ae Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 12:41:21 +0400 Subject: [PATCH 13/91] small fix for the big problem:) --- .../quantization/algorithms/weight_compression/algorithm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 8390594571c..cafdc5bee06 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -789,7 +789,7 @@ def collect_weight_compression_statistics( :return: A dictionary of collected statistics, or None if not applicable. """ statistics = None - if not (self._data_aware_mixed_precision or self._data_aware_compression) and not dataset: + if not (self._data_aware_mixed_precision or self._data_aware_compression) or not dataset: return statistics, statistic_points matmul_nodes_to_compress = [ wp.node_with_weight From 384253819f73ced9107a2c23bac3e76e6f0d3c0b Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 13:06:36 +0400 Subject: [PATCH 14/91] fix quantizer preset definition --- .../torch/fx/quantization/quantizer/openvino_quantizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py index b1f8c8d57f0..88fedd72e28 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py @@ -107,14 +107,14 @@ def __init__( self.mode = mode if self.mode not in OpenVINOQuantizer.WEIGHTS_ONLY_COMPRESSION_MODES: if mode == 
QuantizationMode.INT8_SYM: - preset = nncf.quantization.structs.QuantizationPreset.PERFORMANCE + preset = nncf.QuantizationPreset.PERFORMANCE model_type = None elif mode == QuantizationMode.INT8_MIXED: - preset = quantization.structs.QuantizationPreset.MIXED + preset = nncf.QuantizationPreset.MIXED model_type = None else: preset = None - model_type = nncf.parameters.ModelType.TRANSFORMER + model_type = nncf.ModelType.TRANSFORMER self._algo = ( nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( preset=preset, model_type=model_type, **kwargs From 2e70c2e2e2f08911b3513a456ada6a89e07479c0 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 13:29:13 +0400 Subject: [PATCH 15/91] fix openvino quantizer for ptq. call _algo instead of legacy _min_max_algo --- .../torch/fx/quantization/quantizer/openvino_quantizer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py index 88fedd72e28..ce22f278c8f 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py @@ -105,6 +105,8 @@ def __init__( :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm. 
""" self.mode = mode + self.weight_compression_configuration = None + if self.mode not in OpenVINOQuantizer.WEIGHTS_ONLY_COMPRESSION_MODES: if mode == QuantizationMode.INT8_SYM: preset = nncf.QuantizationPreset.PERFORMANCE @@ -167,8 +169,8 @@ def set_ignored_scope( def get_nncf_quantization_setup( self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph ) -> SingleConfigQuantizerSetup: - self._min_max_algo._set_backend_entity(model) - return self._min_max_algo.find_quantization_setup(model, nncf_graph) + self._algo._set_backend_entity(model) + return self._algo.find_quantization_setup(model, nncf_graph) def get_nodes_to_compress(self, model, nncf_graph) -> list[NNCFNode]: self._algo.set_backend_entity(model) From b1c9aadf83b185a3d66e1deeebb4f19f387990cf Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 14:16:11 +0400 Subject: [PATCH 16/91] fix quantizer defaults --- .../torch/fx/quantization/quantizer/openvino_quantizer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py index ce22f278c8f..c2f1b64414e 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py @@ -48,6 +48,7 @@ WeightCompressionParameters, ) from nncf.quantization.quantize_model import get_weight_compression_configuration +from nncf.common.quantization.quantizer_propagation.structs import QuantizerPropagationRule QUANT_ANNOTATION_KEY = "quantization_annotation" @@ -91,7 +92,8 @@ class OpenVINOQuantizer(TorchAOQuantizer): def __init__( self, *, - mode: QuantizationMode = QuantizationMode.INT8_SYM, + mode: Optional[QuantizationMode] = None, + quantizer_propagation_rule: Optional[QuantizerPropagationRule] = QuantizerPropagationRule.MERGE_ALL_IN_ONE **kwargs, ): """ @@ -119,7 +121,7 @@ def __init__( 
model_type = nncf.ModelType.TRANSFORMER self._algo = ( nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( - preset=preset, model_type=model_type, **kwargs + mode=mode, quantizer_propagation_rule=quantizer_propagation_rule, **kwargs ) ) else: From 33fe01c1539c99c2cacdff67db3265729566e84f Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 14:46:42 +0400 Subject: [PATCH 17/91] microfix --- .../torch/fx/quantization/quantizer/openvino_quantizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py index c2f1b64414e..0ca455132cf 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py @@ -93,7 +93,7 @@ def __init__( self, *, mode: Optional[QuantizationMode] = None, - quantizer_propagation_rule: Optional[QuantizerPropagationRule] = QuantizerPropagationRule.MERGE_ALL_IN_ONE + quantizer_propagation_rule: Optional[QuantizerPropagationRule] = QuantizerPropagationRule.MERGE_ALL_IN_ONE, **kwargs, ): """ From d8e1006b8de98d657132ccc4fb2e1d52820b47ae Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 15:53:46 +0400 Subject: [PATCH 18/91] precommit fix --- .../weight_compression/algorithm.py | 90 ++++++++++--------- .../torch/fx/quantization/quantize_pt2e.py | 32 +++---- .../fx/quantization/quantizer/observers.py | 74 ++++++--------- 3 files changed, 92 insertions(+), 104 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 9679a1ca85b..9798ec108cb 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -9,50 
+9,47 @@ # See the License for the specific language governing permissions and # limitations under the License. -import nncf # type: ignore[import-untyped] import torch +import nncf # type: ignore[import-untyped] from nncf.common.graph.graph import NNCFGraph # type: ignore[import-untyped] -from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression - -import nncf -from nncf.quantization.algorithms.algorithm import Algorithm -from nncf.common.graph.graph import NNCFGraph -from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters -from nncf.quantization.algorithms.weight_compression.weight_lowering import WeightCompressionConfig from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer -from nncf.quantization.algorithms.weight_compression.weight_lowering import get_reduction_channel_size from nncf.common.utils.backend import BackendType -from typing import Optional +from nncf.quantization.algorithms.algorithm import Algorithm +from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression + class WeightsCompressionPT2E(Algorithm): - def __init__(self, - quantizer, - subset_size: int = 128, - awq: bool = False, - scale_estimation: bool = False, - gptq: bool = False, - lora_correction: bool = False, - sensitivity_metric: nncf.SensitivityMetric = nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, - compression_format: nncf.CompressionFormat = nncf.CompressionFormat.DQ, - advanced_parameters: nncf.AdvancedCompressionParameters = None, - ) -> torch.fx.GraphModule: + def __init__( + self, + quantizer, + subset_size: int = 128, + awq: bool = False, + scale_estimation: bool = False, + gptq: bool = False, + lora_correction: bool = False, + sensitivity_metric: nncf.SensitivityMetric = nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, + compression_format: nncf.CompressionFormat = nncf.CompressionFormat.DQ, + advanced_parameters: nncf.AdvancedCompressionParameters = 
None, + ) -> torch.fx.GraphModule: self._quantizer = quantizer wc_config = quantizer._weight_compression_configuration - mode=wc_config.get("mode", None) - ratio = wc_config.get("ratio", 1) # TODO Discuss if ratio should be passed in quantizer or in the compress_pt2e api + mode = wc_config.get("mode", None) + ratio = wc_config.get( + "ratio", 1 + ) # TODO Discuss if ratio should be passed in quantizer or in the compress_pt2e api group_size = wc_config.get("group_size", 128) - all_layers=wc_config.get("all_layers", False) - backup_mode=wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM) + all_layers = wc_config.get("all_layers", False) + backup_mode = wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM) self._sensitivity_metric = sensitivity_metric self._algo = WeightCompression( mode=mode, ratio=ratio, group_size=group_size, - ignored_scope=nncf.IgnoredScope(), # Ignored scope is useless in the case of External Quantizers because we onyl compress "nodes_to_compress" + ignored_scope=nncf.IgnoredScope(), # only compress "nodes_to_compress" all_layers=all_layers, sensitivity_metric=self._sensitivity_metric, awq=awq, @@ -63,30 +60,41 @@ def __init__(self, backup_mode=backup_mode, compression_format=compression_format, advanced_parameters=advanced_parameters, - ) + ) def available_backends(self) -> list[BackendType]: return self._algo.available_backends() - def apply(self, - model: torch.fx.GraphModule, - graph: NNCFGraph, - statistic_points=None, - dataset=None - ): - self._algo.set_backend_entity(model) #Set algo backend + def apply( + self, + model: torch.fx.GraphModule, + graph: NNCFGraph, + statistic_points=None, + dataset=None, + ): + self._algo.set_backend_entity(model) # Set algo backend - if(self._sensitivity_metric == nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR): - # Default case, _sensitivity_metric == nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR means that it is not defined by the user in the API + if self._sensitivity_metric == 
nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR: + # Default case. It means that it is not defined by the user in the API # Hence, the annotation(Quantization parameters for all layers) from the quantizer will be used. - all_weight_params = self._quantizer.get_weight_compression_setup(model, graph) # Get weight compression params FROM QUANTIZER - statistics, statistic_points = self._algo.collect_weight_compression_statistics(model, graph, dataset, all_weight_params, statistic_points) + all_weight_params = self._quantizer.get_weight_compression_setup( + model, graph + ) # Get weight compression params FROM QUANTIZER + statistics, statistic_points = self._algo.collect_weight_compression_statistics( + model, graph, dataset, all_weight_params, statistic_points + ) else: # Data Aware mixed precision is used. In this case, only nodes_to_compress is obtained from the quantizer - nodes_to_compress = self._quantizer.get_nodes_to_compress(model, graph) # Get nodes to compress FROM QUANTIZER - all_weight_params, statistics = self._algo.get_weight_compression_parameters(model, graph, nodes_to_compress, statistic_points, dataset) + nodes_to_compress = self._quantizer.get_nodes_to_compress( + model, graph + ) # Get nodes to compress FROM QUANTIZER + all_weight_params, statistics = self._algo.get_weight_compression_parameters( + model, graph, nodes_to_compress, statistic_points, dataset + ) - transformed_model = self._algo.apply_wc_algos(model, graph, all_weight_params, statistics, dataset) # Apply the wc algos FROM ALGO + transformed_model = self._algo.apply_wc_algos( + model, graph, all_weight_params, statistics, dataset + ) # Apply the wc algos FROM ALGO return transformed_model def get_statistic_points(self, model, graph: NNCFGraph) -> StatisticPointsContainer: diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index d24f9568420..ae08f09e985 100644 --- 
a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -39,6 +39,7 @@ from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters from nncf.quantization.range_estimator import RangeEstimatorParameters + @api(canonical_alias="nncf.experimental.torch.fx.quantize_pt2e") def quantize_pt2e( model: torch.fx.GraphModule, @@ -158,19 +159,20 @@ def _quant_node_constraint(n: torch.fx.Node) -> bool: """ return n.op == "call_function" and n.target in QUANTIZE_NODE_TARGETS + @api(canonical_alias="nncf.experimental.torch.fx.compress_pt2e") def compress_pt2e( - model: torch.fx.GraphModule, - quantizer: Quantizer, - dataset: Optional[nncf.Dataset] = None, - awq: bool = False, - scale_estimation: bool = False, - gptq: bool = False, - lora_correction: bool = False, - subset_size: int = 128, # Dataset size to use - sensitivity_metric: nncf.SensitivityMetric = nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, - advanced_parameters: nncf.AdvancedCompressionParameters = None, - ) -> torch.fx.GraphModule: + model: torch.fx.GraphModule, + quantizer: Quantizer, + dataset: Optional[nncf.Dataset] = None, + awq: bool = False, + scale_estimation: bool = False, + gptq: bool = False, + lora_correction: bool = False, + subset_size: int = 128, # Dataset size to use + sensitivity_metric: nncf.SensitivityMetric = nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, + advanced_parameters: nncf.AdvancedCompressionParameters = None, +) -> torch.fx.GraphModule: """ Applies Weight Compression to the torch.fx.GraphModule provided model using provided torch.ao quantizer. 
@@ -192,10 +194,10 @@ def compress_pt2e( """ if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_weight_compression_setup"): quantizer = OpenVINOQuantizerAdapter(quantizer) - compression_format = nncf.CompressionFormat.DQ # since OVQUantizer has a defined decompression subgraph which we want, this is a minimally invasive way to do it + compression_format = nncf.CompressionFormat.DQ else: - #TODO Support Third party quantizers here. - msg = 'Only OpenVINO Quantizer is supported currently.' + # TODO Support Third party quantizers here. + msg = "Only OpenVINO Quantizer is supported currently." raise nncf.InternalError(msg) quantization_algorithm = WeightsCompressionPT2E( @@ -208,7 +210,7 @@ def compress_pt2e( sensitivity_metric=sensitivity_metric, compression_format=compression_format, advanced_parameters=advanced_parameters, - ) + ) # Here the model is annotated transformed_model = quantizer.transform_prior_quantization(model) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/observers.py b/src/nncf/experimental/torch/fx/quantization/quantizer/observers.py index 40e79713ae6..c283024c9be 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/observers.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/observers.py @@ -10,43 +10,33 @@ # limitations under the License. 
-from abc import ABC, abstractmethod -from typing import Optional, Tuple +from abc import ABC +from abc import abstractmethod +from typing import Optional import torch +from torch.ao.quantization.observer import ObserverBase -from nncf.experimental.torch.fx.node_utils import ( - get_tensor_constant_from_node, -) -from nncf.experimental.torch.fx.transformations import ( - constant_update, - module_insertion, - node_removal, -) -from nncf.quantization.algorithms.weight_compression.config import ( - WeightCompressionParameters, -) -from nncf.quantization.algorithms.weight_compression.weight_lowering import ( - do_integer_quantization, -) +from nncf.experimental.torch.fx.node_utils import get_tensor_constant_from_node +from nncf.experimental.torch.fx.transformations import constant_update +from nncf.experimental.torch.fx.transformations import module_insertion +from nncf.experimental.torch.fx.transformations import node_removal +from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters +from nncf.quantization.algorithms.weight_compression.weight_lowering import do_integer_quantization from nncf.tensor.tensor import Tensor as NNCFTensor -from nncf.torch.graph.transformations.commands import ( - PTTargetPoint, - TargetType, -) -from nncf.torch.quantization.layers import ( - BaseWeightsDecompressor, - INT4AsymmetricWeightsDecompressor, - INT4SymmetricWeightsDecompressor, - INT8AsymmetricWeightsDecompressor, - INT8SymmetricWeightsDecompressor, -) -from torch.ao.quantization.observer import ObserverBase +from nncf.torch.graph.transformations.commands import PTTargetPoint +from nncf.torch.graph.transformations.commands import TargetType +from nncf.torch.quantization.layers import BaseWeightsDecompressor +from nncf.torch.quantization.layers import INT4AsymmetricWeightsDecompressor +from nncf.torch.quantization.layers import INT4SymmetricWeightsDecompressor +from nncf.torch.quantization.layers import INT8AsymmetricWeightsDecompressor +from 
nncf.torch.quantization.layers import INT8SymmetricWeightsDecompressor class WeightObserverBase(ObserverBase, ABC): """ - Base implementation of an NNCF observer that defines the rules for compressing layer weights into the OpenVINO representation. + Base implementation of an NNCF observer that defines the rules for compressing layer + weights into the OpenVINO representation. """ def __init__( @@ -65,7 +55,7 @@ def __init__( def calculate_qparams( # type: ignore[override] self, weight: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: + ) -> tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: """ Calculates quantization parameters: quantized weight, quantization scale and quantization zero point. @@ -75,18 +65,14 @@ def calculate_qparams( # type: ignore[override] wc_param = self._wc_param wc_config = wc_param.compression_config reduction_axes = wc_param.reduction_axes - q_weight, scale, zp = do_integer_quantization( - NNCFTensor(weight), wc_config, reduction_axes=reduction_axes - ) + q_weight, scale, zp = do_integer_quantization(NNCFTensor(weight), wc_config, reduction_axes=reduction_axes) zp = zp.data if zp is not None else None return q_weight.data, scale.data, zp def forward(self, x: torch.Tensor) -> torch.Tensor: return x - def convert( - self, model: torch.fx.GraphModule, observer_node: torch.fx.Node - ) -> None: + def convert(self, model: torch.fx.GraphModule, observer_node: torch.fx.Node) -> None: """ Replaces the given observer node from the given model with a quantized weight and a OpenVINO specific decompression module. 
@@ -100,18 +86,14 @@ def convert( original_weight = get_tensor_constant_from_node(weight_node, model) q_weight, scale, zero_point = self.calculate_qparams(original_weight) - decompressor = self._create_decompressor( - scale, zero_point, q_weight, original_weight - ) + decompressor = self._create_decompressor(scale, zero_point, q_weight, original_weight) packed_q_weight = decompressor.pack_weight(q_weight) # Weight port id is 0 since observer is inserted for a single weight only. constant_update(model, observer_node, packed_q_weight, input_port_id=0) compressed_weight_name = observer_node.all_input_nodes[0].name - decompressor_suffix = "_".join( - compressed_weight_name.replace(".", "_").split("_")[:-2] - ) + decompressor_suffix = "_".join(compressed_weight_name.replace(".", "_").split("_")[:-2]) decompressor_name = f"{decompressor.quantization_mode}_weights_decompressor_{decompressor_suffix}" module_insertion( @@ -159,9 +141,7 @@ def _create_decompressor( original_weight: torch.Tensor, ) -> BaseWeightsDecompressor: if zero_point is None: - return INT4SymmetricWeightsDecompressor( - scale, q_weight.shape, original_weight.shape, original_weight.dtype - ) + return INT4SymmetricWeightsDecompressor(scale, q_weight.shape, original_weight.shape, original_weight.dtype) return INT4AsymmetricWeightsDecompressor( scale, zero_point, @@ -185,6 +165,4 @@ def _create_decompressor( ) -> BaseWeightsDecompressor: if zero_point is None: return INT8SymmetricWeightsDecompressor(scale, original_weight.dtype) - return INT8AsymmetricWeightsDecompressor( - scale, zero_point, original_weight.dtype - ) + return INT8AsymmetricWeightsDecompressor(scale, zero_point, original_weight.dtype) From 88a8472217cf19a82bdf2f5b806b9fb196528197 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 16:59:23 +0400 Subject: [PATCH 19/91] revert openvino quantizer to old --- .../quantizer/openvino_quantizer.py | 451 +++++------------- 1 file changed, 115 insertions(+), 336 deletions(-) diff --git 
a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py index 0ca455132cf..d9db3b29e7e 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py @@ -10,134 +10,99 @@ # limitations under the License. from collections import defaultdict -from enum import Enum -from typing import ( - Any, - Optional, -) +from typing import Optional, Union import torch.fx -from .observers import INT4WeightObserver -from .observers import INT8WeightObserver from torch.ao.quantization.observer import HistogramObserver from torch.ao.quantization.observer import PerChannelMinMaxObserver -from torch.ao.quantization.observer import UniformQuantizationObserverBase from torch.ao.quantization.quantizer.quantizer import EdgeOrNode +from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation as TorchAOQuantizationAnnotation +from torch.ao.quantization.quantizer.quantizer import QuantizationSpec as TorchAOQuantizationSpec +from torch.ao.quantization.quantizer.quantizer import QuantizationSpecBase as TorchAOQuantizationSpecBase from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer -from torch.ao.quantization.quantizer import ( - QuantizationAnnotation as TorchAOQuantizationAnnotation, -) -from torch.ao.quantization.quantizer import ( - QuantizationSpec as TorchAOQuantizationSpec, -) -from torch.ao.quantization.quantizer import ( - QuantizationSpecBase as TorchAOQuantizationSpecBase, -) -from torch.ao.quantization.quantizer import ( - SharedQuantizationSpec as TorchAOSharedQuantizationSpec, -) +from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec as TorchAOSharedQuantizationSpec import nncf -import nncf.common.quantization as quantization -import nncf.experimental.torch.fx as nncf_fx +from nncf import IgnoredScope +from 
nncf import ModelType +from nncf import OverflowFix +from nncf import QuantizationMode +from nncf import QuantizationPreset +from nncf import TargetDevice from nncf.common.graph.graph import NNCFGraph -from nncf.common.graph.graph import NNCFNode +from nncf.common.logging import nncf_logger +from nncf.common.quantization.quantizer_propagation.structs import QuantizerPropagationRule +from nncf.common.quantization.quantizer_setup import QuantizationPointBase from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup +from nncf.common.quantization.structs import QuantizationScheme from nncf.common.utils.api_marker import api -from nncf.quantization.algorithms.weight_compression.config import ( - WeightCompressionParameters, -) -from nncf.quantization.quantize_model import get_weight_compression_configuration -from nncf.common.quantization.quantizer_propagation.structs import QuantizerPropagationRule +from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter +from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name +from nncf.quantization.advanced_parameters import FP8QuantizationParameters +from nncf.quantization.advanced_parameters import QuantizationParameters +from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization +from nncf.torch.model_graph_manager import get_weight_tensor_port_ids QUANT_ANNOTATION_KEY = "quantization_annotation" -class QuantizationMode(Enum): - """ - Defines special quantization modes. - - - INT8_SYM: INT8 symmetric quantization for both activations and weights. - - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. - - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models - - INT8WO_SYM: INT8 symmetric quantization for weights only. - - INT8WO_ASYM: INT8 asymmetric quantization for weights only. - - INT4WO_SYM: INT4 symmetric quantization for weights only. 
- - INT4WO_ASYM: INT4 asymmetric quantization for weights only - """ - - INT8_SYM = "int8_sym" - INT8_MIXED = "int8_mixed" - INT8_TRANSFORMER = "int8_transformer" - INT8WO_SYM = "int8wo_sym" - INT8WO_ASYM = "int8wo_asym" - INT4WO_SYM = "int4wo_sym" - INT4WO_ASYM = "int4wo_asym" - - @api(canonical_alias="nncf.experimental.torch.fx.OpenVINOQuantizer") class OpenVINOQuantizer(TorchAOQuantizer): """ Implementation of the Torch AO quantizer which annotates models with quantization annotations optimally for the inference via OpenVINO. - """ - WEIGHTS_ONLY_COMPRESSION_MODES = ( - QuantizationMode.INT4WO_SYM, - QuantizationMode.INT4WO_ASYM, - QuantizationMode.INT8WO_SYM, - QuantizationMode.INT8WO_ASYM, - ) + :param mode: Defines optimization mode for the algorithm. None by default. + :param preset: A preset controls the quantization mode (symmetric and asymmetric). + It can take the following values: + - `performance`: Symmetric quantization of weights and activations. + - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. + Default value is None. In this case, `mixed` preset is used for `transformer` + model type otherwise `performance`. + :param target_device: A target device the specificity of which will be taken + into account while compressing in order to obtain the best performance + for this type of device, defaults to TargetDevice.ANY. + :param model_type: Model type is needed to specify additional patterns + in the model. Supported only `transformer` now. + :param ignored_scope: An ignored scope that defined the list of model control + flow graph nodes to be ignored during quantization. + :param overflow_fix: This option controls whether to apply the overflow issue + fix for the 8-bit quantization. + :param quantize_outputs: Whether to insert additional quantizers right before + each of the model outputs. + :param activations_quantization_params: Quantization parameters for model + activations. 
+ :param weights_quantization_params: Quantization parameters for model weights. + :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers. + MERGE_ALL_IN_ONE by default. + """ def __init__( self, *, mode: Optional[QuantizationMode] = None, - quantizer_propagation_rule: Optional[QuantizerPropagationRule] = QuantizerPropagationRule.MERGE_ALL_IN_ONE, - **kwargs, + preset: Optional[QuantizationPreset] = None, + target_device: TargetDevice = TargetDevice.ANY, + model_type: Optional[ModelType] = None, + ignored_scope: Optional[IgnoredScope] = None, + overflow_fix: Optional[OverflowFix] = None, + quantize_outputs: bool = False, + activations_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, + weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, + quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE, ): - """ - :param mode: Defines special quantization modes. - - INT8_SYM: INT8 symmetric quantization for both activations and weights. - - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. - - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models - Default value is INT8_SYM. - - INT4_SYM: Symmetric INT4 Weights-Only Compression - - INT4_ASYM: Asymmetric INT4 Weights-Only Compression - :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm. 
- """ - self.mode = mode - self.weight_compression_configuration = None - - if self.mode not in OpenVINOQuantizer.WEIGHTS_ONLY_COMPRESSION_MODES: - if mode == QuantizationMode.INT8_SYM: - preset = nncf.QuantizationPreset.PERFORMANCE - model_type = None - elif mode == QuantizationMode.INT8_MIXED: - preset = nncf.QuantizationPreset.MIXED - model_type = None - else: - preset = None - model_type = nncf.ModelType.TRANSFORMER - self._algo = ( - nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( - mode=mode, quantizer_propagation_rule=quantizer_propagation_rule, **kwargs - ) - ) - else: - self.weight_compression_configuration = ( - get_weight_compression_configuration( - mode.value.replace( - "wo", "" - ), # Mode value has to match NNCF CompressWeightsMode - **kwargs, - ) - ) - _weight_compression_configuration = self.weight_compression_configuration - subset_size = 1 # Doesn't really matter in this case since it is data-free. Should just be +ve - self._algo = nncf.quantization.algorithms.weight_compression.algorithm.WeightCompression( - subset_size=subset_size, **_weight_compression_configuration - ) + self._min_max_algo = MinMaxQuantization( + mode=mode, + preset=preset, + target_device=target_device, + model_type=model_type, + ignored_scope=ignored_scope, + overflow_fix=overflow_fix, + quantize_outputs=quantize_outputs, + activations_quantization_params=activations_quantization_params, + weights_quantization_params=weights_quantization_params, + quantizer_propagation_rule=quantizer_propagation_rule, + ) def set_ignored_scope( self, @@ -158,7 +123,7 @@ def set_ignored_scope( :param validate: If set to True, then a RuntimeError will be raised if any ignored scope does not match in the model graph. 
""" - self._algo.set_ignored_scope( + self._min_max_algo.set_ignored_scope( nncf.IgnoredScope( names=names or [], patterns=patterns or [], @@ -171,81 +136,27 @@ def set_ignored_scope( def get_nncf_quantization_setup( self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph ) -> SingleConfigQuantizerSetup: - self._algo._set_backend_entity(model) - return self._algo.find_quantization_setup(model, nncf_graph) - - def get_nodes_to_compress(self, model, nncf_graph) -> list[NNCFNode]: - self._algo.set_backend_entity(model) - return self._algo.get_nodes_to_compress(nncf_graph) - - def get_nncf_weight_compression_setup( - self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph - ) -> quantization.quantizer_setup.SingleConfigQuantizerSetup: - nodes_to_compress = self.get_nodes_to_compress(model, nncf_graph) - return self._algo.get_weight_compression_parameters( - model, nncf_graph, nodes_to_compress - )[0] - - def _annotate_weight_compression( - self, - model: torch.fx.GraphModule, - graph: torch.fx.Graph, - nncf_graph: NNCFGraph, - node_vs_torch_annotation: defaultdict[torch.fx.Node, TorchAOQuantizationAnnotation], - ) -> defaultdict[torch.fx.Node, TorchAOQuantizationAnnotation]: - """ - Annotates the model graph with weight-only quantization specs. + self._min_max_algo._set_backend_entity(model) + return self._min_max_algo.find_quantization_setup(model, nncf_graph) - Identifies compressible nodes in the NNCF graph and attaches the corresponding - TorchAO quantization specifications to their weight edges for later transformation. - - :param model: The FX GraphModule to annotate. - :param graph: The underlying FX graph. - :param nncf_graph: The corresponding NNCF graph. - :param node_vs_torch_annotation: A mapping of FX nodes to quantization annotations. - :return: Updated mapping of FX nodes with weight compression annotations. 
- """ - all_wc_params = self.get_nncf_weight_compression_setup( - model, nncf_graph - ) - - for wc_param in all_wc_params: - node_with_weight = wc_param.node_with_weight - target_node = nncf_fx.node_utils.get_graph_node_by_name( - graph, node_with_weight.node_name - ) - annotation = node_vs_torch_annotation[target_node] - edge_or_node = self._get_weight_edge(target_node, nncf_graph) - qspec = self._get_torch_ao_qspec_from_nncf_config_for_wc(wc_param=wc_param) - self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) - - return node_vs_torch_annotation - - def _annotate_post_training_quantization( - self, - model: torch.fx.GraphModule, - graph: torch.fx.Graph, - nncf_graph: NNCFGraph, - node_vs_torch_annotation: defaultdict[torch.fx.Node, TorchAOQuantizationAnnotation], - ) -> defaultdict[torch.fx.Node, TorchAOQuantizationAnnotation]: + def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: """ - Annotates the model graph with post-training quantization configurations. + Adds quantization annotations to the nodes in the model graph in-place. - :param model: The FX GraphModule to annotate. - :param graph: The underlying FX graph. - :param nncf_graph: The corresponding NNCF graph. - :param node_vs_torch_annotation: A mapping of FX nodes to quantization annotations. - :return: Updated mapping of FX nodes with post-training quantization annotations. + :param model: A torch.fx.GraphModule to annotate. + :return: The torch.fx.GraphModule with updated annotations. 
""" + nncf_graph = GraphConverter.create_nncf_graph(model) quantization_setup = self.get_nncf_quantization_setup(model, nncf_graph) + graph = model.graph + node_vs_torch_annotation = defaultdict(TorchAOQuantizationAnnotation) + for qp in quantization_setup.quantization_points.values(): edge_or_node, annotation = self._get_edge_or_node_and_annotation( graph, nncf_graph, qp, node_vs_torch_annotation ) - qspec: TorchAOQuantizationSpecBase = ( - self._get_torch_ao_qspec_from_nncf_config_for_ptq(qp) - ) + qspec = self._get_torch_ao_qspec_from_qp(qp) self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) for quantizer_ids in quantization_setup.unified_scale_groups.values(): @@ -254,24 +165,16 @@ def _annotate_post_training_quantization( ) root_qp = quantization_setup.quantization_points[root_quantizer_id] - if any( - root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig - for q_id in quantizer_ids - ): - qps = [ - quantization_setup.quantization_points[qid] for qid in quantizer_ids - ] - raise nncf.InternalError( + if any(root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig for q_id in quantizer_ids): + qps = [quantization_setup.quantization_points[q_id] for q_id in quantizer_ids] + msg = ( "Different quantization configs are set to one unified scale group:" f"{[(qp.insertion_point.__dict__, str(qp.qconfig)) for qp in qps]}" ) + raise nncf.InternalError(msg) - root_target_node = nncf_fx.node_utils.get_graph_node_by_name( - graph, root_qp.insertion_point.target_node_name - ) - root_edge_or_node = self._get_edge_or_node( - root_target_node, root_qp, nncf_graph - ) + root_target_node = get_graph_node_by_name(graph, root_qp.insertion_point.target_node_name) + root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph) for quantizer_id in quantizer_ids: if quantizer_id == root_quantizer_id: @@ -284,35 +187,14 @@ def _annotate_post_training_quantization( ) self._fill_torch_ao_annotation(edge_or_node, qspec, 
annotation) - return node_vs_torch_annotation - - def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: - nncf_graph = nncf_fx.nncf_graph_builder.GraphConverter.create_nncf_graph(model) - graph = model.graph - node_vs_torch_annotation: defaultdict[ - torch.fx.Node, TorchAOQuantizationAnnotation - ] = defaultdict(TorchAOQuantizationAnnotation) - - if self.mode in OpenVINOQuantizer.WEIGHTS_ONLY_COMPRESSION_MODES: - node_vs_torch_annotation = self._annotate_weight_compression( - model, graph, nncf_graph, node_vs_torch_annotation - ) - else: - node_vs_torch_annotation = self._annotate_post_training_quantization( - model, graph, nncf_graph, node_vs_torch_annotation - ) - for node, annotation in node_vs_torch_annotation.items(): assert QUANT_ANNOTATION_KEY not in node.meta node.meta[QUANT_ANNOTATION_KEY] = annotation - return model @staticmethod def _get_unified_scales_root_quantizer_id( - nncf_graph: NNCFGraph, - quantizer_ids: list[int], - quantizer_setup: quantization.quantizer_setup.SingleConfigQuantizerSetup, + nncf_graph: NNCFGraph, quantizer_ids: list[int], quantizer_setup: SingleConfigQuantizerSetup ) -> int: """ Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` @@ -327,26 +209,18 @@ def _get_unified_scales_root_quantizer_id( nncf_node_quantizer_id = None root_quantizer_id = None for quantizer_id in quantizer_ids: - target_node_name = quantizer_setup.quantization_points[ - quantizer_id - ].insertion_point.target_node_name + target_node_name = quantizer_setup.quantization_points[quantizer_id].insertion_point.target_node_name nncf_node = nncf_graph.get_node_by_name(target_node_name) - if ( - nncf_node_quantizer_id is None - or nncf_node.node_id < nncf_node_quantizer_id - ): + if nncf_node_quantizer_id is None or nncf_node.node_id < nncf_node_quantizer_id: root_quantizer_id = quantizer_id nncf_node_quantizer_id = nncf_node.node_id - if root_quantizer_id is None: - msg = "Root quantizer ids can't be None" - 
raise nncf.InternalError(msg) return root_quantizer_id @staticmethod def _get_edge_or_node_and_annotation( graph: torch.fx.Graph, nncf_graph: NNCFGraph, - qp: quantization.quantizer_setup.QuantizationPointBase, + qp: QuantizationPointBase, node_vs_torch_annotation: dict[torch.fx.Node, TorchAOQuantizationAnnotation], ) -> tuple[EdgeOrNode, TorchAOQuantizationAnnotation]: """ @@ -357,52 +231,16 @@ def _get_edge_or_node_and_annotation( :param nncf_graph: NNCFGraph instance. :param qp: QuantizationPointBase instance. :param node_vs_torch_annotation: A dictionary mapping torch.fx.GraphNode objects to their respective - QuantizationAnnotations. + TorchAOQuantizationAnnotations. :return: A tuple containing the EdgeOrNode and its associated TorchAOQuantizationAnnotation. """ - target_node = nncf_fx.node_utils.get_graph_node_by_name( - graph, qp.insertion_point.target_node_name - ) + target_node = get_graph_node_by_name(graph, qp.insertion_point.target_node_name) annotation = node_vs_torch_annotation[target_node] edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) return edge_or_node, annotation @staticmethod - def _get_weight_edge( - target_node: torch.fx.Node, - nncf_graph: NNCFGraph, - ) -> tuple[torch.fx.Node, torch.fx.Node]: - """ - Returns the FX node corresponding to the weight tensor input of a given operator node. - Uses the NNCF graph to identify which input port of the target node holds the weight. - If multiple weight ports are present, a warning is issued and only the first one is used. - - :param target_node: FX node representing a weighted operation (e.g., Linear, Conv). - :param nncf_graph: NNCFGraph used to determine weight port indices. - :return: Edge represented by a Tuple of (weight_node, target_node), where weight_node is the FX node supplying - the weight. 
- """ - nncf_node = nncf_graph.get_node_by_name(target_node.name) - weights_ports_ids = nncf.torch.model_graph_manager.get_weight_tensor_port_ids( - nncf_node, nncf_graph - ) - if len(weights_ports_ids) > 1: - # TODO(dlyakhov): support quantization for nodes with several weights - nncf.common.logging.nncf_logger.warning( - f"Quantization of the weighted node {target_node.name}" - " is not yet supported by the OpenVINOQuantizer." - f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." - f" Quantizable weights are located on ports: {weights_ports_ids}." - ) - weight_node = target_node.all_input_nodes[weights_ports_ids[0]] - return (weight_node, target_node) - - @staticmethod - def _get_edge_or_node( - target_node: torch.fx.Node, - qp: quantization.quantizer_setup.QuantizationPointBase, - nncf_graph: NNCFGraph, - ) -> EdgeOrNode: + def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nncf_graph: NNCFGraph) -> EdgeOrNode: """ Returns the edge or node based on the given target node and quantization point. @@ -413,7 +251,18 @@ def _get_edge_or_node( """ ip = qp.insertion_point if qp.is_weight_quantization_point(): - return OpenVINOQuantizer._get_weight_edge(target_node, nncf_graph) + nncf_node = nncf_graph.get_node_by_name(target_node.name) + weights_ports_ids = get_weight_tensor_port_ids(nncf_node, nncf_graph) + if len(weights_ports_ids) > 1: + # TODO(dlyakhov): support quantization for nodes with several weights + nncf_logger.warning( + f"Quantization of the weighted node {target_node.name}" + " is not yet supported by the OpenVINOQuantizer." + f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." + f" Quantizable weights are located on ports: {weights_ports_ids}." 
+ ) + weight_node = target_node.all_input_nodes[weights_ports_ids[0]] + return (weight_node, target_node) if ip.input_port_id is None: return target_node @@ -440,90 +289,28 @@ def _fill_torch_ao_annotation( annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec @staticmethod - def _get_torch_ao_qspec_from_nncf_config_for_wc( - wc_param: WeightCompressionParameters, - ) -> TorchAOQuantizationSpec: - """ - Returns a TorchAO TorchAOQuantizationSpec based on NNCF weight compression parameter. - - :param wc_param: NNCF Weight compression parameters for the node. - :return: A TorchAO TorchAOQuantizationSpec. - """ - observer: type[UniformQuantizationObserverBase] - - extra_args: dict[str, Any] = {} - - qmode = wc_param.compression_config.mode - extra_args["wc_param"] = wc_param - is_asym_mode = wc_param.compression_config.is_asym_mode - if qmode in [ - nncf.CompressWeightsMode.INT4_ASYM, - nncf.CompressWeightsMode.INT4_SYM, - ]: - observer = INT4WeightObserver - quant_min = -8 if not is_asym_mode else 0 - quant_max = 7 if not is_asym_mode else 15 - dtype = torch.int8 - channel_axis = 0 - torch_qscheme = torch_qscheme = ( - torch.per_channel_symmetric - if not is_asym_mode - else torch.per_channel_affine - ) - else: - observer = INT8WeightObserver - quant_min = -128 if not is_asym_mode else 0 - quant_max = 127 if not is_asym_mode else 255 - dtype = torch.int8 - channel_axis = 0 - torch_qscheme = ( - torch.per_channel_symmetric - if not is_asym_mode - else torch.per_channel_affine - ) - return TorchAOQuantizationSpec( - dtype=dtype, - observer_or_fake_quant_ctr=observer.with_args(**extra_args), - quant_min=quant_min, - quant_max=quant_max, - qscheme=torch_qscheme, - ch_axis=channel_axis, - is_dynamic=False, - ) - - @staticmethod - def _get_torch_ao_qspec_from_nncf_config_for_ptq( - qp: quantization.quantizer_setup.QuantizationPointBase, - ) -> TorchAOQuantizationSpec: + def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> TorchAOQuantizationSpec: """ - 
Returns a TorchAO TorchAOQuantizationSpec based on NNCF quantization point. + Retrieves the quantization configuration from the given quantization point and + converts it into a TorchAOQuantizationSpec. - :param qp: Quantization point from NNCF. - :return: A TorchAO TorchAOQuantizationSpec. + :param qp: An instance of QuantizationPointBase. + :return: A TorchAOQuantizationSpec retrieved and converted from the quantization point. """ - observer: type[UniformQuantizationObserverBase] - # Eps value is copied from nncf/torch/quantization/layers.py - extra_args: dict[str, Any] = {"eps": 1e-16} - - is_weight = qp.is_weight_quantization_point() + extra_args = {"eps": 1e-16} qconfig = qp.qconfig - dtype = torch.int8 - quant_min = None - quant_max = None - channel_axis = None + is_weight = qp.is_weight_quantization_point() if qconfig.per_channel: torch_qscheme = ( torch.per_channel_symmetric - if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC + if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_channel_affine ) else: torch_qscheme = ( - torch.per_tensor_symmetric - if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC - else torch.per_tensor_affine + torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_tensor_affine ) if is_weight: observer = PerChannelMinMaxObserver @@ -531,16 +318,10 @@ def _get_torch_ao_qspec_from_nncf_config_for_ptq( quant_max = 127 dtype = torch.int8 channel_axis = 0 - torch_qscheme = ( - torch.per_channel_symmetric - if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC - else torch.per_channel_affine - ) else: observer = ( HistogramObserver - if torch_qscheme - in [torch.per_tensor_symmetric, torch.per_tensor_affine] + if torch_qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine] else PerChannelMinMaxObserver ) quant_min = 0 @@ -565,9 +346,7 @@ def validate(self, model: torch.fx.GraphModule) -> None: """ pass - def transform_for_annotation( - 
self, model: torch.fx.GraphModule - ) -> torch.fx.GraphModule: + def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: """ Allows for user defined transforms to run before annotating the graph. This allows quantizer to allow quantizing part of the model that are otherwise not quantizable. @@ -581,4 +360,4 @@ def transform_for_annotation( :param model: Given torch.fx.GraphModule to transform before the annotation. :return: The transformed torch.fx.GraphModule ready for the annotation. """ - return model + return model \ No newline at end of file From 7a8e51a9251119366bf9aae785528603492fc5d1 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 17:18:40 +0400 Subject: [PATCH 20/91] create ovquantizer in executorch dir --- .../fx/quantization/quantizer/__init__.py | 2 +- .../quantizer/executorch/__init__.py | 13 + .../executorch_openvino_quantizer.py | 606 ++++++++++++++++++ .../quantizer/{ => executorch}/observers.py | 0 4 files changed, 620 insertions(+), 1 deletion(-) create mode 100644 src/nncf/experimental/torch/fx/quantization/quantizer/executorch/__init__.py create mode 100644 src/nncf/experimental/torch/fx/quantization/quantizer/executorch/executorch_openvino_quantizer.py rename src/nncf/experimental/torch/fx/quantization/quantizer/{ => executorch}/observers.py (100%) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py b/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py index 617f6642d73..a78b76bc409 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py @@ -9,4 +9,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .openvino_quantizer import OpenVINOQuantizer, QuantizationMode +from .openvino_quantizer import OpenVINOQuantizer diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/executorch/__init__.py b/src/nncf/experimental/torch/fx/quantization/quantizer/executorch/__init__.py new file mode 100644 index 00000000000..62297ed2ebb --- /dev/null +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/executorch/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .executorch_openvino_quantizer import OpenVINOQuantizer \ No newline at end of file diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/executorch/executorch_openvino_quantizer.py b/src/nncf/experimental/torch/fx/quantization/quantizer/executorch/executorch_openvino_quantizer.py new file mode 100644 index 00000000000..f5df1ba5955 --- /dev/null +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/executorch/executorch_openvino_quantizer.py @@ -0,0 +1,606 @@ +# Copyright (c) Intel Corporation +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file found in the +# LICENSE file in the root directory of this source tree. 
+ +# mypy: disable-error-code=import-not-found + +from collections import defaultdict +from enum import Enum +from typing import Any, Callable, DefaultDict, Dict, List, Optional, Tuple, Type + +import nncf # type: ignore[import-untyped] +import nncf.common.quantization as quantization # type: ignore[import-untyped] +import nncf.experimental.torch.fx as nncf_fx # type: ignore[import-untyped] +from nncf.common.graph.graph import NNCFNode + +import torch.fx +from .observers import ( + INT4WeightObserver, + INT8WeightObserver, +) +from nncf.common.graph.graph import NNCFGraph # type: ignore[import-untyped] +from nncf.quantization.algorithms.weight_compression.config import ( # type: ignore[import-untyped] + WeightCompressionParameters, +) +from nncf.quantization.quantize_model import ( # type: ignore[import-untyped] + get_weight_compression_configuration, +) +from torch.ao.quantization.observer import ( + HistogramObserver, + PerChannelMinMaxObserver, + UniformQuantizationObserverBase, +) +from torch.ao.quantization.quantizer.quantizer import ( + EdgeOrNode, + QuantizationAnnotation, + QuantizationSpec, + QuantizationSpecBase, + Quantizer, + SharedQuantizationSpec, +) + +QUANT_ANNOTATION_KEY = "quantization_annotation" + + +class QuantizationMode(Enum): + """ + Defines special quantization modes. + + - INT8_SYM: INT8 symmetric quantization for both activations and weights. + - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. + - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models + - INT8WO_SYM: INT8 symmetric quantization for weights only. + - INT8WO_ASYM: INT8 asymmetric quantization for weights only. + - INT4WO_SYM: INT4 symmetric quantization for weights only. 
+ - INT4WO_ASYM: INT4 asymmetric quantization for weights only + """ + + INT8_SYM = "int8_sym" + INT8_MIXED = "int8_mixed" + INT8_TRANSFORMER = "int8_transformer" + INT8WO_SYM = "int8wo_sym" + INT8WO_ASYM = "int8wo_asym" + INT4WO_SYM = "int4wo_sym" + INT4WO_ASYM = "int4wo_asym" + + +class OpenVINOQuantizer(Quantizer): + """ + Implementation of the Torch AO quantizer which annotates models with quantization annotations + optimally for the inference via OpenVINO. + """ + + WEIGHTS_ONLY_COMPRESSION_MODES = ( + QuantizationMode.INT4WO_SYM, + QuantizationMode.INT4WO_ASYM, + QuantizationMode.INT8WO_SYM, + QuantizationMode.INT8WO_ASYM, + ) + + def __init__( + self, + *, + mode: QuantizationMode = QuantizationMode.INT8_SYM, + **kwargs, + ): + """ + :param mode: Defines special quantization modes. + - INT8_SYM: INT8 symmetric quantization for both activations and weights. + - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. + - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models + Default value is INT8_SYM. + - INT4_SYM: Symmetric INT4 Weights-Only Compression + - INT4_ASYM: Asymmetric INT4 Weights-Only Compression + :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm. 
+ """ + self.mode = mode + if self.mode not in OpenVINOQuantizer.WEIGHTS_ONLY_COMPRESSION_MODES: + if mode == QuantizationMode.INT8_SYM: + preset = quantization.structs.QuantizationPreset.PERFORMANCE + model_type = None + elif mode == QuantizationMode.INT8_MIXED: + preset = quantization.structs.QuantizationPreset.MIXED + model_type = None + else: + preset = None + model_type = nncf.parameters.ModelType.TRANSFORMER + self._algo = ( + nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( + preset=preset, model_type=model_type, **kwargs + ) + ) + else: + self.weight_compression_configuration = get_weight_compression_configuration( + mode.value.replace( + "wo", "" + ), # Mode value has to match NNCF CompressWeightsMode + **kwargs, + ) + _weight_compression_configuration = self.weight_compression_configuration + subset_size = 1 # Doesn't really matter in this case since it is data-free. Should just be +ve + self._algo = nncf.quantization.algorithms.weight_compression.algorithm.WeightCompression( + subset_size=subset_size, **_weight_compression_configuration + ) + + def set_ignored_scope( + self, + names: Optional[List[str]] = None, + patterns: Optional[List[str]] = None, + types: Optional[List[str]] = None, + subgraphs: Optional[List[Tuple[List[str], List[str]]]] = None, + validate: bool = True, + ) -> None: + """ + Provides an option to specify portions of model to be excluded from compression. + The ignored scope defines model sub-graphs that should be excluded from the quantization process. + + :param names: List of ignored node names. + :param patterns: List of regular expressions that define patterns for names of ignored nodes. + :param types: List of ignored operation types. + :param subgraphs: List of ignored subgraphs. + :param validate: If set to True, then a RuntimeError will be raised if any ignored scope does not match + in the model graph. 
+ """ + self._algo.set_ignored_scope( + nncf.IgnoredScope( + names=names or [], + patterns=patterns or [], + types=types or [], + subgraphs=subgraphs or [], + validate=validate, + ) + ) + + def get_nncf_quantization_setup( + self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph + ) -> quantization.quantizer_setup.SingleConfigQuantizerSetup: + self._algo._set_backend_entity(model) + return self._algo.find_quantization_setup(model, nncf_graph) + + def get_nodes_to_compress( + self, model, nncf_graph + ) -> list[NNCFNode]: + self._algo.set_backend_entity(model) + return self._algo.get_nodes_to_compress(nncf_graph) + + def get_nncf_weight_compression_setup( + self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph + ) -> quantization.quantizer_setup.SingleConfigQuantizerSetup: + nodes_to_compress = self.get_nodes_to_compress(model, nncf_graph) + return self._algo.get_weight_compression_parameters( + model, nncf_graph, nodes_to_compress + )[0] + + def _annotate_weight_compression( + self, + model: torch.fx.GraphModule, + graph: torch.fx.Graph, + nncf_graph: NNCFGraph, + node_vs_torch_annotation: DefaultDict[torch.fx.Node, QuantizationAnnotation], + ) -> DefaultDict[torch.fx.Node, QuantizationAnnotation]: + """ + Annotates the model graph with weight-only quantization specs. + + Identifies compressible nodes in the NNCF graph and attaches the corresponding + TorchAO quantization specifications to their weight edges for later transformation. + + :param model: The FX GraphModule to annotate. + :param graph: The underlying FX graph. + :param nncf_graph: The corresponding NNCF graph. + :param node_vs_torch_annotation: A mapping of FX nodes to quantization annotations. + :return: Updated mapping of FX nodes with weight compression annotations. 
+ """ + all_wc_params = self.get_nncf_weight_compression_setup( + model, nncf_graph + ) + + for wc_param in all_wc_params: + node_with_weight = wc_param.node_with_weight + target_node = nncf_fx.node_utils.get_graph_node_by_name( + graph, node_with_weight.node_name + ) + annotation = node_vs_torch_annotation[target_node] + edge_or_node = self._get_weight_edge(target_node, nncf_graph) + qspec = self._get_torch_ao_qspec_from_nncf_config_for_wc(wc_param=wc_param) + self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) + + return node_vs_torch_annotation + + def _annotate_post_training_quantization( + self, + model: torch.fx.GraphModule, + graph: torch.fx.Graph, + nncf_graph: NNCFGraph, + node_vs_torch_annotation: DefaultDict[torch.fx.Node, QuantizationAnnotation], + ) -> DefaultDict[torch.fx.Node, QuantizationAnnotation]: + """ + Annotates the model graph with post-training quantization configurations. + + :param model: The FX GraphModule to annotate. + :param graph: The underlying FX graph. + :param nncf_graph: The corresponding NNCF graph. + :param node_vs_torch_annotation: A mapping of FX nodes to quantization annotations. + :return: Updated mapping of FX nodes with post-training quantization annotations. 
+ """ + quantization_setup = self.get_nncf_quantization_setup(model, nncf_graph) + + for qp in quantization_setup.quantization_points.values(): + edge_or_node, annotation = self._get_edge_or_node_and_annotation( + graph, nncf_graph, qp, node_vs_torch_annotation + ) + qspec: QuantizationSpecBase = ( + self._get_torch_ao_qspec_from_nncf_config_for_ptq(qp) + ) + self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) + + for quantizer_ids in quantization_setup.unified_scale_groups.values(): + root_quantizer_id = self._get_unified_scales_root_quantizer_id( + nncf_graph, quantizer_ids, quantization_setup + ) + root_qp = quantization_setup.quantization_points[root_quantizer_id] + + if any( + root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig + for q_id in quantizer_ids + ): + qps = [ + quantization_setup.quantization_points[qid] for qid in quantizer_ids + ] + raise nncf.InternalError( + "Different quantization configs are set to one unified scale group:" + f"{[(qp.insertion_point.__dict__, str(qp.qconfig)) for qp in qps]}" + ) + + root_target_node = nncf_fx.node_utils.get_graph_node_by_name( + graph, root_qp.insertion_point.target_node_name + ) + root_edge_or_node = self._get_edge_or_node( + root_target_node, root_qp, nncf_graph + ) + + for quantizer_id in quantizer_ids: + if quantizer_id == root_quantizer_id: + continue + + qspec = SharedQuantizationSpec(root_edge_or_node) # type: ignore[assignment] + qp = quantization_setup.quantization_points[quantizer_id] + edge_or_node, annotation = self._get_edge_or_node_and_annotation( + graph, nncf_graph, qp, node_vs_torch_annotation + ) + self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) + + return node_vs_torch_annotation + + def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: + nncf_graph = nncf_fx.nncf_graph_builder.GraphConverter.create_nncf_graph(model) + graph = model.graph + node_vs_torch_annotation: DefaultDict[torch.fx.Node, QuantizationAnnotation] = ( + 
defaultdict(QuantizationAnnotation) + ) + + if self.mode in OpenVINOQuantizer.WEIGHTS_ONLY_COMPRESSION_MODES: + node_vs_torch_annotation = self._annotate_weight_compression( + model, graph, nncf_graph, node_vs_torch_annotation + ) + else: + node_vs_torch_annotation = self._annotate_post_training_quantization( + model, graph, nncf_graph, node_vs_torch_annotation + ) + + for node, annotation in node_vs_torch_annotation.items(): + assert QUANT_ANNOTATION_KEY not in node.meta + node.meta[QUANT_ANNOTATION_KEY] = annotation + + return model + + @staticmethod + def _get_unified_scales_root_quantizer_id( + nncf_graph: NNCFGraph, + quantizer_ids: List[int], + quantizer_setup: quantization.quantizer_setup.SingleConfigQuantizerSetup, + ) -> int: + """ + Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` + in the given NNCFGraph. This is required by the `_get_obs_or_fq_map` function. + Refer to: https://github.com/pytorch/pytorch/blob/main/torch/ao/quantization/pt2e/prepare.py#L291 + + :param nncf_graph: The NNCFGraph instance. + :param quantizer_ids: The list of quantizer IDs to evaluate. + :param quantizer_setup: The instance of SingleConfigQuantizerSetup. + :return: The ID of the earliest quantizer node in terms of `nncf_node.node_id`. 
+ """ + nncf_node_quantizer_id = None + root_quantizer_id = None + for quantizer_id in quantizer_ids: + target_node_name = quantizer_setup.quantization_points[ + quantizer_id + ].insertion_point.target_node_name + nncf_node = nncf_graph.get_node_by_name(target_node_name) + if ( + nncf_node_quantizer_id is None + or nncf_node.node_id < nncf_node_quantizer_id + ): + root_quantizer_id = quantizer_id + nncf_node_quantizer_id = nncf_node.node_id + if root_quantizer_id is None: + msg = "Root quantizer ids can't be None" + raise nncf.InternalError(msg) + return root_quantizer_id + + @staticmethod + def _get_edge_or_node_and_annotation( + graph: torch.fx.Graph, + nncf_graph: NNCFGraph, + qp: quantization.quantizer_setup.QuantizationPointBase, + node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation], + ) -> Tuple[EdgeOrNode, QuantizationAnnotation]: + """ + Retrieves the edge or node and its corresponding QuantizationAnnotation based on the given graph, + quantization point, and node-to-annotation mapping. + + :param graph: torch.fx.Graph instance. + :param nncf_graph: NNCFGraph instance. + :param qp: QuantizationPointBase instance. + :param node_vs_torch_annotation: A dictionary mapping torch.fx.GraphNode objects to their respective + QuantizationAnnotations. + :return: A tuple containing the EdgeOrNode and its associated QuantizationAnnotation. + """ + target_node = nncf_fx.node_utils.get_graph_node_by_name( + graph, qp.insertion_point.target_node_name + ) + annotation = node_vs_torch_annotation[target_node] + edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) + return edge_or_node, annotation + + @staticmethod + def _get_weight_edge( + target_node: torch.fx.Node, + nncf_graph: NNCFGraph, + ) -> tuple[torch.fx.Node, torch.fx.Node]: + """ + Returns the FX node corresponding to the weight tensor input of a given operator node. + Uses the NNCF graph to identify which input port of the target node holds the weight. 
+ If multiple weight ports are present, a warning is issued and only the first one is used. + + :param target_node: FX node representing a weighted operation (e.g., Linear, Conv). + :param nncf_graph: NNCFGraph used to determine weight port indices. + :return: Edge represented by a Tuple of (weight_node, target_node), where weight_node is the FX node supplying the weight. + """ + nncf_node = nncf_graph.get_node_by_name(target_node.name) + weights_ports_ids = nncf.torch.model_graph_manager.get_weight_tensor_port_ids( + nncf_node, nncf_graph + ) + if len(weights_ports_ids) > 1: + # TODO(dlyakhov): support quantization for nodes with several weights + nncf.common.logging.nncf_logger.warning( + f"Quantization of the weighted node {target_node.name}" + " is not yet supported by the OpenVINOQuantizer." + f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." + f" Quantizable weights are located on ports: {weights_ports_ids}." + ) + weight_node = target_node.all_input_nodes[weights_ports_ids[0]] + return (weight_node, target_node) + + @staticmethod + def _get_edge_or_node( + target_node: torch.fx.Node, + qp: quantization.quantizer_setup.QuantizationPointBase, + nncf_graph: NNCFGraph, + ) -> EdgeOrNode: + """ + Returns the edge or node based on the given target node and quantization point. + + :param target_node: Target node instance. + :param qp: QuantizationPointBase instance. + :param graph: NNCFGraph instance. + :return: The corresponding EdgeOrNode derived from the target node and quantization point. 
+ """ + ip = qp.insertion_point + if qp.is_weight_quantization_point(): + OpenVINOQuantizer._get_weight_edge(target_node, nncf_graph) + + if ip.input_port_id is None: + return target_node + + node = target_node.all_input_nodes[ip.input_port_id] + return (node, target_node) + + @staticmethod + def _fill_torch_ao_annotation( + edge_or_node: EdgeOrNode, + qspec: QuantizationSpecBase, + annotation_to_update: QuantizationAnnotation, + ) -> None: + """ + Helper method to update the annotation_to_update based on the specified edge_or_node and qspec. + + :param edge_or_node: The target EdgeOrNode to be used for the update. + :param qspec: An instance of QuantizationSpecBase representing the quantization specification to apply. + :param annotation_to_update: The annotation to update based on the edge_or_node and qspec. + """ + if isinstance(edge_or_node, torch.fx.Node): + annotation_to_update.output_qspec = qspec + else: + annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec + + @staticmethod + def _get_torch_ao_qspec_from_nncf_config_for_wc( + wc_param: WeightCompressionParameters, + ) -> QuantizationSpec: + """ + Returns a TorchAO QuantizationSpec based on NNCF weight compression parameter. + + :param wc_param: NNCF Weight compression parameters for the node. + :return: A TorchAO QuantizationSpec. 
+ """ + observer: Type[UniformQuantizationObserverBase] + + extra_args: Dict[str, Any] = {} + + qmode = wc_param.compression_config.mode + extra_args["wc_param"] = wc_param + is_asym_mode = wc_param.compression_config.is_asym_mode + if qmode in [ + nncf.CompressWeightsMode.INT4_ASYM, + nncf.CompressWeightsMode.INT4_SYM, + ]: + observer = INT4WeightObserver # type: ignore[type-abstract] + quant_min = -8 if not is_asym_mode else 0 + quant_max = 7 if not is_asym_mode else 15 + dtype = torch.int8 + channel_axis = 0 + torch_qscheme = torch_qscheme = ( + torch.per_channel_symmetric + if not is_asym_mode + else torch.per_channel_affine + ) + else: + observer = INT8WeightObserver # type: ignore[type-abstract] + quant_min = -128 if not is_asym_mode else 0 + quant_max = 127 if not is_asym_mode else 255 + dtype = torch.int8 + channel_axis = 0 + torch_qscheme = ( + torch.per_channel_symmetric + if not is_asym_mode + else torch.per_channel_affine + ) + return QuantizationSpec( + dtype=dtype, + observer_or_fake_quant_ctr=observer.with_args(**extra_args), + quant_min=quant_min, + quant_max=quant_max, + qscheme=torch_qscheme, + ch_axis=channel_axis, + is_dynamic=False, + ) + + @staticmethod + def _get_torch_ao_qspec_from_nncf_config_for_ptq( + qp: quantization.quantizer_setup.QuantizationPointBase, + ) -> QuantizationSpec: + """ + Returns a TorchAO QuantizationSpec based on NNCF quantization point. + + :param qp: Quantization point from NNCF. + :return: A TorchAO QuantizationSpec. 
+ """ + observer: Type[UniformQuantizationObserverBase] + + # Eps value is copied from nncf/torch/quantization/layers.py + extra_args: Dict[str, Any] = {"eps": 1e-16} + + is_weight = qp.is_weight_quantization_point() + qconfig = qp.qconfig + dtype = torch.int8 + quant_min = None + quant_max = None + channel_axis = None + + if qconfig.per_channel: + torch_qscheme = ( + torch.per_channel_symmetric + if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC + else torch.per_channel_affine + ) + else: + torch_qscheme = ( + torch.per_tensor_symmetric + if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC + else torch.per_tensor_affine + ) + if is_weight: + observer = PerChannelMinMaxObserver + quant_min = -128 + quant_max = 127 + dtype = torch.int8 + channel_axis = 0 + torch_qscheme = ( + torch.per_channel_symmetric + if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC + else torch.per_channel_affine + ) + else: + observer = ( + HistogramObserver + if torch_qscheme + in [torch.per_tensor_symmetric, torch.per_tensor_affine] + else PerChannelMinMaxObserver + ) + quant_min = 0 + quant_max = 255 + dtype = torch.int8 if qconfig.signedness_to_force else torch.uint8 + channel_axis = 1 # channel dim for activations + return QuantizationSpec( + dtype=dtype, + observer_or_fake_quant_ctr=observer.with_args(**extra_args), + quant_min=quant_min, + quant_max=quant_max, + qscheme=torch_qscheme, + ch_axis=channel_axis, + is_dynamic=False, + ) + + def validate(self, model: torch.fx.GraphModule) -> None: + pass + + +def quantize_model( + captured_model: torch.fx.GraphModule, + calibration_dataset: torch.utils.data.DataLoader, + *, + mode: QuantizationMode = QuantizationMode.INT8_SYM, + subset_size: int = 300, + fast_bias_correction: Optional[bool] = True, + smooth_quant: bool = False, + transform_fn: Optional[Callable[[Any], Any]] = None, + extra_quantizer_options: Optional[Dict[str, Any]] = None, + **kwargs, +) -> torch.fx.GraphModule: + """ + 
Quantizes a model using NNCF quantize_pt2e API. + + :param captured_model: The model to be quantized, represented as a torch.fx.GraphModule. + :param calibration_dataset: A DataLoader containing calibration data for quantization. + :param mode: Defines special quantization modes. + - INT8_SYM: INT8 symmetric quantization for both activations and weights. + - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. + - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models + Default value is INT8_SYM. + :param subset_size: Size of a subset to calculate activations + statistics used for quantization. + :param fast_bias_correction: Setting this option to `False` enables a different + bias correction method which is more accurate, in general, and takes + more time but requires less memory. None disables the bias correction algorithm. + :param smooth_quant: Setting this option to `True` enables the SmoothQuant algorithm. + :param extra_quantizer_options: A dictionary containing additional configuration options + for the OpenVINOQuantizer. + :param kwargs: The keyword arguments for the nncf quantize_pt2e function. + :return: The quantized model as a torch.fx.GraphModule. + """ + extra_quantizer_options = extra_quantizer_options or {} + if "mode" in extra_quantizer_options: + print( + f'Ignoring "mode" from the quantizer_config. 
Using parameter mode = {mode}' + ) + del extra_quantizer_options["mode"] + + quantizer = OpenVINOQuantizer(mode=mode, **extra_quantizer_options) + + print("PTQ: Quantize the model") + + if "fold_quantize" not in kwargs: + kwargs["fold_quantize"] = False + + quantized_model = nncf_fx.quantize_pt2e( + captured_model, + quantizer, + subset_size=subset_size, + calibration_dataset=nncf.Dataset(calibration_dataset, transform_fn), + fast_bias_correction=fast_bias_correction, + smooth_quant=smooth_quant, + **kwargs, + ) + return quantized_model diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/observers.py b/src/nncf/experimental/torch/fx/quantization/quantizer/executorch/observers.py similarity index 100% rename from src/nncf/experimental/torch/fx/quantization/quantizer/observers.py rename to src/nncf/experimental/torch/fx/quantization/quantizer/executorch/observers.py From fed50521a8ce80231916015f212c06f970b70bc4 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 17:26:43 +0400 Subject: [PATCH 21/91] update executorch quantizer location. 
--- .../fx/quantization/quantizer => tests}/executorch/__init__.py | 0 .../executorch/executorch_openvino_quantizer.py | 0 .../fx/quantization/quantizer => tests}/executorch/observers.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {src/nncf/experimental/torch/fx/quantization/quantizer => tests}/executorch/__init__.py (100%) rename {src/nncf/experimental/torch/fx/quantization/quantizer => tests}/executorch/executorch_openvino_quantizer.py (100%) rename {src/nncf/experimental/torch/fx/quantization/quantizer => tests}/executorch/observers.py (100%) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/executorch/__init__.py b/tests/executorch/__init__.py similarity index 100% rename from src/nncf/experimental/torch/fx/quantization/quantizer/executorch/__init__.py rename to tests/executorch/__init__.py diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/executorch/executorch_openvino_quantizer.py b/tests/executorch/executorch_openvino_quantizer.py similarity index 100% rename from src/nncf/experimental/torch/fx/quantization/quantizer/executorch/executorch_openvino_quantizer.py rename to tests/executorch/executorch_openvino_quantizer.py diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/executorch/observers.py b/tests/executorch/observers.py similarity index 100% rename from src/nncf/experimental/torch/fx/quantization/quantizer/executorch/observers.py rename to tests/executorch/observers.py From 28664730a87e154b52dad7d0bfda8a71aff1a29e Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 23 Sep 2025 17:48:48 +0400 Subject: [PATCH 22/91] check if openvino quantizer has weight compression in openvino adapter --- .../torch/fx/quantization/quantizer/openvino_adapter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py index 6a39cc7fcdf..215cbcf65bd 
100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py @@ -23,7 +23,8 @@ class OpenVINOQuantizerAdapter(Quantizer): def __init__(self, quantizer: OpenVINOQuantizer): self._quantizer = quantizer - self._weight_compression_configuration = self._quantizer.weight_compression_configuration + if(hasattr(self._quantizer, "weight_compression_configuration")): + self._weight_compression_configuration = self._quantizer.weight_compression_configuration def transform_prior_quantization(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: return self._quantizer.transform_for_annotation(model) From 7171d56519147851c64d68e99ef60618ca0a1af7 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 24 Sep 2025 09:54:29 +0400 Subject: [PATCH 23/91] review comments --- .../quantization/algorithms/weight_compression/algorithm.py | 3 +-- .../experimental/torch/fx/quantization/quantizer/__init__.py | 2 +- .../torch/fx/quantization/quantizer/openvino_quantizer.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 9798ec108cb..3b06128fbeb 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -39,7 +39,7 @@ def __init__( mode = wc_config.get("mode", None) ratio = wc_config.get( "ratio", 1 - ) # TODO Discuss if ratio should be passed in quantizer or in the compress_pt2e api + ) group_size = wc_config.get("group_size", 128) all_layers = wc_config.get("all_layers", False) backup_mode = wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM) @@ -49,7 +49,6 @@ def __init__( mode=mode, ratio=ratio, group_size=group_size, - ignored_scope=nncf.IgnoredScope(), # only compress 
"nodes_to_compress" all_layers=all_layers, sensitivity_metric=self._sensitivity_metric, awq=awq, diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py b/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py index a78b76bc409..555af32509a 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py @@ -9,4 +9,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .openvino_quantizer import OpenVINOQuantizer +from nncf.experimental.torch.fx import OpenVINOQuantizer diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py index d9db3b29e7e..55611a7d095 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py @@ -360,4 +360,4 @@ def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.Grap :param model: Given torch.fx.GraphModule to transform before the annotation. :return: The transformed torch.fx.GraphModule ready for the annotation. 
""" - return model \ No newline at end of file + return model From 3e3b067f134d65e4e8b4c1c736e16258f610dbff Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 24 Sep 2025 13:24:08 +0400 Subject: [PATCH 24/91] revert ignored scope changes; make sensitivity metric None to check if user passed it or not; Remove init file for tests/executorch; remove init file from nncf openvino quantizer --- .../algorithms/weight_compression/algorithm.py | 13 +++++++------ .../torch/fx/quantization/quantize_pt2e.py | 2 +- .../torch/fx/quantization/quantizer/__init__.py | 4 +--- tests/executorch/__init__.py | 13 ------------- 4 files changed, 9 insertions(+), 23 deletions(-) delete mode 100644 tests/executorch/__init__.py diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 3b06128fbeb..9fcdc7daede 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -11,13 +11,13 @@ import torch -import nncf # type: ignore[import-untyped] -from nncf.common.graph.graph import NNCFGraph # type: ignore[import-untyped] +import nncf +from nncf.common.graph.graph import NNCFGraph from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType from nncf.quantization.algorithms.algorithm import Algorithm from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression - +from nncf import SensitivityMetric class WeightsCompressionPT2E(Algorithm): def __init__( @@ -28,7 +28,7 @@ def __init__( scale_estimation: bool = False, gptq: bool = False, lora_correction: bool = False, - sensitivity_metric: nncf.SensitivityMetric = nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, + sensitivity_metric: nncf.SensitivityMetric = None, compression_format: nncf.CompressionFormat = 
nncf.CompressionFormat.DQ, advanced_parameters: nncf.AdvancedCompressionParameters = None, ) -> torch.fx.GraphModule: @@ -49,8 +49,9 @@ def __init__( mode=mode, ratio=ratio, group_size=group_size, + ignored_scope=nncf.IgnoredScope(), # only compress "nodes_to_compress" all_layers=all_layers, - sensitivity_metric=self._sensitivity_metric, + sensitivity_metric=self._sensitivity_metric or SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, awq=awq, subset_size=subset_size, scale_estimation=scale_estimation, @@ -73,7 +74,7 @@ def apply( ): self._algo.set_backend_entity(model) # Set algo backend - if self._sensitivity_metric == nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR: + if self._sensitivity_metric is None: # Default case. It means that it is not defined by the user in the API # Hence, the annotation(Quantization parameters for all layers) from the quantizer will be used. all_weight_params = self._quantizer.get_weight_compression_setup( diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index ae08f09e985..30adf9990cd 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -170,7 +170,7 @@ def compress_pt2e( gptq: bool = False, lora_correction: bool = False, subset_size: int = 128, # Dataset size to use - sensitivity_metric: nncf.SensitivityMetric = nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, + sensitivity_metric: nncf.SensitivityMetric = None, advanced_parameters: nncf.AdvancedCompressionParameters = None, ) -> torch.fx.GraphModule: """ diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py b/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py index 555af32509a..9feaaa94cfc 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py @@ -7,6 +7,4 @@ # distributed 
under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. - -from nncf.experimental.torch.fx import OpenVINOQuantizer +# limitations under the License. \ No newline at end of file diff --git a/tests/executorch/__init__.py b/tests/executorch/__init__.py deleted file mode 100644 index 62297ed2ebb..00000000000 --- a/tests/executorch/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2025 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from .executorch_openvino_quantizer import OpenVINOQuantizer \ No newline at end of file From 5b7b2105bc79d81aa89f6711d3c842cb0c4843bc Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 24 Sep 2025 13:35:40 +0400 Subject: [PATCH 25/91] precommit fix --- .../weight_compression/algorithm.py | 7 +- .../fx/quantization/quantizer/__init__.py | 2 +- .../quantizer/openvino_adapter.py | 7 +- .../weight_compression/algorithm.py | 31 ++- .../executorch_openvino_quantizer.py | 177 +++++++----------- 5 files changed, 91 insertions(+), 133 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 9fcdc7daede..45eec1bf4a6 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -12,12 +12,13 @@ import torch import nncf +from nncf import SensitivityMetric from nncf.common.graph.graph import NNCFGraph from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType from nncf.quantization.algorithms.algorithm import Algorithm from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression -from nncf import SensitivityMetric + class WeightsCompressionPT2E(Algorithm): def __init__( @@ -37,9 +38,7 @@ def __init__( wc_config = quantizer._weight_compression_configuration mode = wc_config.get("mode", None) - ratio = wc_config.get( - "ratio", 1 - ) + ratio = wc_config.get("ratio", 1) group_size = wc_config.get("group_size", 128) all_layers = wc_config.get("all_layers", False) backup_mode = wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py b/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py index 9feaaa94cfc..e5a42efc0ef 100644 --- 
a/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/__init__.py @@ -7,4 +7,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py index 215cbcf65bd..8696740c824 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py @@ -16,6 +16,7 @@ from nncf.experimental.quantization.quantizer import Quantizer from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer + class OpenVINOQuantizerAdapter(Quantizer): """ Implementation of the NNCF Quantizer interface for the OpenVINOQuantizer. @@ -23,7 +24,7 @@ class OpenVINOQuantizerAdapter(Quantizer): def __init__(self, quantizer: OpenVINOQuantizer): self._quantizer = quantizer - if(hasattr(self._quantizer, "weight_compression_configuration")): + if hasattr(self._quantizer, "weight_compression_configuration"): self._weight_compression_configuration = self._quantizer.weight_compression_configuration def transform_prior_quantization(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: @@ -32,7 +33,9 @@ def transform_prior_quantization(self, model: torch.fx.GraphModule) -> torch.fx. 
def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup: return self._quantizer.get_nncf_quantization_setup(model, nncf_graph) - def get_weight_compression_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup: + def get_weight_compression_setup( + self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph + ) -> SingleConfigQuantizerSetup: return self._quantizer.get_nncf_weight_compression_setup(model, nncf_graph) def get_nodes_to_compress(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph): diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index cafdc5bee06..68b8538a8c5 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -778,7 +778,7 @@ def collect_weight_compression_statistics( statistic_points: Optional[StatisticPointsContainer] = None, ) -> Optional[dict[str, Any]]: """ - Collects statistics for weight compression if data-aware compression or + Collects statistics for weight compression if data-aware compression or mixed-precision is enabled. :param model: Backend-specific input model. 
@@ -796,17 +796,13 @@ def collect_weight_compression_statistics( for wp in weight_params if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes ] - matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map( - matmul_nodes_to_compress, graph - ) + matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) if statistic_points is None: statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) - statistics = self._get_statistics_for_weights_compression( - matmul_input_to_output_nodes_map, statistic_points - ) + statistics = self._get_statistics_for_weights_compression(matmul_input_to_output_nodes_map, statistic_points) return statistics, statistic_points def get_weight_compression_parameters( @@ -910,7 +906,9 @@ def get_weight_compression_parameters( # Collect statistics for the weights compression weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params - statistics, statistic_points = self.collect_weight_compression_statistics(model, graph, dataset, weight_params, statistic_points) + statistics, statistic_points = self.collect_weight_compression_statistics( + model, graph, dataset, weight_params, statistic_points + ) # Set weight compression configuration self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values) @@ -926,12 +924,12 @@ def get_weight_compression_parameters( return all_weight_params, statistics def apply_wc_algos( - self, - model: TModel, - graph: NNCFGraph, - all_weight_params: list[WeightCompressionParameters], - statistics: dict[str, Any], - dataset: Optional[Dataset] = None, + self, + model: TModel, + graph: NNCFGraph, + all_weight_params: list[WeightCompressionParameters], + statistics: dict[str, Any], + dataset: Optional[Dataset] = None, ) -> 
TModel: if self._awq: model = self.awq_algo.apply(model, graph, all_weight_params, statistics, self._backend_entity) @@ -1005,7 +1003,6 @@ def apply_wc_algos( ) return transformed_model - def apply( self, @@ -1016,12 +1013,12 @@ def apply( ) -> TModel: self.set_backend_entity(model) nodes_to_compress = self.get_nodes_to_compress(graph) - # Get processed weight compression parameters ready for compression + # Get processed weight compression parameters ready for compression all_weight_params, statistics = self.get_weight_compression_parameters( model, graph, nodes_to_compress, statistic_points, dataset ) transformed_model = self.apply_wc_algos(model, graph, all_weight_params, statistics, dataset) - + return transformed_model def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> tuple[NNCFNode, int]: diff --git a/tests/executorch/executorch_openvino_quantizer.py b/tests/executorch/executorch_openvino_quantizer.py index f5df1ba5955..96d4491389d 100644 --- a/tests/executorch/executorch_openvino_quantizer.py +++ b/tests/executorch/executorch_openvino_quantizer.py @@ -1,45 +1,41 @@ -# Copyright (c) Intel Corporation -# -# Licensed under the BSD License (the "License"); you may not use this file -# except in compliance with the License. See the license file found in the -# LICENSE file in the root directory of this source tree. - -# mypy: disable-error-code=import-not-found +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. from collections import defaultdict from enum import Enum -from typing import Any, Callable, DefaultDict, Dict, List, Optional, Tuple, Type +from typing import Any, Callable, Optional + +import torch.fx +from torch.ao.quantization.observer import HistogramObserver +from torch.ao.quantization.observer import PerChannelMinMaxObserver +from torch.ao.quantization.observer import UniformQuantizationObserverBase +from torch.ao.quantization.quantizer.quantizer import EdgeOrNode +from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation +from torch.ao.quantization.quantizer.quantizer import QuantizationSpec +from torch.ao.quantization.quantizer.quantizer import QuantizationSpecBase +from torch.ao.quantization.quantizer.quantizer import Quantizer +from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec import nncf # type: ignore[import-untyped] import nncf.common.quantization as quantization # type: ignore[import-untyped] import nncf.experimental.torch.fx as nncf_fx # type: ignore[import-untyped] -from nncf.common.graph.graph import NNCFNode - -import torch.fx -from .observers import ( - INT4WeightObserver, - INT8WeightObserver, -) from nncf.common.graph.graph import NNCFGraph # type: ignore[import-untyped] +from nncf.common.graph.graph import NNCFNode from nncf.quantization.algorithms.weight_compression.config import ( # type: ignore[import-untyped] WeightCompressionParameters, ) -from nncf.quantization.quantize_model import ( # type: ignore[import-untyped] - get_weight_compression_configuration, -) -from torch.ao.quantization.observer import ( - HistogramObserver, - PerChannelMinMaxObserver, - UniformQuantizationObserverBase, -) -from torch.ao.quantization.quantizer.quantizer import ( - EdgeOrNode, - QuantizationAnnotation, - QuantizationSpec, - QuantizationSpecBase, - Quantizer, - SharedQuantizationSpec, -) +from 
nncf.quantization.quantize_model import get_weight_compression_configuration # type: ignore[import-untyped] + +from tests.executorch.observers import INT4WeightObserver +from tests.executorch.observers import INT8WeightObserver QUANT_ANNOTATION_KEY = "quantization_annotation" @@ -106,16 +102,12 @@ def __init__( else: preset = None model_type = nncf.parameters.ModelType.TRANSFORMER - self._algo = ( - nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( - preset=preset, model_type=model_type, **kwargs - ) + self._algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( + preset=preset, model_type=model_type, **kwargs ) else: self.weight_compression_configuration = get_weight_compression_configuration( - mode.value.replace( - "wo", "" - ), # Mode value has to match NNCF CompressWeightsMode + mode.value.replace("wo", ""), # Mode value has to match NNCF CompressWeightsMode **kwargs, ) _weight_compression_configuration = self.weight_compression_configuration @@ -126,10 +118,10 @@ def __init__( def set_ignored_scope( self, - names: Optional[List[str]] = None, - patterns: Optional[List[str]] = None, - types: Optional[List[str]] = None, - subgraphs: Optional[List[Tuple[List[str], List[str]]]] = None, + names: Optional[list[str]] = None, + patterns: Optional[list[str]] = None, + types: Optional[list[str]] = None, + subgraphs: Optional[list[tuple[list[str], list[str]]]] = None, validate: bool = True, ) -> None: """ @@ -159,9 +151,7 @@ def get_nncf_quantization_setup( self._algo._set_backend_entity(model) return self._algo.find_quantization_setup(model, nncf_graph) - def get_nodes_to_compress( - self, model, nncf_graph - ) -> list[NNCFNode]: + def get_nodes_to_compress(self, model, nncf_graph) -> list[NNCFNode]: self._algo.set_backend_entity(model) return self._algo.get_nodes_to_compress(nncf_graph) @@ -169,17 +159,15 @@ def get_nncf_weight_compression_setup( self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph ) -> 
quantization.quantizer_setup.SingleConfigQuantizerSetup: nodes_to_compress = self.get_nodes_to_compress(model, nncf_graph) - return self._algo.get_weight_compression_parameters( - model, nncf_graph, nodes_to_compress - )[0] + return self._algo.get_weight_compression_parameters(model, nncf_graph, nodes_to_compress)[0] def _annotate_weight_compression( self, model: torch.fx.GraphModule, graph: torch.fx.Graph, nncf_graph: NNCFGraph, - node_vs_torch_annotation: DefaultDict[torch.fx.Node, QuantizationAnnotation], - ) -> DefaultDict[torch.fx.Node, QuantizationAnnotation]: + node_vs_torch_annotation: defaultdict[torch.fx.Node, QuantizationAnnotation], + ) -> defaultdict[torch.fx.Node, QuantizationAnnotation]: """ Annotates the model graph with weight-only quantization specs. @@ -192,15 +180,11 @@ def _annotate_weight_compression( :param node_vs_torch_annotation: A mapping of FX nodes to quantization annotations. :return: Updated mapping of FX nodes with weight compression annotations. """ - all_wc_params = self.get_nncf_weight_compression_setup( - model, nncf_graph - ) + all_wc_params = self.get_nncf_weight_compression_setup(model, nncf_graph) for wc_param in all_wc_params: node_with_weight = wc_param.node_with_weight - target_node = nncf_fx.node_utils.get_graph_node_by_name( - graph, node_with_weight.node_name - ) + target_node = nncf_fx.node_utils.get_graph_node_by_name(graph, node_with_weight.node_name) annotation = node_vs_torch_annotation[target_node] edge_or_node = self._get_weight_edge(target_node, nncf_graph) qspec = self._get_torch_ao_qspec_from_nncf_config_for_wc(wc_param=wc_param) @@ -213,8 +197,8 @@ def _annotate_post_training_quantization( model: torch.fx.GraphModule, graph: torch.fx.Graph, nncf_graph: NNCFGraph, - node_vs_torch_annotation: DefaultDict[torch.fx.Node, QuantizationAnnotation], - ) -> DefaultDict[torch.fx.Node, QuantizationAnnotation]: + node_vs_torch_annotation: defaultdict[torch.fx.Node, QuantizationAnnotation], + ) -> 
defaultdict[torch.fx.Node, QuantizationAnnotation]: """ Annotates the model graph with post-training quantization configurations. @@ -230,9 +214,7 @@ def _annotate_post_training_quantization( edge_or_node, annotation = self._get_edge_or_node_and_annotation( graph, nncf_graph, qp, node_vs_torch_annotation ) - qspec: QuantizationSpecBase = ( - self._get_torch_ao_qspec_from_nncf_config_for_ptq(qp) - ) + qspec: QuantizationSpecBase = self._get_torch_ao_qspec_from_nncf_config_for_ptq(qp) self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) for quantizer_ids in quantization_setup.unified_scale_groups.values(): @@ -241,24 +223,18 @@ def _annotate_post_training_quantization( ) root_qp = quantization_setup.quantization_points[root_quantizer_id] - if any( - root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig - for q_id in quantizer_ids - ): - qps = [ - quantization_setup.quantization_points[qid] for qid in quantizer_ids - ] - raise nncf.InternalError( + if any(root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig for q_id in quantizer_ids): + qps = [quantization_setup.quantization_points[qid] for qid in quantizer_ids] + msg = ( "Different quantization configs are set to one unified scale group:" f"{[(qp.insertion_point.__dict__, str(qp.qconfig)) for qp in qps]}" ) + raise nncf.InternalError(msg) root_target_node = nncf_fx.node_utils.get_graph_node_by_name( graph, root_qp.insertion_point.target_node_name ) - root_edge_or_node = self._get_edge_or_node( - root_target_node, root_qp, nncf_graph - ) + root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph) for quantizer_id in quantizer_ids: if quantizer_id == root_quantizer_id: @@ -276,8 +252,8 @@ def _annotate_post_training_quantization( def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: nncf_graph = nncf_fx.nncf_graph_builder.GraphConverter.create_nncf_graph(model) graph = model.graph - node_vs_torch_annotation: 
DefaultDict[torch.fx.Node, QuantizationAnnotation] = ( - defaultdict(QuantizationAnnotation) + node_vs_torch_annotation: defaultdict[torch.fx.Node, QuantizationAnnotation] = defaultdict( + QuantizationAnnotation ) if self.mode in OpenVINOQuantizer.WEIGHTS_ONLY_COMPRESSION_MODES: @@ -298,7 +274,7 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: @staticmethod def _get_unified_scales_root_quantizer_id( nncf_graph: NNCFGraph, - quantizer_ids: List[int], + quantizer_ids: list[int], quantizer_setup: quantization.quantizer_setup.SingleConfigQuantizerSetup, ) -> int: """ @@ -314,14 +290,9 @@ def _get_unified_scales_root_quantizer_id( nncf_node_quantizer_id = None root_quantizer_id = None for quantizer_id in quantizer_ids: - target_node_name = quantizer_setup.quantization_points[ - quantizer_id - ].insertion_point.target_node_name + target_node_name = quantizer_setup.quantization_points[quantizer_id].insertion_point.target_node_name nncf_node = nncf_graph.get_node_by_name(target_node_name) - if ( - nncf_node_quantizer_id is None - or nncf_node.node_id < nncf_node_quantizer_id - ): + if nncf_node_quantizer_id is None or nncf_node.node_id < nncf_node_quantizer_id: root_quantizer_id = quantizer_id nncf_node_quantizer_id = nncf_node.node_id if root_quantizer_id is None: @@ -334,8 +305,8 @@ def _get_edge_or_node_and_annotation( graph: torch.fx.Graph, nncf_graph: NNCFGraph, qp: quantization.quantizer_setup.QuantizationPointBase, - node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation], - ) -> Tuple[EdgeOrNode, QuantizationAnnotation]: + node_vs_torch_annotation: dict[torch.fx.Node, QuantizationAnnotation], + ) -> tuple[EdgeOrNode, QuantizationAnnotation]: """ Retrieves the edge or node and its corresponding QuantizationAnnotation based on the given graph, quantization point, and node-to-annotation mapping. @@ -347,9 +318,7 @@ def _get_edge_or_node_and_annotation( QuantizationAnnotations. 
:return: A tuple containing the EdgeOrNode and its associated QuantizationAnnotation. """ - target_node = nncf_fx.node_utils.get_graph_node_by_name( - graph, qp.insertion_point.target_node_name - ) + target_node = nncf_fx.node_utils.get_graph_node_by_name(graph, qp.insertion_point.target_node_name) annotation = node_vs_torch_annotation[target_node] edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) return edge_or_node, annotation @@ -366,12 +335,11 @@ def _get_weight_edge( :param target_node: FX node representing a weighted operation (e.g., Linear, Conv). :param nncf_graph: NNCFGraph used to determine weight port indices. - :return: Edge represented by a Tuple of (weight_node, target_node), where weight_node is the FX node supplying the weight. + :return: Edge represented by a Tuple of (weight_node, target_node), where weight_node is the + FX node supplying the weight. """ nncf_node = nncf_graph.get_node_by_name(target_node.name) - weights_ports_ids = nncf.torch.model_graph_manager.get_weight_tensor_port_ids( - nncf_node, nncf_graph - ) + weights_ports_ids = nncf.torch.model_graph_manager.get_weight_tensor_port_ids(nncf_node, nncf_graph) if len(weights_ports_ids) > 1: # TODO(dlyakhov): support quantization for nodes with several weights nncf.common.logging.nncf_logger.warning( @@ -435,9 +403,9 @@ def _get_torch_ao_qspec_from_nncf_config_for_wc( :param wc_param: NNCF Weight compression parameters for the node. :return: A TorchAO QuantizationSpec. 
""" - observer: Type[UniformQuantizationObserverBase] + observer: type[UniformQuantizationObserverBase] - extra_args: Dict[str, Any] = {} + extra_args: dict[str, Any] = {} qmode = wc_param.compression_config.mode extra_args["wc_param"] = wc_param @@ -452,9 +420,7 @@ def _get_torch_ao_qspec_from_nncf_config_for_wc( dtype = torch.int8 channel_axis = 0 torch_qscheme = torch_qscheme = ( - torch.per_channel_symmetric - if not is_asym_mode - else torch.per_channel_affine + torch.per_channel_symmetric if not is_asym_mode else torch.per_channel_affine ) else: observer = INT8WeightObserver # type: ignore[type-abstract] @@ -462,11 +428,7 @@ def _get_torch_ao_qspec_from_nncf_config_for_wc( quant_max = 127 if not is_asym_mode else 255 dtype = torch.int8 channel_axis = 0 - torch_qscheme = ( - torch.per_channel_symmetric - if not is_asym_mode - else torch.per_channel_affine - ) + torch_qscheme = torch.per_channel_symmetric if not is_asym_mode else torch.per_channel_affine return QuantizationSpec( dtype=dtype, observer_or_fake_quant_ctr=observer.with_args(**extra_args), @@ -487,10 +449,10 @@ def _get_torch_ao_qspec_from_nncf_config_for_ptq( :param qp: Quantization point from NNCF. :return: A TorchAO QuantizationSpec. 
""" - observer: Type[UniformQuantizationObserverBase] + observer: type[UniformQuantizationObserverBase] # Eps value is copied from nncf/torch/quantization/layers.py - extra_args: Dict[str, Any] = {"eps": 1e-16} + extra_args: dict[str, Any] = {"eps": 1e-16} is_weight = qp.is_weight_quantization_point() qconfig = qp.qconfig @@ -525,8 +487,7 @@ def _get_torch_ao_qspec_from_nncf_config_for_ptq( else: observer = ( HistogramObserver - if torch_qscheme - in [torch.per_tensor_symmetric, torch.per_tensor_affine] + if torch_qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine] else PerChannelMinMaxObserver ) quant_min = 0 @@ -556,7 +517,7 @@ def quantize_model( fast_bias_correction: Optional[bool] = True, smooth_quant: bool = False, transform_fn: Optional[Callable[[Any], Any]] = None, - extra_quantizer_options: Optional[Dict[str, Any]] = None, + extra_quantizer_options: Optional[dict[str, Any]] = None, **kwargs, ) -> torch.fx.GraphModule: """ @@ -582,9 +543,7 @@ def quantize_model( """ extra_quantizer_options = extra_quantizer_options or {} if "mode" in extra_quantizer_options: - print( - f'Ignoring "mode" from the quantizer_config. Using parameter mode = {mode}' - ) + print(f'Ignoring "mode" from the quantizer_config. 
Using parameter mode = {mode}') del extra_quantizer_options["mode"] quantizer = OpenVINOQuantizer(mode=mode, **extra_quantizer_options) From 71a479fe56c9766de06ac9dde0e0a47277d77dd6 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 24 Sep 2025 13:37:03 +0400 Subject: [PATCH 26/91] pre commit format --- tests/executorch/executorch_openvino_quantizer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/executorch/executorch_openvino_quantizer.py b/tests/executorch/executorch_openvino_quantizer.py index 96d4491389d..0b2379986ce 100644 --- a/tests/executorch/executorch_openvino_quantizer.py +++ b/tests/executorch/executorch_openvino_quantizer.py @@ -32,8 +32,7 @@ from nncf.quantization.algorithms.weight_compression.config import ( # type: ignore[import-untyped] WeightCompressionParameters, ) -from nncf.quantization.quantize_model import get_weight_compression_configuration # type: ignore[import-untyped] - +from nncf.quantization.quantize_model import get_weight_compression_configuration from tests.executorch.observers import INT4WeightObserver from tests.executorch.observers import INT8WeightObserver @@ -335,7 +334,7 @@ def _get_weight_edge( :param target_node: FX node representing a weighted operation (e.g., Linear, Conv). :param nncf_graph: NNCFGraph used to determine weight port indices. - :return: Edge represented by a Tuple of (weight_node, target_node), where weight_node is the + :return: Edge represented by a Tuple of (weight_node, target_node), where weight_node is the FX node supplying the weight. 
""" nncf_node = nncf_graph.get_node_by_name(target_node.name) From b24a59cb2ab2c70342ed78674f26ef2bd8de599b Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 24 Sep 2025 13:40:29 +0400 Subject: [PATCH 27/91] rename executorch quantizer to test_quantizer --- .../{executorch_openvino_quantizer.py => test_quantizer.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/executorch/{executorch_openvino_quantizer.py => test_quantizer.py} (100%) diff --git a/tests/executorch/executorch_openvino_quantizer.py b/tests/executorch/test_quantizer.py similarity index 100% rename from tests/executorch/executorch_openvino_quantizer.py rename to tests/executorch/test_quantizer.py From d12225a7a7ef50296459c72e39c2fca0eda1d493 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 24 Sep 2025 15:26:03 +0400 Subject: [PATCH 28/91] fix last precommit --- src/nncf/experimental/torch/fx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nncf/experimental/torch/fx/__init__.py b/src/nncf/experimental/torch/fx/__init__.py index 866fbaad4b3..86cd9709f6b 100644 --- a/src/nncf/experimental/torch/fx/__init__.py +++ b/src/nncf/experimental/torch/fx/__init__.py @@ -9,6 +9,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e as quantize_pt2e from nncf.experimental.torch.fx.quantization.quantize_pt2e import compress_pt2e as compress_pt2e +from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e as quantize_pt2e from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer as OpenVINOQuantizer From 9870ee2cba6e7226257f5278bb650fe239d21e87 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 24 Sep 2025 15:27:19 +0400 Subject: [PATCH 29/91] remove unused mypy ignore --- tests/executorch/observers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/executorch/observers.py b/tests/executorch/observers.py index c283024c9be..073d944e98b 100644 --- a/tests/executorch/observers.py +++ b/tests/executorch/observers.py @@ -52,7 +52,7 @@ def __init__( super().__init__(dtype=dtype, is_dynamic=False) self._wc_param = wc_param - def calculate_qparams( # type: ignore[override] + def calculate_qparams( self, weight: torch.Tensor, ) -> tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: From 8015629e56af108c983642b18abd985e5b3b7a9c Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 24 Sep 2025 16:35:47 +0400 Subject: [PATCH 30/91] get the mode as struct --- .../torch/fx/quantization/quantizer/openvino_adapter.py | 5 +++-- .../quantization/algorithms/weight_compression/algorithm.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py index 8696740c824..3640957d4fc 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py @@ -24,8 +24,6 @@ class OpenVINOQuantizerAdapter(Quantizer): def __init__(self, quantizer: OpenVINOQuantizer): self._quantizer = quantizer - if 
hasattr(self._quantizer, "weight_compression_configuration"): - self._weight_compression_configuration = self._quantizer.weight_compression_configuration def transform_prior_quantization(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: return self._quantizer.transform_for_annotation(model) @@ -40,3 +38,6 @@ def get_weight_compression_setup( def get_nodes_to_compress(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph): return self._quantizer.get_nodes_to_compress(model, nncf_graph) + + def get_weight_compression_config(self): + return self._quantizer.weight_compression_configuration diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 68b8538a8c5..b6039f50349 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -102,7 +102,7 @@ def get_weight_compression_configuration( ) return { - "mode": mode, + "mode": mode if isinstance(mode, nncf.CompressWeightsMode) else nncf.CompressWeightsMode(mode), "ratio": ratio or 1, "group_size": group_size, "all_layers": all_layers or False, @@ -985,7 +985,7 @@ def apply_wc_algos( self._backend_entity.dump_parameters( model, parameters={ - "mode": self._mode.value if not isinstance(self._mode, str) else self._mode, + "mode": self._mode.value, "group_size": self._group_size, "ratio": self._ratio, "all_layers": self._all_layers, From 08042187354f6b0c9ac859de5359ee5bc24e06b6 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 24 Sep 2025 16:39:58 +0400 Subject: [PATCH 31/91] fix algorithm --- .../quantization/algorithms/weight_compression/algorithm.py | 3 +-- .../torch/fx/quantization/quantizer/openvino_adapter.py | 4 +++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py 
b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 45eec1bf4a6..20673e9e51b 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -35,7 +35,7 @@ def __init__( ) -> torch.fx.GraphModule: self._quantizer = quantizer - wc_config = quantizer._weight_compression_configuration + wc_config = self._quantizer.get_weight_compression_config() mode = wc_config.get("mode", None) ratio = wc_config.get("ratio", 1) @@ -43,7 +43,6 @@ def __init__( all_layers = wc_config.get("all_layers", False) backup_mode = wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM) self._sensitivity_metric = sensitivity_metric - self._algo = WeightCompression( mode=mode, ratio=ratio, diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py index 3640957d4fc..695c7082c56 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py @@ -9,6 +9,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any + import torch.fx from nncf.common.graph.graph import NNCFGraph @@ -39,5 +41,5 @@ def get_weight_compression_setup( def get_nodes_to_compress(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph): return self._quantizer.get_nodes_to_compress(model, nncf_graph) - def get_weight_compression_config(self): + def get_weight_compression_config(self) -> dict[str, Any]: return self._quantizer.weight_compression_configuration From 1f1fda33133d571ed4e5739a42382b67ccc8b637 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 24 Sep 2025 17:01:06 +0400 Subject: [PATCH 32/91] remove quantizer and observers from nncf. 
Instead import from executorch --- tests/executorch/observers.py | 168 --------- tests/executorch/test_quantizer.py | 564 ----------------------------- 2 files changed, 732 deletions(-) delete mode 100644 tests/executorch/observers.py delete mode 100644 tests/executorch/test_quantizer.py diff --git a/tests/executorch/observers.py b/tests/executorch/observers.py deleted file mode 100644 index 073d944e98b..00000000000 --- a/tests/executorch/observers.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright (c) 2025 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from abc import ABC -from abc import abstractmethod -from typing import Optional - -import torch -from torch.ao.quantization.observer import ObserverBase - -from nncf.experimental.torch.fx.node_utils import get_tensor_constant_from_node -from nncf.experimental.torch.fx.transformations import constant_update -from nncf.experimental.torch.fx.transformations import module_insertion -from nncf.experimental.torch.fx.transformations import node_removal -from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters -from nncf.quantization.algorithms.weight_compression.weight_lowering import do_integer_quantization -from nncf.tensor.tensor import Tensor as NNCFTensor -from nncf.torch.graph.transformations.commands import PTTargetPoint -from nncf.torch.graph.transformations.commands import TargetType -from nncf.torch.quantization.layers import BaseWeightsDecompressor -from nncf.torch.quantization.layers import INT4AsymmetricWeightsDecompressor -from nncf.torch.quantization.layers import INT4SymmetricWeightsDecompressor -from nncf.torch.quantization.layers import INT8AsymmetricWeightsDecompressor -from nncf.torch.quantization.layers import INT8SymmetricWeightsDecompressor - - -class WeightObserverBase(ObserverBase, ABC): - """ - Base implementation of an NNCF observer that defines the rules for compressing layer - weights into the OpenVINO representation. - """ - - def __init__( - self, - wc_param: WeightCompressionParameters, - dtype: torch.dtype, - **kwargs, - ) -> None: - """ - :param wc_param: Weight compression parameters container. - :param dtype: target dtype for the quantization. - """ - super().__init__(dtype=dtype, is_dynamic=False) - self._wc_param = wc_param - - def calculate_qparams( - self, - weight: torch.Tensor, - ) -> tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: - """ - Calculates quantization parameters: quantized weight, quantization scale and quantization zero point. 
- - :param weight: FP weight to be used for calculating qparams. - :return: A tuple containing the quantized weight, quantization scale and quantization zero point. - """ - wc_param = self._wc_param - wc_config = wc_param.compression_config - reduction_axes = wc_param.reduction_axes - q_weight, scale, zp = do_integer_quantization(NNCFTensor(weight), wc_config, reduction_axes=reduction_axes) - zp = zp.data if zp is not None else None - return q_weight.data, scale.data, zp - - def forward(self, x: torch.Tensor) -> torch.Tensor: - return x - - def convert(self, model: torch.fx.GraphModule, observer_node: torch.fx.Node) -> None: - """ - Replaces the given observer node from the given model with a quantized - weight and a OpenVINO specific decompression module. - - :param model: A `torch.fx.GraphModule` representing the statically traced model - with observer nodes attached and calibrated. - :param observer_node: The `torch.fx.Node` corresponding to the observer module for - the weight that is being transformed into a compressed representation. - """ - weight_node = observer_node.args[0] - original_weight = get_tensor_constant_from_node(weight_node, model) - q_weight, scale, zero_point = self.calculate_qparams(original_weight) - - decompressor = self._create_decompressor(scale, zero_point, q_weight, original_weight) - packed_q_weight = decompressor.pack_weight(q_weight) - - # Weight port id is 0 since observer is inserted for a single weight only. 
- constant_update(model, observer_node, packed_q_weight, input_port_id=0) - - compressed_weight_name = observer_node.all_input_nodes[0].name - decompressor_suffix = "_".join(compressed_weight_name.replace(".", "_").split("_")[:-2]) - decompressor_name = f"{decompressor.quantization_mode}_weights_decompressor_{decompressor_suffix}" - - module_insertion( - model, - decompressor, - [ - PTTargetPoint( - TargetType.OPERATOR_POST_HOOK, - target_node_name=compressed_weight_name, - ) - ], - decompressor_name, - ) - node_removal(model, observer_node, 0) - - @abstractmethod - def _create_decompressor( - self, - scale: torch.Tensor, - zero_point: Optional[torch.Tensor], - q_weight: torch.Tensor, - original_weight: torch.Tensor, - ) -> BaseWeightsDecompressor: - """ - Returns a respective NNCF decompressor for different types of quantization. - - :param scale: Calculated scale quantization parameter. - :param zero_point: Calculated zero_point quantization parameter. - :param q_weight: Calculated quantized weight. - :param original_weight: FP weight. - :return: NNCF observer according to the qmode which creates the decompression subgraph supported by OpenVINO. - """ - - -class INT4WeightObserver(WeightObserverBase): - """ - OpenVINO INT4 Weight Compression observer. - """ - - def _create_decompressor( - self, - scale: torch.Tensor, - zero_point: Optional[torch.Tensor], - q_weight: torch.Tensor, - original_weight: torch.Tensor, - ) -> BaseWeightsDecompressor: - if zero_point is None: - return INT4SymmetricWeightsDecompressor(scale, q_weight.shape, original_weight.shape, original_weight.dtype) - return INT4AsymmetricWeightsDecompressor( - scale, - zero_point, - q_weight.shape, - original_weight.shape, - original_weight.dtype, - ) - - -class INT8WeightObserver(WeightObserverBase): - """ - OpenVINO INT8 Weight Compression per channel observer. 
- """ - - def _create_decompressor( - self, - scale: torch.Tensor, - zero_point: Optional[torch.Tensor], - q_weight: torch.Tensor, - original_weight: torch.Tensor, - ) -> BaseWeightsDecompressor: - if zero_point is None: - return INT8SymmetricWeightsDecompressor(scale, original_weight.dtype) - return INT8AsymmetricWeightsDecompressor(scale, zero_point, original_weight.dtype) diff --git a/tests/executorch/test_quantizer.py b/tests/executorch/test_quantizer.py deleted file mode 100644 index 0b2379986ce..00000000000 --- a/tests/executorch/test_quantizer.py +++ /dev/null @@ -1,564 +0,0 @@ -# Copyright (c) 2025 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from collections import defaultdict -from enum import Enum -from typing import Any, Callable, Optional - -import torch.fx -from torch.ao.quantization.observer import HistogramObserver -from torch.ao.quantization.observer import PerChannelMinMaxObserver -from torch.ao.quantization.observer import UniformQuantizationObserverBase -from torch.ao.quantization.quantizer.quantizer import EdgeOrNode -from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation -from torch.ao.quantization.quantizer.quantizer import QuantizationSpec -from torch.ao.quantization.quantizer.quantizer import QuantizationSpecBase -from torch.ao.quantization.quantizer.quantizer import Quantizer -from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec - -import nncf # type: ignore[import-untyped] -import nncf.common.quantization as quantization # type: ignore[import-untyped] -import nncf.experimental.torch.fx as nncf_fx # type: ignore[import-untyped] -from nncf.common.graph.graph import NNCFGraph # type: ignore[import-untyped] -from nncf.common.graph.graph import NNCFNode -from nncf.quantization.algorithms.weight_compression.config import ( # type: ignore[import-untyped] - WeightCompressionParameters, -) -from nncf.quantization.quantize_model import get_weight_compression_configuration -from tests.executorch.observers import INT4WeightObserver -from tests.executorch.observers import INT8WeightObserver - -QUANT_ANNOTATION_KEY = "quantization_annotation" - - -class QuantizationMode(Enum): - """ - Defines special quantization modes. - - - INT8_SYM: INT8 symmetric quantization for both activations and weights. - - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. - - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models - - INT8WO_SYM: INT8 symmetric quantization for weights only. - - INT8WO_ASYM: INT8 asymmetric quantization for weights only. - - INT4WO_SYM: INT4 symmetric quantization for weights only. 
- - INT4WO_ASYM: INT4 asymmetric quantization for weights only - """ - - INT8_SYM = "int8_sym" - INT8_MIXED = "int8_mixed" - INT8_TRANSFORMER = "int8_transformer" - INT8WO_SYM = "int8wo_sym" - INT8WO_ASYM = "int8wo_asym" - INT4WO_SYM = "int4wo_sym" - INT4WO_ASYM = "int4wo_asym" - - -class OpenVINOQuantizer(Quantizer): - """ - Implementation of the Torch AO quantizer which annotates models with quantization annotations - optimally for the inference via OpenVINO. - """ - - WEIGHTS_ONLY_COMPRESSION_MODES = ( - QuantizationMode.INT4WO_SYM, - QuantizationMode.INT4WO_ASYM, - QuantizationMode.INT8WO_SYM, - QuantizationMode.INT8WO_ASYM, - ) - - def __init__( - self, - *, - mode: QuantizationMode = QuantizationMode.INT8_SYM, - **kwargs, - ): - """ - :param mode: Defines special quantization modes. - - INT8_SYM: INT8 symmetric quantization for both activations and weights. - - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. - - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models - Default value is INT8_SYM. - - INT4_SYM: Symmetric INT4 Weights-Only Compression - - INT4_ASYM: Asymmetric INT4 Weights-Only Compression - :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm. 
- """ - self.mode = mode - if self.mode not in OpenVINOQuantizer.WEIGHTS_ONLY_COMPRESSION_MODES: - if mode == QuantizationMode.INT8_SYM: - preset = quantization.structs.QuantizationPreset.PERFORMANCE - model_type = None - elif mode == QuantizationMode.INT8_MIXED: - preset = quantization.structs.QuantizationPreset.MIXED - model_type = None - else: - preset = None - model_type = nncf.parameters.ModelType.TRANSFORMER - self._algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( - preset=preset, model_type=model_type, **kwargs - ) - else: - self.weight_compression_configuration = get_weight_compression_configuration( - mode.value.replace("wo", ""), # Mode value has to match NNCF CompressWeightsMode - **kwargs, - ) - _weight_compression_configuration = self.weight_compression_configuration - subset_size = 1 # Doesn't really matter in this case since it is data-free. Should just be +ve - self._algo = nncf.quantization.algorithms.weight_compression.algorithm.WeightCompression( - subset_size=subset_size, **_weight_compression_configuration - ) - - def set_ignored_scope( - self, - names: Optional[list[str]] = None, - patterns: Optional[list[str]] = None, - types: Optional[list[str]] = None, - subgraphs: Optional[list[tuple[list[str], list[str]]]] = None, - validate: bool = True, - ) -> None: - """ - Provides an option to specify portions of model to be excluded from compression. - The ignored scope defines model sub-graphs that should be excluded from the quantization process. - - :param names: List of ignored node names. - :param patterns: List of regular expressions that define patterns for names of ignored nodes. - :param types: List of ignored operation types. - :param subgraphs: List of ignored subgraphs. - :param validate: If set to True, then a RuntimeError will be raised if any ignored scope does not match - in the model graph. 
- """ - self._algo.set_ignored_scope( - nncf.IgnoredScope( - names=names or [], - patterns=patterns or [], - types=types or [], - subgraphs=subgraphs or [], - validate=validate, - ) - ) - - def get_nncf_quantization_setup( - self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph - ) -> quantization.quantizer_setup.SingleConfigQuantizerSetup: - self._algo._set_backend_entity(model) - return self._algo.find_quantization_setup(model, nncf_graph) - - def get_nodes_to_compress(self, model, nncf_graph) -> list[NNCFNode]: - self._algo.set_backend_entity(model) - return self._algo.get_nodes_to_compress(nncf_graph) - - def get_nncf_weight_compression_setup( - self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph - ) -> quantization.quantizer_setup.SingleConfigQuantizerSetup: - nodes_to_compress = self.get_nodes_to_compress(model, nncf_graph) - return self._algo.get_weight_compression_parameters(model, nncf_graph, nodes_to_compress)[0] - - def _annotate_weight_compression( - self, - model: torch.fx.GraphModule, - graph: torch.fx.Graph, - nncf_graph: NNCFGraph, - node_vs_torch_annotation: defaultdict[torch.fx.Node, QuantizationAnnotation], - ) -> defaultdict[torch.fx.Node, QuantizationAnnotation]: - """ - Annotates the model graph with weight-only quantization specs. - - Identifies compressible nodes in the NNCF graph and attaches the corresponding - TorchAO quantization specifications to their weight edges for later transformation. - - :param model: The FX GraphModule to annotate. - :param graph: The underlying FX graph. - :param nncf_graph: The corresponding NNCF graph. - :param node_vs_torch_annotation: A mapping of FX nodes to quantization annotations. - :return: Updated mapping of FX nodes with weight compression annotations. 
- """ - all_wc_params = self.get_nncf_weight_compression_setup(model, nncf_graph) - - for wc_param in all_wc_params: - node_with_weight = wc_param.node_with_weight - target_node = nncf_fx.node_utils.get_graph_node_by_name(graph, node_with_weight.node_name) - annotation = node_vs_torch_annotation[target_node] - edge_or_node = self._get_weight_edge(target_node, nncf_graph) - qspec = self._get_torch_ao_qspec_from_nncf_config_for_wc(wc_param=wc_param) - self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) - - return node_vs_torch_annotation - - def _annotate_post_training_quantization( - self, - model: torch.fx.GraphModule, - graph: torch.fx.Graph, - nncf_graph: NNCFGraph, - node_vs_torch_annotation: defaultdict[torch.fx.Node, QuantizationAnnotation], - ) -> defaultdict[torch.fx.Node, QuantizationAnnotation]: - """ - Annotates the model graph with post-training quantization configurations. - - :param model: The FX GraphModule to annotate. - :param graph: The underlying FX graph. - :param nncf_graph: The corresponding NNCF graph. - :param node_vs_torch_annotation: A mapping of FX nodes to quantization annotations. - :return: Updated mapping of FX nodes with post-training quantization annotations. 
- """ - quantization_setup = self.get_nncf_quantization_setup(model, nncf_graph) - - for qp in quantization_setup.quantization_points.values(): - edge_or_node, annotation = self._get_edge_or_node_and_annotation( - graph, nncf_graph, qp, node_vs_torch_annotation - ) - qspec: QuantizationSpecBase = self._get_torch_ao_qspec_from_nncf_config_for_ptq(qp) - self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) - - for quantizer_ids in quantization_setup.unified_scale_groups.values(): - root_quantizer_id = self._get_unified_scales_root_quantizer_id( - nncf_graph, quantizer_ids, quantization_setup - ) - root_qp = quantization_setup.quantization_points[root_quantizer_id] - - if any(root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig for q_id in quantizer_ids): - qps = [quantization_setup.quantization_points[qid] for qid in quantizer_ids] - msg = ( - "Different quantization configs are set to one unified scale group:" - f"{[(qp.insertion_point.__dict__, str(qp.qconfig)) for qp in qps]}" - ) - raise nncf.InternalError(msg) - - root_target_node = nncf_fx.node_utils.get_graph_node_by_name( - graph, root_qp.insertion_point.target_node_name - ) - root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph) - - for quantizer_id in quantizer_ids: - if quantizer_id == root_quantizer_id: - continue - - qspec = SharedQuantizationSpec(root_edge_or_node) # type: ignore[assignment] - qp = quantization_setup.quantization_points[quantizer_id] - edge_or_node, annotation = self._get_edge_or_node_and_annotation( - graph, nncf_graph, qp, node_vs_torch_annotation - ) - self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) - - return node_vs_torch_annotation - - def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: - nncf_graph = nncf_fx.nncf_graph_builder.GraphConverter.create_nncf_graph(model) - graph = model.graph - node_vs_torch_annotation: defaultdict[torch.fx.Node, QuantizationAnnotation] = defaultdict( - 
QuantizationAnnotation - ) - - if self.mode in OpenVINOQuantizer.WEIGHTS_ONLY_COMPRESSION_MODES: - node_vs_torch_annotation = self._annotate_weight_compression( - model, graph, nncf_graph, node_vs_torch_annotation - ) - else: - node_vs_torch_annotation = self._annotate_post_training_quantization( - model, graph, nncf_graph, node_vs_torch_annotation - ) - - for node, annotation in node_vs_torch_annotation.items(): - assert QUANT_ANNOTATION_KEY not in node.meta - node.meta[QUANT_ANNOTATION_KEY] = annotation - - return model - - @staticmethod - def _get_unified_scales_root_quantizer_id( - nncf_graph: NNCFGraph, - quantizer_ids: list[int], - quantizer_setup: quantization.quantizer_setup.SingleConfigQuantizerSetup, - ) -> int: - """ - Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` - in the given NNCFGraph. This is required by the `_get_obs_or_fq_map` function. - Refer to: https://github.com/pytorch/pytorch/blob/main/torch/ao/quantization/pt2e/prepare.py#L291 - - :param nncf_graph: The NNCFGraph instance. - :param quantizer_ids: The list of quantizer IDs to evaluate. - :param quantizer_setup: The instance of SingleConfigQuantizerSetup. - :return: The ID of the earliest quantizer node in terms of `nncf_node.node_id`. 
- """ - nncf_node_quantizer_id = None - root_quantizer_id = None - for quantizer_id in quantizer_ids: - target_node_name = quantizer_setup.quantization_points[quantizer_id].insertion_point.target_node_name - nncf_node = nncf_graph.get_node_by_name(target_node_name) - if nncf_node_quantizer_id is None or nncf_node.node_id < nncf_node_quantizer_id: - root_quantizer_id = quantizer_id - nncf_node_quantizer_id = nncf_node.node_id - if root_quantizer_id is None: - msg = "Root quantizer ids can't be None" - raise nncf.InternalError(msg) - return root_quantizer_id - - @staticmethod - def _get_edge_or_node_and_annotation( - graph: torch.fx.Graph, - nncf_graph: NNCFGraph, - qp: quantization.quantizer_setup.QuantizationPointBase, - node_vs_torch_annotation: dict[torch.fx.Node, QuantizationAnnotation], - ) -> tuple[EdgeOrNode, QuantizationAnnotation]: - """ - Retrieves the edge or node and its corresponding QuantizationAnnotation based on the given graph, - quantization point, and node-to-annotation mapping. - - :param graph: torch.fx.Graph instance. - :param nncf_graph: NNCFGraph instance. - :param qp: QuantizationPointBase instance. - :param node_vs_torch_annotation: A dictionary mapping torch.fx.GraphNode objects to their respective - QuantizationAnnotations. - :return: A tuple containing the EdgeOrNode and its associated QuantizationAnnotation. - """ - target_node = nncf_fx.node_utils.get_graph_node_by_name(graph, qp.insertion_point.target_node_name) - annotation = node_vs_torch_annotation[target_node] - edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) - return edge_or_node, annotation - - @staticmethod - def _get_weight_edge( - target_node: torch.fx.Node, - nncf_graph: NNCFGraph, - ) -> tuple[torch.fx.Node, torch.fx.Node]: - """ - Returns the FX node corresponding to the weight tensor input of a given operator node. - Uses the NNCF graph to identify which input port of the target node holds the weight. 
- If multiple weight ports are present, a warning is issued and only the first one is used. - - :param target_node: FX node representing a weighted operation (e.g., Linear, Conv). - :param nncf_graph: NNCFGraph used to determine weight port indices. - :return: Edge represented by a Tuple of (weight_node, target_node), where weight_node is the - FX node supplying the weight. - """ - nncf_node = nncf_graph.get_node_by_name(target_node.name) - weights_ports_ids = nncf.torch.model_graph_manager.get_weight_tensor_port_ids(nncf_node, nncf_graph) - if len(weights_ports_ids) > 1: - # TODO(dlyakhov): support quantization for nodes with several weights - nncf.common.logging.nncf_logger.warning( - f"Quantization of the weighted node {target_node.name}" - " is not yet supported by the OpenVINOQuantizer." - f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." - f" Quantizable weights are located on ports: {weights_ports_ids}." - ) - weight_node = target_node.all_input_nodes[weights_ports_ids[0]] - return (weight_node, target_node) - - @staticmethod - def _get_edge_or_node( - target_node: torch.fx.Node, - qp: quantization.quantizer_setup.QuantizationPointBase, - nncf_graph: NNCFGraph, - ) -> EdgeOrNode: - """ - Returns the edge or node based on the given target node and quantization point. - - :param target_node: Target node instance. - :param qp: QuantizationPointBase instance. - :param graph: NNCFGraph instance. - :return: The corresponding EdgeOrNode derived from the target node and quantization point. 
- """ - ip = qp.insertion_point - if qp.is_weight_quantization_point(): - OpenVINOQuantizer._get_weight_edge(target_node, nncf_graph) - - if ip.input_port_id is None: - return target_node - - node = target_node.all_input_nodes[ip.input_port_id] - return (node, target_node) - - @staticmethod - def _fill_torch_ao_annotation( - edge_or_node: EdgeOrNode, - qspec: QuantizationSpecBase, - annotation_to_update: QuantizationAnnotation, - ) -> None: - """ - Helper method to update the annotation_to_update based on the specified edge_or_node and qspec. - - :param edge_or_node: The target EdgeOrNode to be used for the update. - :param qspec: An instance of QuantizationSpecBase representing the quantization specification to apply. - :param annotation_to_update: The annotation to update based on the edge_or_node and qspec. - """ - if isinstance(edge_or_node, torch.fx.Node): - annotation_to_update.output_qspec = qspec - else: - annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec - - @staticmethod - def _get_torch_ao_qspec_from_nncf_config_for_wc( - wc_param: WeightCompressionParameters, - ) -> QuantizationSpec: - """ - Returns a TorchAO QuantizationSpec based on NNCF weight compression parameter. - - :param wc_param: NNCF Weight compression parameters for the node. - :return: A TorchAO QuantizationSpec. 
- """ - observer: type[UniformQuantizationObserverBase] - - extra_args: dict[str, Any] = {} - - qmode = wc_param.compression_config.mode - extra_args["wc_param"] = wc_param - is_asym_mode = wc_param.compression_config.is_asym_mode - if qmode in [ - nncf.CompressWeightsMode.INT4_ASYM, - nncf.CompressWeightsMode.INT4_SYM, - ]: - observer = INT4WeightObserver # type: ignore[type-abstract] - quant_min = -8 if not is_asym_mode else 0 - quant_max = 7 if not is_asym_mode else 15 - dtype = torch.int8 - channel_axis = 0 - torch_qscheme = torch_qscheme = ( - torch.per_channel_symmetric if not is_asym_mode else torch.per_channel_affine - ) - else: - observer = INT8WeightObserver # type: ignore[type-abstract] - quant_min = -128 if not is_asym_mode else 0 - quant_max = 127 if not is_asym_mode else 255 - dtype = torch.int8 - channel_axis = 0 - torch_qscheme = torch.per_channel_symmetric if not is_asym_mode else torch.per_channel_affine - return QuantizationSpec( - dtype=dtype, - observer_or_fake_quant_ctr=observer.with_args(**extra_args), - quant_min=quant_min, - quant_max=quant_max, - qscheme=torch_qscheme, - ch_axis=channel_axis, - is_dynamic=False, - ) - - @staticmethod - def _get_torch_ao_qspec_from_nncf_config_for_ptq( - qp: quantization.quantizer_setup.QuantizationPointBase, - ) -> QuantizationSpec: - """ - Returns a TorchAO QuantizationSpec based on NNCF quantization point. - - :param qp: Quantization point from NNCF. - :return: A TorchAO QuantizationSpec. 
- """ - observer: type[UniformQuantizationObserverBase] - - # Eps value is copied from nncf/torch/quantization/layers.py - extra_args: dict[str, Any] = {"eps": 1e-16} - - is_weight = qp.is_weight_quantization_point() - qconfig = qp.qconfig - dtype = torch.int8 - quant_min = None - quant_max = None - channel_axis = None - - if qconfig.per_channel: - torch_qscheme = ( - torch.per_channel_symmetric - if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC - else torch.per_channel_affine - ) - else: - torch_qscheme = ( - torch.per_tensor_symmetric - if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC - else torch.per_tensor_affine - ) - if is_weight: - observer = PerChannelMinMaxObserver - quant_min = -128 - quant_max = 127 - dtype = torch.int8 - channel_axis = 0 - torch_qscheme = ( - torch.per_channel_symmetric - if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC - else torch.per_channel_affine - ) - else: - observer = ( - HistogramObserver - if torch_qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine] - else PerChannelMinMaxObserver - ) - quant_min = 0 - quant_max = 255 - dtype = torch.int8 if qconfig.signedness_to_force else torch.uint8 - channel_axis = 1 # channel dim for activations - return QuantizationSpec( - dtype=dtype, - observer_or_fake_quant_ctr=observer.with_args(**extra_args), - quant_min=quant_min, - quant_max=quant_max, - qscheme=torch_qscheme, - ch_axis=channel_axis, - is_dynamic=False, - ) - - def validate(self, model: torch.fx.GraphModule) -> None: - pass - - -def quantize_model( - captured_model: torch.fx.GraphModule, - calibration_dataset: torch.utils.data.DataLoader, - *, - mode: QuantizationMode = QuantizationMode.INT8_SYM, - subset_size: int = 300, - fast_bias_correction: Optional[bool] = True, - smooth_quant: bool = False, - transform_fn: Optional[Callable[[Any], Any]] = None, - extra_quantizer_options: Optional[dict[str, Any]] = None, - **kwargs, -) -> torch.fx.GraphModule: - """ - 
Quantizes a model using NNCF quantize_pt2e API. - - :param captured_model: The model to be quantized, represented as a torch.fx.GraphModule. - :param calibration_dataset: A DataLoader containing calibration data for quantization. - :param mode: Defines special quantization modes. - - INT8_SYM: INT8 symmetric quantization for both activations and weights. - - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. - - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models - Default value is INT8_SYM. - :param subset_size: Size of a subset to calculate activations - statistics used for quantization. - :param fast_bias_correction: Setting this option to `False` enables a different - bias correction method which is more accurate, in general, and takes - more time but requires less memory. None disables the bias correction algorithm. - :param smooth_quant: Setting this option to `True` enables the SmoothQuant algorithm. - :param extra_quantizer_options: A dictionary containing additional configuration options - for the OpenVINOQuantizer. - :param kwargs: The keyword arguments for the nncf quantize_pt2e function. - :return: The quantized model as a torch.fx.GraphModule. - """ - extra_quantizer_options = extra_quantizer_options or {} - if "mode" in extra_quantizer_options: - print(f'Ignoring "mode" from the quantizer_config. 
Using parameter mode = {mode}') - del extra_quantizer_options["mode"] - - quantizer = OpenVINOQuantizer(mode=mode, **extra_quantizer_options) - - print("PTQ: Quantize the model") - - if "fold_quantize" not in kwargs: - kwargs["fold_quantize"] = False - - quantized_model = nncf_fx.quantize_pt2e( - captured_model, - quantizer, - subset_size=subset_size, - calibration_dataset=nncf.Dataset(calibration_dataset, transform_fn), - fast_bias_correction=fast_bias_correction, - smooth_quant=smooth_quant, - **kwargs, - ) - return quantized_model From 623ce461cd94cbfeb99fb1a0a4a661f46f0fe86f Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 1 Oct 2025 12:12:17 +0400 Subject: [PATCH 33/91] rework wc algorithm so that get_weight_comrpession_params becomes more static. Accepts data_aware_mixed_precision and data_aware_algo flags and mixed precision algorithm as input --- .../weight_compression/algorithm.py | 56 ++++++++++++------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index b6039f50349..4e9e67aa9dc 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -49,6 +49,7 @@ from nncf.quantization.algorithms.weight_compression.scale_estimation import ScaleEstimation from nncf.quantization.algorithms.weight_compression.weight_lowering import WeightCompressionConfig from nncf.quantization.algorithms.weight_compression.weight_lowering import get_reduction_channel_size +from nncf.quantization.algorithms.weight_compression.mixed_precision import MixedPrecisionCriterion from nncf.scopes import IgnoredScope from nncf.scopes import get_ignored_node_names_from_ignored_scope from nncf.tensor import Tensor @@ -331,8 +332,7 @@ def __init__( advanced_parameters if advanced_parameters is not None else AdvancedCompressionParameters() ) - criterion_cls = 
MIXED_PRECISION_CRITERIA.get(self._sensitivity_metric) - self._mixed_precision_algo = criterion_cls(self._ratio, self._subset_size) + self._mixed_precision_algo = self.get_mixed_precision_algorithm(self._sensitivity_metric, self._ratio, self._subset_size) self._statistics_path = self._advanced_parameters.statistics_path self._group_size_fallback_mode = self._advanced_parameters.group_size_fallback_mode @@ -365,14 +365,18 @@ def __init__( scale_estimation_params.weight_penalty, ) - self._data_aware_mixed_precision = ( - self._sensitivity_metric != SensitivityMetric.WEIGHT_QUANTIZATION_ERROR and self._ratio != 1.0 - ) - self._data_aware_compression = ( - (self._awq and self._advanced_parameters.awq_params.prefer_data_aware_scaling) - or self._scale_estimation - or self._lora_correction - or self._gptq + self._data_aware_mixed_precision = self.is_data_aware_mixed_precision(self._sensitivity_metric, self._ratio) + self._data_aware_compression = self.is_data_aware_compression(self._awq, self._scale_estimation, self._lora_correction, self._gptq, self._advanced_parameters) + + def is_data_aware_mixed_precision(self, sensitivity_metric: SensitivityMetric, ratio: int) -> bool: + return sensitivity_metric != SensitivityMetric.WEIGHT_QUANTIZATION_ERROR and ratio != 1.0 + + def is_data_aware_compression(self, awq: bool, scale_estimation: bool, lora_correction: bool, gptq: bool, advanced_parameters: AdvancedCompressionParameters) -> bool: + return ( + (awq and advanced_parameters.awq_params.prefer_data_aware_scaling) + or scale_estimation + or lora_correction + or gptq ) @property @@ -414,6 +418,10 @@ def set_backend_entity(self, model: TModel) -> None: msg = f"Cannot return backend-specific entity because {model_backend.value} is not supported!" 
raise nncf.UnsupportedBackendError(msg) + def get_mixed_precision_algorithm(self, sensitivity_metric: nncf.SensitivityMetric, ratio: int, subset_size: Optional[int] = None) -> MixedPrecisionCriterion: + criterion_cls = MIXED_PRECISION_CRITERIA.get(self._sensitivity_metric) + return criterion_cls(ratio, subset_size) + def get_ignored_node_names(self, nncf_graph: NNCFGraph) -> set[str]: """ Gets a set of ignored node names for weight compression. @@ -512,6 +520,7 @@ def _set_weight_compression_config( graph: NNCFGraph, statistics_points: StatisticPointsContainer, group_size_values: dict[str, int], + mixed_precision_algo: MixedPrecisionCriterion, ) -> None: """ Sets the appropriate compression configuration for weights based on some criteria. @@ -524,7 +533,7 @@ def _set_weight_compression_config( :param group_size_values: A dictionary mapping weight names to their group size values. """ if self._ratio < 1 and len(ratio_defining_params) > 0: - primary_precision_weight_params = self._mixed_precision_algo.apply( + primary_precision_weight_params = mixed_precision_algo.apply( model, graph, statistics_points, weight_params=ratio_defining_params ) else: @@ -775,6 +784,9 @@ def collect_weight_compression_statistics( graph: NNCFGraph, dataset: Dataset, weight_params: list[WeightCompressionParameters], + data_aware_compression, + data_aware_mixed_precision, + mixed_precision_algo, statistic_points: Optional[StatisticPointsContainer] = None, ) -> Optional[dict[str, Any]]: """ @@ -789,7 +801,7 @@ def collect_weight_compression_statistics( :return: A dictionary of collected statistics, or None if not applicable. 
""" statistics = None - if not (self._data_aware_mixed_precision or self._data_aware_compression) or not dataset: + if not (data_aware_compression or data_aware_mixed_precision) or not dataset: return statistics, statistic_points matmul_nodes_to_compress = [ wp.node_with_weight @@ -799,7 +811,7 @@ def collect_weight_compression_statistics( matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) if statistic_points is None: - statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) + statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys(), data_aware_compression, data_aware_mixed_precision, mixed_precision_algo) statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) statistics = self._get_statistics_for_weights_compression(matmul_input_to_output_nodes_map, statistic_points) @@ -810,6 +822,9 @@ def get_weight_compression_parameters( model: TModel, graph: NNCFGraph, nodes_to_compress: list[NNCFNode], + data_aware_mixed_precision: bool, + data_aware_compression: bool, + mixed_precision_algo: MixedPrecisionCriterion, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> tuple[list[WeightCompressionParameters], Optional[dict[str, WCTensorStatistic]]]: @@ -907,11 +922,11 @@ def get_weight_compression_parameters( # Collect statistics for the weights compression weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params statistics, statistic_points = self.collect_weight_compression_statistics( - model, graph, dataset, weight_params, statistic_points + model, graph, dataset, weight_params, data_aware_compression, data_aware_mixed_precision, mixed_precision_algo, statistic_points ) # Set weight compression configuration - self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, 
group_size_values) + self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values, mixed_precision_algo) # Print statistics nncf_logger.info( @@ -1015,7 +1030,7 @@ def apply( nodes_to_compress = self.get_nodes_to_compress(graph) # Get processed weight compression parameters ready for compression all_weight_params, statistics = self.get_weight_compression_parameters( - model, graph, nodes_to_compress, statistic_points, dataset + model, graph, nodes_to_compress, self._data_aware_mixed_precision, self._data_aware_compression, self._mixed_precision_algo, statistic_points, dataset ) transformed_model = self.apply_wc_algos(model, graph, all_weight_params, statistics, dataset) @@ -1109,6 +1124,9 @@ def get_statistic_points( model: TModel, graph: NNCFGraph, nodes_and_port_ids: Iterable[tuple[NNCFNode, int]], + data_aware_compression: bool, + data_aware_mixed_precision: bool, + mixed_precision_algo: MixedPrecisionCriterion, ) -> StatisticPointsContainer: """ Returns statistic points, for which StatisticsCollector should collect statistics. 
@@ -1120,7 +1138,7 @@ def get_statistic_points( """ statistic_container = StatisticPointsContainer() # Statistics for data aware algorithms - if self._data_aware_compression: + if data_aware_compression: for node, output_port_id in nodes_and_port_ids: statistic_point = self._backend_entity.target_point( TargetType.POST_LAYER_OPERATION, node.node_name, port_id=output_port_id @@ -1137,8 +1155,8 @@ def get_statistic_points( ) ) # Statistics for mixed precision algorithm - if self._data_aware_mixed_precision: - mixed_precision_statistics = self._mixed_precision_algo.get_statistic_points( + if data_aware_mixed_precision: + mixed_precision_statistics = mixed_precision_algo.get_statistic_points( model, graph, nodes_and_port_ids ) for points in mixed_precision_statistics.values(): From d14a6eb480f4bb0414d70115edc5a608d7477abb Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 1 Oct 2025 16:14:19 +0400 Subject: [PATCH 34/91] fix bugs; use sensitivity metric instead of mixed precision algo --- .../algorithms/weight_compression/algorithm.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 4e9e67aa9dc..f4fbd5c4227 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -332,7 +332,6 @@ def __init__( advanced_parameters if advanced_parameters is not None else AdvancedCompressionParameters() ) - self._mixed_precision_algo = self.get_mixed_precision_algorithm(self._sensitivity_metric, self._ratio, self._subset_size) self._statistics_path = self._advanced_parameters.statistics_path self._group_size_fallback_mode = self._advanced_parameters.group_size_fallback_mode @@ -419,7 +418,7 @@ def set_backend_entity(self, model: TModel) -> None: raise nncf.UnsupportedBackendError(msg) def get_mixed_precision_algorithm(self, 
sensitivity_metric: nncf.SensitivityMetric, ratio: int, subset_size: Optional[int] = None) -> MixedPrecisionCriterion: - criterion_cls = MIXED_PRECISION_CRITERIA.get(self._sensitivity_metric) + criterion_cls = MIXED_PRECISION_CRITERIA.get(sensitivity_metric) return criterion_cls(ratio, subset_size) def get_ignored_node_names(self, nncf_graph: NNCFGraph) -> set[str]: @@ -821,10 +820,9 @@ def get_weight_compression_parameters( self, model: TModel, graph: NNCFGraph, - nodes_to_compress: list[NNCFNode], data_aware_mixed_precision: bool, data_aware_compression: bool, - mixed_precision_algo: MixedPrecisionCriterion, + sensitivity_metric: SensitivityMetric, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> tuple[list[WeightCompressionParameters], Optional[dict[str, WCTensorStatistic]]]: @@ -845,9 +843,12 @@ def get_weight_compression_parameters( """ all_weight_params: list[WeightCompressionParameters] = [] skipped_weight_params: list[WeightCompressionParameters] = [] - + + mixed_precision_algo = self.get_mixed_precision_algorithm(sensitivity_metric, self._ratio, self._subset_size) + weight_names = set() is_last_layer_skipped = False + nodes_to_compress = self.get_nodes_to_compress(graph) n = len(nodes_to_compress) ignored_names = self.get_ignored_node_names(graph) @@ -1027,10 +1028,10 @@ def apply( dataset: Optional[Dataset] = None, ) -> TModel: self.set_backend_entity(model) - nodes_to_compress = self.get_nodes_to_compress(graph) + # Get processed weight compression parameters ready for compression all_weight_params, statistics = self.get_weight_compression_parameters( - model, graph, nodes_to_compress, self._data_aware_mixed_precision, self._data_aware_compression, self._mixed_precision_algo, statistic_points, dataset + model, graph, self._data_aware_mixed_precision, self._data_aware_compression, self._sensitivity_metric, statistic_points, dataset ) transformed_model = self.apply_wc_algos(model, graph, 
all_weight_params, statistics, dataset) From e91b455e67d9126877708ae1a93b023af32014d6 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 6 Oct 2025 12:12:40 +0400 Subject: [PATCH 35/91] update algorithm with new reworking --- .../weight_compression/algorithm.py | 205 +++++++----------- 1 file changed, 78 insertions(+), 127 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index f4fbd5c4227..0a0ba42a663 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -49,7 +49,6 @@ from nncf.quantization.algorithms.weight_compression.scale_estimation import ScaleEstimation from nncf.quantization.algorithms.weight_compression.weight_lowering import WeightCompressionConfig from nncf.quantization.algorithms.weight_compression.weight_lowering import get_reduction_channel_size -from nncf.quantization.algorithms.weight_compression.mixed_precision import MixedPrecisionCriterion from nncf.scopes import IgnoredScope from nncf.scopes import get_ignored_node_names_from_ignored_scope from nncf.tensor import Tensor @@ -332,6 +331,8 @@ def __init__( advanced_parameters if advanced_parameters is not None else AdvancedCompressionParameters() ) + criterion_cls = MIXED_PRECISION_CRITERIA.get(self._sensitivity_metric) + self._mixed_precision_algo = criterion_cls(self._ratio, self._subset_size) self._statistics_path = self._advanced_parameters.statistics_path self._group_size_fallback_mode = self._advanced_parameters.group_size_fallback_mode @@ -364,18 +365,14 @@ def __init__( scale_estimation_params.weight_penalty, ) - self._data_aware_mixed_precision = self.is_data_aware_mixed_precision(self._sensitivity_metric, self._ratio) - self._data_aware_compression = self.is_data_aware_compression(self._awq, self._scale_estimation, self._lora_correction, self._gptq, self._advanced_parameters) - - def 
is_data_aware_mixed_precision(self, sensitivity_metric: SensitivityMetric, ratio: int) -> bool: - return sensitivity_metric != SensitivityMetric.WEIGHT_QUANTIZATION_ERROR and ratio != 1.0 - - def is_data_aware_compression(self, awq: bool, scale_estimation: bool, lora_correction: bool, gptq: bool, advanced_parameters: AdvancedCompressionParameters) -> bool: - return ( - (awq and advanced_parameters.awq_params.prefer_data_aware_scaling) - or scale_estimation - or lora_correction - or gptq + self._data_aware_mixed_precision = ( + self._sensitivity_metric != SensitivityMetric.WEIGHT_QUANTIZATION_ERROR and self._ratio != 1.0 + ) + self._data_aware_compression = ( + (self._awq and self._advanced_parameters.awq_params.prefer_data_aware_scaling) + or self._scale_estimation + or self._lora_correction + or self._gptq ) @property @@ -417,10 +414,6 @@ def set_backend_entity(self, model: TModel) -> None: msg = f"Cannot return backend-specific entity because {model_backend.value} is not supported!" raise nncf.UnsupportedBackendError(msg) - def get_mixed_precision_algorithm(self, sensitivity_metric: nncf.SensitivityMetric, ratio: int, subset_size: Optional[int] = None) -> MixedPrecisionCriterion: - criterion_cls = MIXED_PRECISION_CRITERIA.get(sensitivity_metric) - return criterion_cls(ratio, subset_size) - def get_ignored_node_names(self, nncf_graph: NNCFGraph) -> set[str]: """ Gets a set of ignored node names for weight compression. @@ -519,7 +512,6 @@ def _set_weight_compression_config( graph: NNCFGraph, statistics_points: StatisticPointsContainer, group_size_values: dict[str, int], - mixed_precision_algo: MixedPrecisionCriterion, ) -> None: """ Sets the appropriate compression configuration for weights based on some criteria. @@ -532,7 +524,7 @@ def _set_weight_compression_config( :param group_size_values: A dictionary mapping weight names to their group size values. 
""" if self._ratio < 1 and len(ratio_defining_params) > 0: - primary_precision_weight_params = mixed_precision_algo.apply( + primary_precision_weight_params = self._mixed_precision_algo.apply( model, graph, statistics_points, weight_params=ratio_defining_params ) else: @@ -777,54 +769,10 @@ def is_weight_compression_supported( return is_supported_dtype and not no_bit_reduction - def collect_weight_compression_statistics( - self, - model: TModel, - graph: NNCFGraph, - dataset: Dataset, - weight_params: list[WeightCompressionParameters], - data_aware_compression, - data_aware_mixed_precision, - mixed_precision_algo, - statistic_points: Optional[StatisticPointsContainer] = None, - ) -> Optional[dict[str, Any]]: - """ - Collects statistics for weight compression if data-aware compression or - mixed-precision is enabled. - - :param model: Backend-specific input model. - :param graph: NNCFGraph instance. - :param dataset: Dataset for statistics collection. - :param weight_params: Weight parameters for which to collect statistics. - :param statistic_points: Optional pre-collected statistic points. - :return: A dictionary of collected statistics, or None if not applicable. 
- """ - statistics = None - if not (data_aware_compression or data_aware_mixed_precision) or not dataset: - return statistics, statistic_points - matmul_nodes_to_compress = [ - wp.node_with_weight - for wp in weight_params - if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes - ] - matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) - - if statistic_points is None: - statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys(), data_aware_compression, data_aware_mixed_precision, mixed_precision_algo) - statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) - - statistics = self._get_statistics_for_weights_compression(matmul_input_to_output_nodes_map, statistic_points) - return statistics, statistic_points - def get_weight_compression_parameters( self, model: TModel, graph: NNCFGraph, - data_aware_mixed_precision: bool, - data_aware_compression: bool, - sensitivity_metric: SensitivityMetric, - statistic_points: Optional[StatisticPointsContainer] = None, - dataset: Optional[Dataset] = None, ) -> tuple[list[WeightCompressionParameters], Optional[dict[str, WCTensorStatistic]]]: """ Generates a list of weight compression parameters based on the Weight Compression algorithm @@ -841,14 +789,13 @@ def get_weight_compression_parameters( Compression algorithm configuration, and a mapping of target node names to the collected statistics. 
""" + nodes_to_compress = self.get_nodes_to_compress(graph) + all_weight_params: list[WeightCompressionParameters] = [] skipped_weight_params: list[WeightCompressionParameters] = [] - - mixed_precision_algo = self.get_mixed_precision_algorithm(sensitivity_metric, self._ratio, self._subset_size) - + weight_names = set() is_last_layer_skipped = False - nodes_to_compress = self.get_nodes_to_compress(graph) n = len(nodes_to_compress) ignored_names = self.get_ignored_node_names(graph) @@ -920,33 +867,78 @@ def get_weight_compression_parameters( else: group_size_values = {w_params.weight_name: self._group_size for w_params in ratio_defining_params} - # Collect statistics for the weights compression - weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params - statistics, statistic_points = self.collect_weight_compression_statistics( - model, graph, dataset, weight_params, data_aware_compression, data_aware_mixed_precision, mixed_precision_algo, statistic_points - ) - - # Set weight compression configuration - self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values, mixed_precision_algo) - # Print statistics nncf_logger.info( self._get_bitwidth_distribution_str(all_weight_params, ratio_defining_params, skipped_weight_params) ) - # Filter all_weight_params and by excluding nodes that should remain in their original floating-point precision - all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) + return all_weight_params, ratio_defining_params, group_size_values - return all_weight_params, statistics + def _collect_statistics_and_statistic_points( + self, model, graph, statistic_points, dataset, ratio_defining_params, all_weight_params + ): + if not dataset or not (self._data_aware_mixed_precision or self._data_aware_compression): + return None, statistic_points + weight_params = ratio_defining_params if self._backup_mode 
== BackupMode.NONE else all_weight_params + matmul_nodes_to_compress = [ + wp.node_with_weight + for wp in weight_params + if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes + ] + matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) + if statistic_points is None: + statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) + statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) + statistics_aggregator.register_statistic_points(statistic_points) + statistics_aggregator.collect_statistics(model, graph) + statistic_points = statistics_aggregator.statistic_points + return self._get_statistics_for_weights_compression( + matmul_input_to_output_nodes_map, statistic_points + ), statistic_points - def apply_wc_algos( + def apply( self, model: TModel, graph: NNCFGraph, - all_weight_params: list[WeightCompressionParameters], - statistics: dict[str, Any], + statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> TModel: + self.set_backend_entity(model) + + # Get processed weight compression parameters ready for compression + all_weight_params, ratio_defining_params, group_size_values = self.get_weight_compression_parameters( + model, graph + ) + return self.apply_with_parameters( + model, + graph, + dataset, + statistic_points, + all_weight_params, + ratio_defining_params, + group_size_values, + ) + + def apply_with_parameters( + self, + model, + graph, + dataset, + statistic_points, + all_weight_params, + ratio_defining_params, + group_size_values, + ): + # Collect statistics for the weights compression + statistics, statistic_points = self._collect_statistics_and_statistic_points( + model, graph, statistic_points, dataset, ratio_defining_params, all_weight_params + ) + # Set weight compression configuration + self._set_weight_compression_config(ratio_defining_params, model, graph, 
statistic_points, group_size_values) + + # Filter all_weight_params and by excluding nodes that should remain in their original floating-point precision + all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) + if self._awq: model = self.awq_algo.apply(model, graph, all_weight_params, statistics, self._backend_entity) # After applying AWQ we need to update statistics since AWQ alters the activations @@ -1017,24 +1009,6 @@ def apply_wc_algos( }, algo_name="weight_compression", ) - - return transformed_model - - def apply( - self, - model: TModel, - graph: NNCFGraph, - statistic_points: Optional[StatisticPointsContainer] = None, - dataset: Optional[Dataset] = None, - ) -> TModel: - self.set_backend_entity(model) - - # Get processed weight compression parameters ready for compression - all_weight_params, statistics = self.get_weight_compression_parameters( - model, graph, self._data_aware_mixed_precision, self._data_aware_compression, self._sensitivity_metric, statistic_points, dataset - ) - transformed_model = self.apply_wc_algos(model, graph, all_weight_params, statistics, dataset) - return transformed_model def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> tuple[NNCFNode, int]: @@ -1100,34 +1074,11 @@ def get_compression_nodes_info( matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) return nodes_to_compress, matmul_input_to_output_nodes_map - def _collect_statistics( - self, - dataset: Dataset, - graph: NNCFGraph, - model: TModel, - statistic_points: StatisticPointsContainer, - ): - """ - Creates statistics aggregator, registers all statistics specified for algorithm, and then collect them. - - :param dataset: Dataset to collect values. - :param graph: Model graph. - :param model: Model for statistics collection. - :param statistic_points: Statistics points. 
- """ - statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) - statistics_aggregator.register_statistic_points(statistic_points) - statistics_aggregator.collect_statistics(model, graph) - return statistics_aggregator.statistic_points - def get_statistic_points( self, model: TModel, graph: NNCFGraph, nodes_and_port_ids: Iterable[tuple[NNCFNode, int]], - data_aware_compression: bool, - data_aware_mixed_precision: bool, - mixed_precision_algo: MixedPrecisionCriterion, ) -> StatisticPointsContainer: """ Returns statistic points, for which StatisticsCollector should collect statistics. @@ -1139,7 +1090,7 @@ def get_statistic_points( """ statistic_container = StatisticPointsContainer() # Statistics for data aware algorithms - if data_aware_compression: + if self._data_aware_compression: for node, output_port_id in nodes_and_port_ids: statistic_point = self._backend_entity.target_point( TargetType.POST_LAYER_OPERATION, node.node_name, port_id=output_port_id @@ -1156,8 +1107,8 @@ def get_statistic_points( ) ) # Statistics for mixed precision algorithm - if data_aware_mixed_precision: - mixed_precision_statistics = mixed_precision_algo.get_statistic_points( + if self._data_aware_mixed_precision: + mixed_precision_statistics = self._mixed_precision_algo.get_statistic_points( model, graph, nodes_and_port_ids ) for points in mixed_precision_statistics.values(): @@ -1202,4 +1153,4 @@ def _get_statistics_for_weights_compression( # Each activation node may have multiple MatMul nodes which it is an input to for node in matmul_nodes: statistics[node.node_name] = copy.deepcopy(stats) - return statistics + return statistics \ No newline at end of file From 448bf84cdedc7912ac1e2cbdadc99bbde38855ce Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 6 Oct 2025 16:27:49 +0400 Subject: [PATCH 36/91] changes --- .../weight_compression/algorithm.py | 85 +++++++++---------- .../quantizer/openvino_adapter.py | 9 +- .../weight_compression/algorithm.py | 66 ++++++++------ 3 
files changed, 83 insertions(+), 77 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 20673e9e51b..7d41288588d 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -29,7 +29,7 @@ def __init__( scale_estimation: bool = False, gptq: bool = False, lora_correction: bool = False, - sensitivity_metric: nncf.SensitivityMetric = None, + sensitivity_metric: nncf.SensitivityMetric = SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, compression_format: nncf.CompressionFormat = nncf.CompressionFormat.DQ, advanced_parameters: nncf.AdvancedCompressionParameters = None, ) -> torch.fx.GraphModule: @@ -37,27 +37,34 @@ def __init__( wc_config = self._quantizer.get_weight_compression_config() - mode = wc_config.get("mode", None) - ratio = wc_config.get("ratio", 1) - group_size = wc_config.get("group_size", 128) - all_layers = wc_config.get("all_layers", False) - backup_mode = wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM) + self._mode = wc_config.get("mode", None) + self._awq = awq + self._gptq = gptq + self._scale_estimation = scale_estimation + self._subset_size = subset_size + self._advanced_parameters = advanced_parameters + self._lora_correction = lora_correction + self._ratio = wc_config.get("ratio", 1) + self._group_size = wc_config.get("group_size", 128) + self._all_layers = wc_config.get("all_layers", False) + self._backup_mode = wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM) self._sensitivity_metric = sensitivity_metric + self._compression_format = compression_format self._algo = WeightCompression( - mode=mode, - ratio=ratio, - group_size=group_size, - ignored_scope=nncf.IgnoredScope(), # only compress "nodes_to_compress" - all_layers=all_layers, - sensitivity_metric=self._sensitivity_metric or 
SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, - awq=awq, - subset_size=subset_size, - scale_estimation=scale_estimation, - gptq=gptq, - lora_correction=lora_correction, - backup_mode=backup_mode, - compression_format=compression_format, - advanced_parameters=advanced_parameters, + mode=self._mode, + ratio=self._ratio, + group_size=self._group_size, + ignored_scope=nncf.IgnoredScope(), # This is already defined in the quantizer object + all_layers=self._all_layers, + sensitivity_metric=self._sensitivity_metric, + awq=self._awq, + subset_size=self._subset_size, + scale_estimation=self._scale_estimation, + gptq=self._gptq, + lora_correction=self._lora_correction, + backup_mode=self._backup_mode, + compression_format=self._compression_format, + advanced_parameters=self._advanced_parameters, ) def available_backends(self) -> list[BackendType]: @@ -70,30 +77,22 @@ def apply( statistic_points=None, dataset=None, ): - self._algo.set_backend_entity(model) # Set algo backend - - if self._sensitivity_metric is None: - # Default case. It means that it is not defined by the user in the API - # Hence, the annotation(Quantization parameters for all layers) from the quantizer will be used. - all_weight_params = self._quantizer.get_weight_compression_setup( - model, graph - ) # Get weight compression params FROM QUANTIZER - statistics, statistic_points = self._algo.collect_weight_compression_statistics( - model, graph, dataset, all_weight_params, statistic_points - ) - else: - # Data Aware mixed precision is used. 
In this case, only nodes_to_compress is obtained from the quantizer - nodes_to_compress = self._quantizer.get_nodes_to_compress( - model, graph - ) # Get nodes to compress FROM QUANTIZER - all_weight_params, statistics = self._algo.get_weight_compression_parameters( - model, graph, nodes_to_compress, statistic_points, dataset - ) + self._algo.set_backend_entity(model) + + all_weight_params, ratio_defining_params, group_size_values, skipped_weight_params = self._quantizer.get_weight_compression_parameters( + model, graph + ) - transformed_model = self._algo.apply_wc_algos( - model, graph, all_weight_params, statistics, dataset - ) # Apply the wc algos FROM ALGO - return transformed_model + return self._algo.apply_with_parameters( + model, + graph, + dataset, + statistic_points, + all_weight_params, + ratio_defining_params, + group_size_values, + skipped_weight_params, + ) def get_statistic_points(self, model, graph: NNCFGraph) -> StatisticPointsContainer: return self._algo.get_statistic_points(model, graph) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py index 695c7082c56..75a3c8fcec4 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py @@ -33,13 +33,10 @@ def transform_prior_quantization(self, model: torch.fx.GraphModule) -> torch.fx. 
def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup: return self._quantizer.get_nncf_quantization_setup(model, nncf_graph) - def get_weight_compression_setup( - self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph + def get_weight_compression_parameters( + self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph, ) -> SingleConfigQuantizerSetup: - return self._quantizer.get_nncf_weight_compression_setup(model, nncf_graph) - - def get_nodes_to_compress(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph): - return self._quantizer.get_nodes_to_compress(model, nncf_graph) + return self._quantizer.get_nncf_weight_compression_parameters(model, nncf_graph) def get_weight_compression_config(self) -> dict[str, Any]: return self._quantizer.weight_compression_configuration diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 0a0ba42a663..e27b61a5d72 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -527,11 +527,8 @@ def _set_weight_compression_config( primary_precision_weight_params = self._mixed_precision_algo.apply( model, graph, statistics_points, weight_params=ratio_defining_params ) - else: - primary_precision_weight_params = ratio_defining_params - - for weight_param in primary_precision_weight_params: - weight_param.compression_config = self._get_primary_config(group_size_values[weight_param.weight_name]) + for weight_param in primary_precision_weight_params: + weight_param.compression_config = self._get_primary_config(group_size_values[weight_param.weight_name]) # Check if group size is valid for each weight in ratio_defining_params failed_nodes = [] @@ -769,6 +766,28 @@ def is_weight_compression_supported( return is_supported_dtype and not no_bit_reduction + def 
_collect_statistics_and_statistic_points( + self, model, graph, statistic_points, dataset, ratio_defining_params, all_weight_params + ): + if not dataset or not (self._data_aware_mixed_precision or self._data_aware_compression): + return None, statistic_points + weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params + matmul_nodes_to_compress = [ + wp.node_with_weight + for wp in weight_params + if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes + ] + matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) + if statistic_points is None: + statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) + statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) + statistics_aggregator.register_statistic_points(statistic_points) + statistics_aggregator.collect_statistics(model, graph) + statistic_points = statistics_aggregator.statistic_points + return self._get_statistics_for_weights_compression( + matmul_input_to_output_nodes_map, statistic_points + ), statistic_points + def get_weight_compression_parameters( self, model: TModel, @@ -867,34 +886,18 @@ def get_weight_compression_parameters( else: group_size_values = {w_params.weight_name: self._group_size for w_params in ratio_defining_params} + # If no mixed precision has to be applied, then set the primary config for all ratio defining params. 
+ if self._ratio == 1 or len(ratio_defining_params) == 0: + for weight_param in ratio_defining_params: + weight_param.compression_config = self._get_primary_config(group_size_values[weight_param.weight_name]) + # Print statistics nncf_logger.info( self._get_bitwidth_distribution_str(all_weight_params, ratio_defining_params, skipped_weight_params) ) - return all_weight_params, ratio_defining_params, group_size_values + return all_weight_params, ratio_defining_params, group_size_values, skipped_weight_params - def _collect_statistics_and_statistic_points( - self, model, graph, statistic_points, dataset, ratio_defining_params, all_weight_params - ): - if not dataset or not (self._data_aware_mixed_precision or self._data_aware_compression): - return None, statistic_points - weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params - matmul_nodes_to_compress = [ - wp.node_with_weight - for wp in weight_params - if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes - ] - matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) - if statistic_points is None: - statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) - statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) - statistics_aggregator.register_statistic_points(statistic_points) - statistics_aggregator.collect_statistics(model, graph) - statistic_points = statistics_aggregator.statistic_points - return self._get_statistics_for_weights_compression( - matmul_input_to_output_nodes_map, statistic_points - ), statistic_points def apply( self, @@ -906,7 +909,7 @@ def apply( self.set_backend_entity(model) # Get processed weight compression parameters ready for compression - all_weight_params, ratio_defining_params, group_size_values = self.get_weight_compression_parameters( + all_weight_params, ratio_defining_params, group_size_values, 
skipped_weight_params = self.get_weight_compression_parameters( model, graph ) return self.apply_with_parameters( @@ -917,6 +920,7 @@ def apply( all_weight_params, ratio_defining_params, group_size_values, + skipped_weight_params, ) def apply_with_parameters( @@ -928,6 +932,7 @@ def apply_with_parameters( all_weight_params, ratio_defining_params, group_size_values, + skipped_weight_params, ): # Collect statistics for the weights compression statistics, statistic_points = self._collect_statistics_and_statistic_points( @@ -939,6 +944,11 @@ def apply_with_parameters( # Filter all_weight_params and by excluding nodes that should remain in their original floating-point precision all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) + # Print statistics + nncf_logger.info( + self._get_bitwidth_distribution_str(all_weight_params, ratio_defining_params, skipped_weight_params) + ) + if self._awq: model = self.awq_algo.apply(model, graph, all_weight_params, statistics, self._backend_entity) # After applying AWQ we need to update statistics since AWQ alters the activations From 8e23572dce6d704b4460e307100a5c78c703d60f Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 6 Oct 2025 21:27:16 +0400 Subject: [PATCH 37/91] review changes --- .../weight_compression/algorithm.py | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index e27b61a5d72..6a061fce7bf 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -527,9 +527,10 @@ def _set_weight_compression_config( primary_precision_weight_params = self._mixed_precision_algo.apply( model, graph, statistics_points, weight_params=ratio_defining_params ) - for weight_param in primary_precision_weight_params: - 
weight_param.compression_config = self._get_primary_config(group_size_values[weight_param.weight_name]) - + for weight_param in ratio_defining_params: + if weight_param in primary_precision_weight_params: + continue + weight_param.compression_config = self._get_backup_config(weight_param.weight_dtype) # Check if group size is valid for each weight in ratio_defining_params failed_nodes = [] for w_params in ratio_defining_params: @@ -787,6 +788,18 @@ def _collect_statistics_and_statistic_points( return self._get_statistics_for_weights_compression( matmul_input_to_output_nodes_map, statistic_points ), statistic_points + + def _get_backup_config(self, weight_dtype: TensorDataType): + if self._backup_mode == BackupMode.NONE: + return None + mode = ( + CompressWeightsMode.INT8_ASYM + if self._backup_mode == BackupMode.INT8_ASYM + else CompressWeightsMode.INT8_SYM + ) + if not self.is_weight_compression_supported(weight_dtype, mode): + return None + return WeightCompressionConfig(mode=mode) def get_weight_compression_parameters( self, @@ -851,14 +864,7 @@ def get_weight_compression_parameters( f"node name: {node.node_name}. The node will be in {self._backup_mode} mode." ) - if self._backup_mode != BackupMode.NONE: - mode = ( - CompressWeightsMode.INT8_ASYM - if self._backup_mode == BackupMode.INT8_ASYM - else CompressWeightsMode.INT8_SYM - ) - if self.is_weight_compression_supported(weight_dtype, mode): - wc_config = WeightCompressionConfig(mode=mode) + wc_config = self._get_backup_config(weight_dtype) weight_params = WeightCompressionParameters( weight_name, node, weight_port_id, weight_dtype, weight_shape, reduction_axes, wc_config @@ -886,15 +892,8 @@ def get_weight_compression_parameters( else: group_size_values = {w_params.weight_name: self._group_size for w_params in ratio_defining_params} - # If no mixed precision has to be applied, then set the primary config for all ratio defining params. 
- if self._ratio == 1 or len(ratio_defining_params) == 0: - for weight_param in ratio_defining_params: - weight_param.compression_config = self._get_primary_config(group_size_values[weight_param.weight_name]) - - # Print statistics - nncf_logger.info( - self._get_bitwidth_distribution_str(all_weight_params, ratio_defining_params, skipped_weight_params) - ) + for weight_param in ratio_defining_params: + weight_param.compression_config = self._get_primary_config(group_size_values[weight_param.weight_name]) return all_weight_params, ratio_defining_params, group_size_values, skipped_weight_params From 36ddf53da8ef92a8ba312b74fde54dcd4c7b3a1f Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 7 Oct 2025 11:02:41 +0400 Subject: [PATCH 38/91] change WeightsCompressionPT2E to ExperimentalWeightsCompression --- .../quantization/algorithms/weight_compression/algorithm.py | 2 +- .../experimental/torch/fx/quantization/quantize_pt2e.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 7d41288588d..73193692317 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -20,7 +20,7 @@ from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression -class WeightsCompressionPT2E(Algorithm): +class ExperimentalWeightsCompression(Algorithm): def __init__( self, quantizer, diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index 30adf9990cd..98acdb04f17 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -27,7 +27,7 @@ from nncf.common.logging import nncf_logger from 
nncf.common.utils.api_marker import api from nncf.experimental.quantization.algorithms.post_training.algorithm import ExperimentalPostTrainingQuantization -from nncf.experimental.quantization.algorithms.weight_compression.algorithm import WeightsCompressionPT2E +from nncf.experimental.quantization.algorithms.weight_compression.algorithm import ExperimentalWeightsCompression from nncf.experimental.torch.fx.constant_folding import constant_fold from nncf.experimental.torch.fx.quantization.quantizer.openvino_adapter import OpenVINOQuantizerAdapter from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer @@ -192,7 +192,7 @@ def compress_pt2e( preserve the accuracy of the model, the more sensitive layers receive a higher precision. :param advanced_parameters: Advanced parameters for algorithms in the compression pipeline. """ - if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_weight_compression_setup"): + if isinstance(quantizer, OpenVINOQuantizer) or hasattr(quantizer, "get_nncf_weight_compression_parameters"): quantizer = OpenVINOQuantizerAdapter(quantizer) compression_format = nncf.CompressionFormat.DQ else: @@ -200,7 +200,7 @@ def compress_pt2e( msg = "Only OpenVINO Quantizer is supported currently." 
raise nncf.InternalError(msg) - quantization_algorithm = WeightsCompressionPT2E( + quantization_algorithm = ExperimentalWeightsCompression( quantizer=quantizer, awq=awq, subset_size=subset_size, From 07b730b68fb542784a1eb0087e40cd5216c38301 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 7 Oct 2025 11:03:54 +0400 Subject: [PATCH 39/91] change ExperimentalWeightsCompression to WeightsCompression --- .../quantization/algorithms/weight_compression/algorithm.py | 2 +- src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 73193692317..4003e1b429b 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -20,7 +20,7 @@ from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression -class ExperimentalWeightsCompression(Algorithm): +class WeightsCompression(Algorithm): def __init__( self, quantizer, diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index 98acdb04f17..dca0aa905e7 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -27,7 +27,7 @@ from nncf.common.logging import nncf_logger from nncf.common.utils.api_marker import api from nncf.experimental.quantization.algorithms.post_training.algorithm import ExperimentalPostTrainingQuantization -from nncf.experimental.quantization.algorithms.weight_compression.algorithm import ExperimentalWeightsCompression +from nncf.experimental.quantization.algorithms.weight_compression.algorithm import WeightsCompression from nncf.experimental.torch.fx.constant_folding import 
constant_fold from nncf.experimental.torch.fx.quantization.quantizer.openvino_adapter import OpenVINOQuantizerAdapter from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer @@ -200,7 +200,7 @@ def compress_pt2e( msg = "Only OpenVINO Quantizer is supported currently." raise nncf.InternalError(msg) - quantization_algorithm = ExperimentalWeightsCompression( + quantization_algorithm = WeightsCompression( quantizer=quantizer, awq=awq, subset_size=subset_size, From d5dd42264c09f7c7426a50df76bd7180bddd36e1 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 7 Oct 2025 11:17:16 +0400 Subject: [PATCH 40/91] add comments --- .../weight_compression/algorithm.py | 23 ++++++++++++++++++- .../weight_compression/algorithm.py | 3 +++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 4003e1b429b..13f83683c36 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -17,13 +17,21 @@ from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType from nncf.quantization.algorithms.algorithm import Algorithm +from nncf.experimental.quantization.quantizer import Quantizer from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression class WeightsCompression(Algorithm): + """ + Post-training Weight Compression algorithm implementation. + + Compresses weights of Linear and Embedding layers to 8-bit integer or + to 4-bit integer/float depending on mode, ratio and group size. 
+ """ + def __init__( self, - quantizer, + quantizer: Quantizer, subset_size: int = 128, awq: bool = False, scale_estimation: bool = False, @@ -33,6 +41,19 @@ def __init__( compression_format: nncf.CompressionFormat = nncf.CompressionFormat.DQ, advanced_parameters: nncf.AdvancedCompressionParameters = None, ) -> torch.fx.GraphModule: + """ + :param quantizer: Quantizer to use in WeightCompression algorithm. + :param subset_size: Number of data samples to calculate activation statistics used for assigning different + quantization precision. + :param awq: determines whether to use or not modified AWQ algorithm. + :param scale_estimation: determines whether to use or not scale estimation for 4 bit layers. + :param gptq: determines whether to use or not GPTQ algorithm. + :param lora_correction: determines whether to use or not LoRA Correction algorithm. + :param sensitivity_metric: The sensitivity metric for assigning quantization precision to layers. In order to + preserve the accuracy of the model, the more sensitive layers receives a higher precision. + :param compression_format: Describes the format in which the model is saved after weight compression. + :param advanced_parameters: advanced parameters for algorithms in compression pipeline. + """ self._quantizer = quantizer wc_config = self._quantizer.get_weight_compression_config() diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 6a061fce7bf..3c9b64e8ab9 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -528,8 +528,10 @@ def _set_weight_compression_config( model, graph, statistics_points, weight_params=ratio_defining_params ) for weight_param in ratio_defining_params: + # This weight should be in lower precision according to mixed precision. 
Let it be if weight_param in primary_precision_weight_params: continue + # Set all layers other than the ones returned by mixed precision to backup precision weight_param.compression_config = self._get_backup_config(weight_param.weight_dtype) # Check if group size is valid for each weight in ratio_defining_params failed_nodes = [] @@ -892,6 +894,7 @@ def get_weight_compression_parameters( else: group_size_values = {w_params.weight_name: self._group_size for w_params in ratio_defining_params} + # Set these layers to primary config. Later we will set layers to backup precision according to Mixed precision for weight_param in ratio_defining_params: weight_param.compression_config = self._get_primary_config(group_size_values[weight_param.weight_name]) From 076a76bac7369b20a6cdbb242ac6adb1a2647d01 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 7 Oct 2025 11:25:38 +0400 Subject: [PATCH 41/91] add typehints --- .../algorithms/weight_compression/algorithm.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 13f83683c36..76bcdd2bedf 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -13,6 +13,10 @@ import nncf from nncf import SensitivityMetric +from nncf import CompressionFormat +from nncf import AdvancedCompressionParameters +from nncf import Dataset +from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.graph.graph import NNCFGraph from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType @@ -37,9 +41,9 @@ def __init__( scale_estimation: bool = False, gptq: bool = False, lora_correction: bool = False, - sensitivity_metric: 
nncf.SensitivityMetric = SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, - compression_format: nncf.CompressionFormat = nncf.CompressionFormat.DQ, - advanced_parameters: nncf.AdvancedCompressionParameters = None, + sensitivity_metric: SensitivityMetric = SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, + compression_format: CompressionFormat = CompressionFormat.DQ, + advanced_parameters: AdvancedCompressionParameters = None, ) -> torch.fx.GraphModule: """ :param quantizer: Quantizer to use in WeightCompression algorithm. @@ -95,9 +99,9 @@ def apply( self, model: torch.fx.GraphModule, graph: NNCFGraph, - statistic_points=None, - dataset=None, - ): + statistic_points: Optional[StatisticPointsContainer] = None, + dataset: Optional[Dataset] = None, + ) -> torch.fx.GraphModule: self._algo.set_backend_entity(model) all_weight_params, ratio_defining_params, group_size_values, skipped_weight_params = self._quantizer.get_weight_compression_parameters( @@ -115,5 +119,5 @@ def apply( skipped_weight_params, ) - def get_statistic_points(self, model, graph: NNCFGraph) -> StatisticPointsContainer: + def get_statistic_points(self, model: torch.fx.GraphModule, graph: NNCFGraph) -> StatisticPointsContainer: return self._algo.get_statistic_points(model, graph) From 2ce9eec9791b094157d97c56380b71f189f5a9a0 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 7 Oct 2025 11:30:27 +0400 Subject: [PATCH 42/91] add docstrings --- .../algorithms/weight_compression/algorithm.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 76bcdd2bedf..3882e3af6e9 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -119,5 +119,13 @@ def apply( skipped_weight_params, ) - def get_statistic_points(self, 
model: torch.fx.GraphModule, graph: NNCFGraph) -> StatisticPointsContainer: - return self._algo.get_statistic_points(model, graph) + def get_statistic_points(self, model: torch.fx.GraphModule, graph: NNCFGraph, nodes_and_port_ids: Iterable[tuple[NNCFNode, int]],) -> StatisticPointsContainer: + """ + Returns statistic points, for which StatisticsCollector should collect statistics. + + :param model: Model for statistics collection. + :param graph: Model graph. + :param nodes_and_port_ids: Nodes and port ids for which statistics should be collected. + :return: Statistic points, for which StatisticsCollector should collect statistics. + """ + return self._algo.get_statistic_points(model, graph, nodes_and_port_ids) From 1bebf3eaa7dee563113adfb55677833a000d8957 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 7 Oct 2025 11:32:41 +0400 Subject: [PATCH 43/91] add typehint for quantize pt2e --- .../experimental/torch/fx/quantization/quantize_pt2e.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index dca0aa905e7..a178cb74de4 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -23,6 +23,8 @@ import nncf from nncf import Dataset +from nncf import SensitivityMetric +from nncf import AdvancedCompressionParameters from nncf.common.factory import NNCFGraphFactory from nncf.common.logging import nncf_logger from nncf.common.utils.api_marker import api @@ -170,8 +172,8 @@ def compress_pt2e( gptq: bool = False, lora_correction: bool = False, subset_size: int = 128, # Dataset size to use - sensitivity_metric: nncf.SensitivityMetric = None, - advanced_parameters: nncf.AdvancedCompressionParameters = None, + sensitivity_metric: Optional[SensitivityMetric] = None, + advanced_parameters: Optional[AdvancedCompressionParameters] = None, ) -> 
torch.fx.GraphModule: """ Applies Weight Compression to the torch.fx.GraphModule provided model From e82920fb25fc95e9bb0f2041c6b90085b538475d Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 7 Oct 2025 11:41:50 +0400 Subject: [PATCH 44/91] return original develop branch changes --- .../weight_compression/algorithm.py | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 3c9b64e8ab9..9e321b31c9b 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -783,10 +783,7 @@ def _collect_statistics_and_statistic_points( matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) if statistic_points is None: statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) - statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) - statistics_aggregator.register_statistic_points(statistic_points) - statistics_aggregator.collect_statistics(model, graph) - statistic_points = statistics_aggregator.statistic_points + statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) return self._get_statistics_for_weights_compression( matmul_input_to_output_nodes_map, statistic_points ), statistic_points @@ -1085,6 +1082,26 @@ def get_compression_nodes_info( ] matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) return nodes_to_compress, matmul_input_to_output_nodes_map + + def _collect_statistics( + self, + dataset: Dataset, + graph: NNCFGraph, + model: TModel, + statistic_points: StatisticPointsContainer, + ): + """ + Creates statistics aggregator, registers all statistics specified for algorithm, and then collect them. 
+ + :param dataset: Dataset to collect values. + :param graph: Model graph. + :param model: Model for statistics collection. + :param statistic_points: Statistics points. + """ + statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) + statistics_aggregator.register_statistic_points(statistic_points) + statistics_aggregator.collect_statistics(model, graph) + return statistics_aggregator.statistic_points def get_statistic_points( self, From 82cc10b5086afed9a49d93eacc5e750f8ac2ee80 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 7 Oct 2025 14:23:50 +0400 Subject: [PATCH 45/91] update typehints and docs --- .../weight_compression/algorithm.py | 109 +++++++++++------- 1 file changed, 66 insertions(+), 43 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 9e321b31c9b..b770278102d 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -491,6 +491,24 @@ def _get_ratio_defining_params( return ratio_defining_params + def _get_backup_config(self, weight_dtype: TensorDataType) -> WeightCompressionConfig: + """ + Returns the backup weight compression configuration based on the algorithm's backup mode. + + :param weight_dtype: Data type of the weight tensor. + :return: A WeightCompressionConfig object for the backup precision, or None if backup is disabled or unsupported. 
+ """ + if self._backup_mode == BackupMode.NONE: + return None + mode = ( + CompressWeightsMode.INT8_ASYM + if self._backup_mode == BackupMode.INT8_ASYM + else CompressWeightsMode.INT8_SYM + ) + if not self.is_weight_compression_supported(weight_dtype, mode): + return None + return WeightCompressionConfig(mode=mode) + def _get_primary_config(self, group_size: int) -> WeightCompressionConfig: codebook_values = None @@ -528,7 +546,8 @@ def _set_weight_compression_config( model, graph, statistics_points, weight_params=ratio_defining_params ) for weight_param in ratio_defining_params: - # This weight should be in lower precision according to mixed precision. Let it be + # We already set these in primary precision. Mixed precision algo returns + # layers which should be in primary precision. Let it be if weight_param in primary_precision_weight_params: continue # Set all layers other than the ones returned by mixed precision to backup precision @@ -770,41 +789,46 @@ def is_weight_compression_supported( return is_supported_dtype and not no_bit_reduction def _collect_statistics_and_statistic_points( - self, model, graph, statistic_points, dataset, ratio_defining_params, all_weight_params + self, + model: TModel, + graph: NNCFGraph, + statistic_points: StatisticPointsContainer, + dataset: Dataset, + ratio_defining_params: list[WeightCompressionParameters], + all_weight_params: list[WeightCompressionParameters], ): - if not dataset or not (self._data_aware_mixed_precision or self._data_aware_compression): - return None, statistic_points - weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params - matmul_nodes_to_compress = [ - wp.node_with_weight - for wp in weight_params - if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes - ] - matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) - if statistic_points is None: - statistic_points = 
self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) - statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) - return self._get_statistics_for_weights_compression( - matmul_input_to_output_nodes_map, statistic_points - ), statistic_points - - def _get_backup_config(self, weight_dtype: TensorDataType): - if self._backup_mode == BackupMode.NONE: - return None - mode = ( - CompressWeightsMode.INT8_ASYM - if self._backup_mode == BackupMode.INT8_ASYM - else CompressWeightsMode.INT8_SYM - ) - if not self.is_weight_compression_supported(weight_dtype, mode): - return None - return WeightCompressionConfig(mode=mode) + """ + Collects and computes statistics required for weight compression. + + :param model: Backend-specific model instance. + :param graph: Corresponding NNCFGraph of the model. + :param statistic_points: Container with pre-collected statistics, if available. + :param dataset: Dataset used for collecting statistics when not provided. + :param ratio_defining_params: List of parameters defining compression ratios. + :param all_weight_params: List of all weight compression parameters. + :return: A tuple containing collected statistics for weight compression and the updated statistic_points. 
+ """ + if not dataset or not (self._data_aware_mixed_precision or self._data_aware_compression): + return None, statistic_points + weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params + matmul_nodes_to_compress = [ + wp.node_with_weight + for wp in weight_params + if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes + ] + matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) + if statistic_points is None: + statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) + statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) + return self._get_statistics_for_weights_compression( + matmul_input_to_output_nodes_map, statistic_points + ), statistic_points def get_weight_compression_parameters( self, model: TModel, graph: NNCFGraph, - ) -> tuple[list[WeightCompressionParameters], Optional[dict[str, WCTensorStatistic]]]: + ) -> tuple[list[WeightCompressionParameters], list[WeightCompressionParameters], dict[str, int], list[WeightCompressionParameters]]: """ Generates a list of weight compression parameters based on the Weight Compression algorithm configuration. Determines the appropriate quantization parameters for each node eligible for @@ -814,11 +838,10 @@ def get_weight_compression_parameters( :param model: Backend-specific input model. :param graph: NNCFGraph instance. - :param statistic_points: Optional pre-collected statistic points. - :param dataset: Optional dataset for statistics collection. - :return: A tuple consisting of a list of weight compression parameters, based on the Weight - Compression algorithm configuration, and a mapping of target node names to the - collected statistics. 
+ :return: A tuple consisting of a list of all weight compression parameters, based on the Weight + Compression algorithm configuration, list of ratio defining parameters(weights that are used + for ratio calculation between primary and backup precisions), A dictionary mapping weight + names to their group size values and list of weight parameters to skip. """ nodes_to_compress = self.get_nodes_to_compress(graph) @@ -924,14 +947,14 @@ def apply( def apply_with_parameters( self, - model, - graph, - dataset, - statistic_points, - all_weight_params, - ratio_defining_params, - group_size_values, - skipped_weight_params, + model: TModel, + graph: NNCFGraph, + dataset: Dataset, + statistic_points: StatisticPointsContainer, + all_weight_params: list[WeightCompressionParameters], + ratio_defining_params: list[WeightCompressionParameters], + group_size_values: dict[str, int], + skipped_weight_params: list[WeightCompressionParameters], ): # Collect statistics for the weights compression statistics, statistic_points = self._collect_statistics_and_statistic_points( From beae508a824601531a23aa5881d8a9bef7823635 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 7 Oct 2025 14:29:00 +0400 Subject: [PATCH 46/91] format --- .../weight_compression/algorithm.py | 23 ++++++++++------ .../torch/fx/quantization/quantize_pt2e.py | 2 +- .../quantizer/openvino_adapter.py | 4 ++- .../weight_compression/algorithm.py | 27 ++++++++++--------- 4 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 3882e3af6e9..cf0733bbb94 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -9,19 +9,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Iterable, Optional + import torch import nncf -from nncf import SensitivityMetric -from nncf import CompressionFormat from nncf import AdvancedCompressionParameters +from nncf import CompressionFormat from nncf import Dataset -from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer +from nncf import SensitivityMetric from nncf.common.graph.graph import NNCFGraph +from nncf.common.graph.graph import NNCFNode from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType -from nncf.quantization.algorithms.algorithm import Algorithm from nncf.experimental.quantization.quantizer import Quantizer +from nncf.quantization.algorithms.algorithm import Algorithm from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression @@ -103,9 +105,9 @@ def apply( dataset: Optional[Dataset] = None, ) -> torch.fx.GraphModule: self._algo.set_backend_entity(model) - - all_weight_params, ratio_defining_params, group_size_values, skipped_weight_params = self._quantizer.get_weight_compression_parameters( - model, graph + + all_weight_params, ratio_defining_params, group_size_values, skipped_weight_params = ( + self._quantizer.get_weight_compression_parameters(model, graph) ) return self._algo.apply_with_parameters( @@ -119,7 +121,12 @@ def apply( skipped_weight_params, ) - def get_statistic_points(self, model: torch.fx.GraphModule, graph: NNCFGraph, nodes_and_port_ids: Iterable[tuple[NNCFNode, int]],) -> StatisticPointsContainer: + def get_statistic_points( + self, + model: torch.fx.GraphModule, + graph: NNCFGraph, + nodes_and_port_ids: Iterable[tuple[NNCFNode, int]], + ) -> StatisticPointsContainer: """ Returns statistic points, for which StatisticsCollector should collect statistics. 
diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index a178cb74de4..5858cf64c5d 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -22,9 +22,9 @@ from torch.fx.passes.infra.pass_manager import PassManager import nncf +from nncf import AdvancedCompressionParameters from nncf import Dataset from nncf import SensitivityMetric -from nncf import AdvancedCompressionParameters from nncf.common.factory import NNCFGraphFactory from nncf.common.logging import nncf_logger from nncf.common.utils.api_marker import api diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py index 75a3c8fcec4..d540df86e51 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py @@ -34,7 +34,9 @@ def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGr return self._quantizer.get_nncf_quantization_setup(model, nncf_graph) def get_weight_compression_parameters( - self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph, + self, + model: torch.fx.GraphModule, + nncf_graph: NNCFGraph, ) -> SingleConfigQuantizerSetup: return self._quantizer.get_nncf_weight_compression_parameters(model, nncf_graph) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index b770278102d..2db18618235 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -496,14 +496,13 @@ def _get_backup_config(self, weight_dtype: TensorDataType) -> WeightCompressionC Returns the backup weight compression configuration based on the 
algorithm's backup mode. :param weight_dtype: Data type of the weight tensor. - :return: A WeightCompressionConfig object for the backup precision, or None if backup is disabled or unsupported. + :return: A WeightCompressionConfig object for the backup precision, or None if backup is + disabled or unsupported. """ if self._backup_mode == BackupMode.NONE: return None mode = ( - CompressWeightsMode.INT8_ASYM - if self._backup_mode == BackupMode.INT8_ASYM - else CompressWeightsMode.INT8_SYM + CompressWeightsMode.INT8_ASYM if self._backup_mode == BackupMode.INT8_ASYM else CompressWeightsMode.INT8_SYM ) if not self.is_weight_compression_supported(weight_dtype, mode): return None @@ -828,7 +827,12 @@ def get_weight_compression_parameters( self, model: TModel, graph: NNCFGraph, - ) -> tuple[list[WeightCompressionParameters], list[WeightCompressionParameters], dict[str, int], list[WeightCompressionParameters]]: + ) -> tuple[ + list[WeightCompressionParameters], + list[WeightCompressionParameters], + dict[str, int], + list[WeightCompressionParameters], + ]: """ Generates a list of weight compression parameters based on the Weight Compression algorithm configuration. Determines the appropriate quantization parameters for each node eligible for @@ -839,8 +843,8 @@ def get_weight_compression_parameters( :param model: Backend-specific input model. :param graph: NNCFGraph instance. :return: A tuple consisting of a list of all weight compression parameters, based on the Weight - Compression algorithm configuration, list of ratio defining parameters(weights that are used - for ratio calculation between primary and backup precisions), A dictionary mapping weight + Compression algorithm configuration, list of ratio defining parameters(weights that are used + for ratio calculation between primary and backup precisions), A dictionary mapping weight names to their group size values and list of weight parameters to skip. 
""" nodes_to_compress = self.get_nodes_to_compress(graph) @@ -920,7 +924,6 @@ def get_weight_compression_parameters( return all_weight_params, ratio_defining_params, group_size_values, skipped_weight_params - def apply( self, model: TModel, @@ -931,8 +934,8 @@ def apply( self.set_backend_entity(model) # Get processed weight compression parameters ready for compression - all_weight_params, ratio_defining_params, group_size_values, skipped_weight_params = self.get_weight_compression_parameters( - model, graph + all_weight_params, ratio_defining_params, group_size_values, skipped_weight_params = ( + self.get_weight_compression_parameters(model, graph) ) return self.apply_with_parameters( model, @@ -1105,7 +1108,7 @@ def get_compression_nodes_info( ] matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) return nodes_to_compress, matmul_input_to_output_nodes_map - + def _collect_statistics( self, dataset: Dataset, @@ -1205,4 +1208,4 @@ def _get_statistics_for_weights_compression( # Each activation node may have multiple MatMul nodes which it is an input to for node in matmul_nodes: statistics[node.node_name] = copy.deepcopy(stats) - return statistics \ No newline at end of file + return statistics From 8bd95df521fdb4806095b8dd20951fa7781981bc Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 7 Oct 2025 15:20:05 +0400 Subject: [PATCH 47/91] update type hinting of openvino adapter --- .../torch/fx/quantization/quantizer/openvino_adapter.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py index d540df86e51..b72df9d29f7 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py @@ -17,6 +17,7 @@ from nncf.common.quantization.quantizer_setup 
import SingleConfigQuantizerSetup from nncf.experimental.quantization.quantizer import Quantizer from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer +from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters class OpenVINOQuantizerAdapter(Quantizer): @@ -37,7 +38,12 @@ def get_weight_compression_parameters( self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph, - ) -> SingleConfigQuantizerSetup: + ) -> tuple[ + list[WeightCompressionParameters], + list[WeightCompressionParameters], + dict[str, int], + list[WeightCompressionParameters], + ]: return self._quantizer.get_nncf_weight_compression_parameters(model, nncf_graph) def get_weight_compression_config(self) -> dict[str, Any]: From aac9d3fbbecf0de667b05ee94e94d2f3aa1fa97b Mon Sep 17 00:00:00 2001 From: anzr299 Date: Fri, 10 Oct 2025 17:13:00 +0400 Subject: [PATCH 48/91] add test --- .../weight_compression/algorithm.py | 42 +-- .../torch/fx/quantization/quantize_pt2e.py | 41 ++- .../quantizer/torch_ao_adapter.py | 22 ++ .../weight_compression/algorithm.py | 8 +- tests/executorch/test_quantizer.py | 256 ++++++++++++++++++ tests/torch/test_models/__init__.py | 1 + tests/torch/test_models/llama.py | 151 +++++++++++ ...4wo_sym_gs32_ratio0.8_all_layers_False.dot | 169 ++++++++++++ ...t4wo_sym_gs32_ratio0.8_all_layers_True.dot | 169 ++++++++++++ ...t8wo_asym_gs-1_ratio1_all_layers_False.dot | 169 ++++++++++++ ...4wo_sym_gs32_ratio0.8_all_layers_False.dot | 24 ++ ...t4wo_sym_gs32_ratio0.8_all_layers_True.dot | 24 ++ ...t8wo_asym_gs-1_ratio1_all_layers_False.dot | 24 ++ ...atio0.8_all_layers_False_ref_wc_param.json | 128 +++++++++ ...tivity_metric_hessian_input_activation.dot | 169 ++++++++++++ ...itivity_metric_max_activation_variance.dot | 169 ++++++++++++ ...ivity_metric_mean_activation_magnitude.dot | 169 ++++++++++++ ...tivity_metric_mean_activation_variance.dot | 169 ++++++++++++ ...ivity_metric_weight_quantization_error.dot | 169 
++++++++++++ ...ratio0.8_all_layers_True_ref_wc_param.json | 128 +++++++++ ...tivity_metric_hessian_input_activation.dot | 169 ++++++++++++ ...itivity_metric_max_activation_variance.dot | 169 ++++++++++++ ...ivity_metric_mean_activation_magnitude.dot | 169 ++++++++++++ ...tivity_metric_mean_activation_variance.dot | 169 ++++++++++++ ...ivity_metric_weight_quantization_error.dot | 169 ++++++++++++ ..._ratio1_all_layers_False_ref_wc_param.json | 128 +++++++++ ...l_layers_False_sensitivity_metric_None.dot | 169 ++++++++++++ ...atio0.8_all_layers_False_ref_wc_param.json | 38 +++ ...tivity_metric_hessian_input_activation.dot | 24 ++ ...itivity_metric_max_activation_variance.dot | 24 ++ ...ivity_metric_mean_activation_magnitude.dot | 24 ++ ...tivity_metric_mean_activation_variance.dot | 24 ++ ...ivity_metric_weight_quantization_error.dot | 24 ++ ...ratio0.8_all_layers_True_ref_wc_param.json | 38 +++ ...tivity_metric_hessian_input_activation.dot | 24 ++ ...itivity_metric_max_activation_variance.dot | 24 ++ ...ivity_metric_mean_activation_magnitude.dot | 24 ++ ...tivity_metric_mean_activation_variance.dot | 24 ++ ...ivity_metric_weight_quantization_error.dot | 24 ++ ..._ratio1_all_layers_False_ref_wc_param.json | 38 +++ ...l_layers_False_sensitivity_metric_None.dot | 24 ++ 41 files changed, 3694 insertions(+), 27 deletions(-) create mode 100644 tests/executorch/test_quantizer.py create mode 100644 tests/torch/test_models/llama.py create mode 100644 tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot create mode 100644 tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot create mode 100644 tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot create mode 100644 tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot 
create mode 100644 tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot create mode 100644 tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot create mode 100644 
tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot create mode 100644 
tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index cf0733bbb94..562cb1ce202 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -25,7 +25,10 @@ from nncf.experimental.quantization.quantizer import Quantizer from nncf.quantization.algorithms.algorithm import Algorithm from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression - 
+from nncf.quantization.algorithms.weight_compression.algorithm import get_weight_compression_configuration +from nncf import CompressWeightsMode +from nncf import IgnoredScope +from nncf import BackupMode class WeightsCompression(Algorithm): """ @@ -37,15 +40,21 @@ class WeightsCompression(Algorithm): def __init__( self, + mode: CompressWeightsMode, quantizer: Quantizer, - subset_size: int = 128, - awq: bool = False, - scale_estimation: bool = False, - gptq: bool = False, - lora_correction: bool = False, - sensitivity_metric: SensitivityMetric = SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, - compression_format: CompressionFormat = CompressionFormat.DQ, - advanced_parameters: AdvancedCompressionParameters = None, + ratio: float, + group_size: int, + ignored_scope: IgnoredScope, + all_layers: bool, + subset_size: int, + awq: bool, + scale_estimation: bool, + gptq: bool, + lora_correction: bool, + backup_mode: BackupMode, + sensitivity_metric: SensitivityMetric, + compression_format: CompressionFormat, + advanced_parameters: AdvancedCompressionParameters, ) -> torch.fx.GraphModule: """ :param quantizer: Quantizer to use in WeightCompression algorithm. 
@@ -62,26 +71,25 @@ def __init__( """ self._quantizer = quantizer - wc_config = self._quantizer.get_weight_compression_config() - - self._mode = wc_config.get("mode", None) + self._mode = mode self._awq = awq self._gptq = gptq self._scale_estimation = scale_estimation self._subset_size = subset_size self._advanced_parameters = advanced_parameters self._lora_correction = lora_correction - self._ratio = wc_config.get("ratio", 1) - self._group_size = wc_config.get("group_size", 128) - self._all_layers = wc_config.get("all_layers", False) - self._backup_mode = wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM) + self._ratio = ratio + self._group_size = group_size + self._all_layers = all_layers + self._backup_mode = backup_mode self._sensitivity_metric = sensitivity_metric self._compression_format = compression_format + self._algo = WeightCompression( mode=self._mode, ratio=self._ratio, group_size=self._group_size, - ignored_scope=nncf.IgnoredScope(), # This is already defined in the quantizer object + ignored_scope=ignored_scope, all_layers=self._all_layers, sensitivity_metric=self._sensitivity_metric, awq=self._awq, diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index 5858cf64c5d..1a496eedb2d 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -37,6 +37,7 @@ from nncf.experimental.torch.fx.transformations import QUANTIZE_NODE_TARGETS from nncf.experimental.torch.fx.transformations import DuplicateDQPassNoAnnotations from nncf.experimental.torch.fx.transformations import compress_post_quantize_transformation +from nncf.quantization.algorithms.weight_compression.algorithm import get_weight_compression_configuration from nncf.quantization.advanced_parameters import AdvancedBiasCorrectionParameters from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters from 
nncf.quantization.range_estimator import RangeEstimatorParameters @@ -202,16 +203,44 @@ def compress_pt2e( msg = "Only OpenVINO Quantizer is supported currently." raise nncf.InternalError(msg) + wc_config = quantizer.get_weight_compression_config() + + mode = wc_config.get("mode", None) + awq = awq + gptq = gptq + scale_estimation = scale_estimation + subset_size = subset_size + advanced_parameters = advanced_parameters + lora_correction = lora_correction + ratio = wc_config.get("ratio", 1) + group_size = wc_config.get("group_size", 128) + all_layers = wc_config.get("all_layers", False) + backup_mode = wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM) + sensitivity_metric = sensitivity_metric + compression_format = compression_format + ignored_scope = nncf.IgnoredScope() # This is already defined in the quantizer object + + weight_compression_configuration = get_weight_compression_configuration( + mode, + dataset, + ratio, + group_size, + all_layers, + awq, + scale_estimation, + gptq, + lora_correction, + ignored_scope, + sensitivity_metric, + backup_mode, + advanced_parameters, + ) + quantization_algorithm = WeightsCompression( quantizer=quantizer, - awq=awq, subset_size=subset_size, - scale_estimation=scale_estimation, - gptq=gptq, - lora_correction=lora_correction, - sensitivity_metric=sensitivity_metric, compression_format=compression_format, - advanced_parameters=advanced_parameters, + **weight_compression_configuration ) # Here the model is annotated diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py index e3a6c1c8f42..19dfb5314ce 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py @@ -34,6 +34,7 @@ from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter from nncf.experimental.torch.fx.node_utils import 
get_node_args from nncf.tensor.definitions import TensorDataType +from nncf import CompressWeightsMode EdgeOrNode = Union[tuple[torch.fx.Node, torch.fx.Node]] @@ -46,6 +47,25 @@ class TorchAOQuantizerAdapter(Quantizer): def __init__(self, quantizer: TorchAOQuantizer): self._quantizer = quantizer + def _get_compression_mode_from_qconfig(qp: QuantizationPointBase): + if qp.qconfig.num_bits == 4 and qp.qconfig.mode == QuantizationMode.ASYMMETRIC: + return CompressWeightsMode.INT4_ASYM + elif qp.qconfig.num_bits == 4 and qp.qconfig.mode == QuantizationMode.SYMMETRIC: + return CompressWeightsMode.INT4_SYM + elif qp.qconfig.num_bits == 8 and qp.qconfig.mode == QuantizationMode.ASYMMETRIC: + return CompressWeightsMode.INT8_ASYM + elif qp.qconfig.num_bits == 8 and qp.qconfig.mode == QuantizationMode.SYMMETRIC: + return CompressWeightsMode.INT8_SYM + + def get_wc_config_node_map(self, model, nncf_graph): + quantization_setup = self.get_quantization_setup(model, nncf_graph) + qps = quantization_setup.quantization_points + print(quantization_setup) + for _,qp in qps.items(): + assert len(qp.directly_quantized_operator_node_names) == 1, "Weights compression does not support shared configs" + qps = {qp.directly_quantized_operator_node_names[0]: self._get_compression_mode_from_qconfig(qp) for id,qp in qps.items()} + return qps + def transform_prior_quantization(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: return self._quantizer.transform_for_annotation(model) @@ -116,6 +136,7 @@ def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) - :return: A SingleConfigQuantizerSetup containing quantization points derived from the annotated model. """ edge_or_node_to_qspec = _get_edge_or_node_to_qspec(annotated) + print(edge_or_node_to_qspec) # Node means all output edges should be quantized. # Edge means only one edge should be quantized. 
edge_or_node_to_group_id = _get_edge_or_node_to_group_id(edge_or_node_to_qspec) @@ -150,6 +171,7 @@ def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) - raise nncf.InternalError(msg) dtype = TensorDataType.int8 if qspec.dtype is torch.int8 else TensorDataType.uint8 + dtype = TensorDataType.int4 if qspec.qmax in [15, 8] else dtype mode = ( QuantizationMode.SYMMETRIC if qspec.qscheme in [torch.per_channel_symmetric, torch.per_tensor_symmetric] diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 2db18618235..7145fe30b80 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -871,7 +871,6 @@ def get_weight_compression_parameters( weight_shape = self._backend_entity.get_weight_shape(node, weight_port_id, graph) reduction_axes = self._backend_entity.get_reduction_axes(node, weight_port_id, graph) - wc_config = None if is_target_node and self.is_weight_compression_supported(weight_dtype, self._mode): if ( self._group_size != -1 @@ -966,14 +965,15 @@ def apply_with_parameters( # Set weight compression configuration self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values) - # Filter all_weight_params and by excluding nodes that should remain in their original floating-point precision - all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) - # Print statistics nncf_logger.info( self._get_bitwidth_distribution_str(all_weight_params, ratio_defining_params, skipped_weight_params) ) + # Filter all_weight_params and by excluding nodes that should remain in their original floating-point precision + all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) + + if self._awq: model = 
self.awq_algo.apply(model, graph, all_weight_params, statistics, self._backend_entity) # After applying AWQ we need to update statistics since AWQ alters the activations diff --git a/tests/executorch/test_quantizer.py b/tests/executorch/test_quantizer.py new file mode 100644 index 00000000000..3015a115076 --- /dev/null +++ b/tests/executorch/test_quantizer.py @@ -0,0 +1,256 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); + +from dataclasses import dataclass +from functools import partial +from typing import Any, Callable, Optional + +import dataclasses +import json +from enum import Enum + +import pytest +import torch +import torch.fx + +import nncf +from nncf.common.graph import NNCFGraph +from nncf.common.utils.os import safe_open +from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter +from nncf.experimental.torch.fx import compress_pt2e + +from tests.cross_fw.shared.nx_graph import compare_nx_graph_with_reference +from tests.cross_fw.shared.paths import TEST_ROOT +from tests.torch.test_models.synthetic import ShortTransformer +from tests.torch.test_models.llama import LlamaDecoderOnly +from tests.torch2.fx.helpers import get_torch_fx_model + +from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e, convert_pt2e + +from executorch.backends.openvino.quantizer.quantizer import ( + OpenVINOQuantizer, + QuantizationMode, +) +from nncf.common.graph.graph import NNCFNode + +FX_PT2E_DIR = TEST_ROOT / "torch2" / "data" / "fx" / "compress_pt2e" +FX_AO_DIR = TEST_ROOT / "torch2" / "data" / "fx" / "ao_compression_OpenVINOQuantizer" + + +@dataclass +class ModelCase: + model_builder: Callable[[], torch.nn.Module] + model_id: str + input_shape: tuple[int, ...] 
+ + +def get_dot_filename(model_name: str) -> str: + return model_name + ".dot" + + +def get_wc_param_filename(model_name: str) -> str: + return model_name + "_ref_wc_param.json" + + +def _build_torch_fx_model(model_case: ModelCase) -> tuple[torch.fx.GraphModule, torch.Tensor]: + model = model_case.model_builder() + # ShortTransformer takes token ids; match prior synthetic tests (int32) + example_input = torch.ones(model_case.input_shape, dtype=torch.int32) + fx_model = get_torch_fx_model(model, example_input) + return fx_model, example_input + + +def _get_calibration_dataset(example_input: torch.Tensor) -> nncf.Dataset: + def transform_fn(x): + return x.to("cpu") + return nncf.Dataset([example_input], transform_fn) + + +def get_openvino_quantizer(*args, **kwargs) -> OpenVINOQuantizer: + return OpenVINOQuantizer(*args, **kwargs) + + +def _string_from_quantizer_params(qparams: dict[str, Any], pt2e_param: Optional[dict[str, Any]] = None) -> str: + mode = qparams.get("mode") + gs = qparams.get("group_size", "-1") + ratio = qparams.get("ratio", "1") + all_layers = qparams.get("all_layers", "False") + if(pt2e_param is None): + return f"{mode.value}_gs{gs}_ratio{ratio}_all_layers_{all_layers}" + sensitivity_metric = pt2e_param.get("sensitivity_metric", "None") + return f"{mode.value}_gs{gs}_ratio{ratio}_all_layers_{all_layers}_sensitivity_metric_{sensitivity_metric}" + + +BASE_MODELS = ( + ModelCase(LlamaDecoderOnly, "LlamaDecoderOnly", [1,3,64]), + ModelCase(partial(ShortTransformer, 64, 128, True), "short_transformer_shared", [5]), +) + +QUANTIZER_PARAMS = ( + {"mode": QuantizationMode.INT8WO_ASYM}, + {"mode": QuantizationMode.INT4WO_SYM, "group_size": 32, "ratio": 0.8}, + {"mode": QuantizationMode.INT4WO_SYM, "group_size": 32, "ratio": 0.8, "all_layers": True}, +) + +PT2E_PARAMS = ( + {"sensitivity_metric": nncf.SensitivityMetric.HESSIAN_INPUT_ACTIVATION}, + {"sensitivity_metric": nncf.SensitivityMetric.MAX_ACTIVATION_VARIANCE}, + {"sensitivity_metric": 
nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR}, + {"sensitivity_metric": nncf.SensitivityMetric.MEAN_ACTIVATION_VARIANCE}, + {"sensitivity_metric": nncf.SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE}, +) + + +TEST_MODELS = tuple( + (model, qparam, pt2e_param) + for model in BASE_MODELS + for qparam in QUANTIZER_PARAMS + for pt2e_param in ( + [{}] + if ( + (qparam.get("mode") in {QuantizationMode.INT8WO_ASYM, QuantizationMode.INT8WO_SYM}) + or (qparam.get("ratio") is None) + ) + else PT2E_PARAMS + ) +) + + +TEST_MODEL_IDS = [ + f"{m.model_id}__{_string_from_quantizer_params(qparams, pt2e_param)}" for (m, qparams, pt2e_param) in TEST_MODELS +] + + +@pytest.mark.parametrize( + ("model_case", "quantizer_params", "pt2e_params"), + TEST_MODELS, + ids=TEST_MODEL_IDS, +) + +@pytest.mark.parametrize( + "quantizer_builder", + [get_openvino_quantizer], + ids=["OpenVINOQuantizer"], +) + +def test_compress_pt2e( + quantizer_builder: Callable[..., OpenVINOQuantizer], + model_case: ModelCase, + quantizer_params, + pt2e_params, +): + fx_model, example_input = _build_torch_fx_model(model_case) + with torch.no_grad(): + ref_out = fx_model(example_input) + + calibration_dataset = _get_calibration_dataset(example_input) + + # Build quantizer directly from quantizer_params (already includes mode/group_size) + quantizer = quantizer_builder(**quantizer_params) + + quantized_model = compress_pt2e( + fx_model, + quantizer=quantizer, + dataset=calibration_dataset, + **pt2e_params, + ) + + with torch.no_grad(): + out = quantized_model(example_input) + assert out.shape == ref_out.shape, "Compressed model output shape mismatch." 
+ + nncf_graph: NNCFGraph = GraphConverter.create_nncf_graph(quantized_model) + nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) + param_string = _string_from_quantizer_params(quantizer_params, pt2e_params) + path_to_dot = (FX_PT2E_DIR / quantizer.__class__.__name__ / model_case.model_id / get_dot_filename(param_string)).as_posix() + compare_nx_graph_with_reference(nx_graph, path_to_dot) + + +@pytest.mark.parametrize( + ("model_case", "quantizer_params", "pt2e_params"), + TEST_MODELS, + ids=TEST_MODEL_IDS, +) +@pytest.mark.parametrize( + "quantizer_builder", + [get_openvino_quantizer], + ids=["OpenVINOQuantizer"], +) +def test_openvino_quantizer( + model_case: ModelCase, + quantizer_params, + quantizer_builder: Callable[..., OpenVINOQuantizer], + pt2e_params, +): + fx_model, example_input = _build_torch_fx_model(model_case) + quantizer = quantizer_builder(**quantizer_params) + + prepared = prepare_pt2e(fx_model, quantizer) + prepared(example_input) + ao_quantized_model = convert_pt2e(prepared) + + nncf_graph = GraphConverter.create_nncf_graph(ao_quantized_model) + nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) + + param_string = _string_from_quantizer_params(quantizer_params) + path_to_dot = (FX_AO_DIR / model_case.model_id / get_dot_filename(param_string)).as_posix() + compare_nx_graph_with_reference(nx_graph, path_to_dot) + + +def _serialize_wc_param(wp) -> dict[str, Any]: + def to_json_serializable(obj): + if dataclasses.is_dataclass(obj): + return {k: to_json_serializable(v) for k, v in dataclasses.asdict(obj).items()} + elif isinstance(obj, Enum): + return obj.value + elif isinstance(obj, (list, tuple)): + return [to_json_serializable(x) for x in obj] + elif isinstance(obj, dict): + return {k: to_json_serializable(v) for k, v in obj.items()} + elif isinstance(obj, NNCFNode): + return obj.node_name + else: + return obj + + return to_json_serializable(wp) + +@pytest.mark.parametrize( + ("model_case", 
"quantizer_params", "pt2e_params"), + TEST_MODELS, + ids=TEST_MODEL_IDS, +) +@pytest.mark.parametrize( + "quantizer_builder", + [get_openvino_quantizer], + ids=["OpenVINOQuantizer"], +) +def test_openvino_wc_params( + quantizer_builder: Callable[..., OpenVINOQuantizer], + model_case: ModelCase, + quantizer_params, + pt2e_params, + regen_ref_data=False, +): + fx_model, _ = _build_torch_fx_model(model_case) + nncf_graph: NNCFGraph = GraphConverter.create_nncf_graph(fx_model) + + param_string = _string_from_quantizer_params(quantizer_params) + quantizer = quantizer_builder(**quantizer_params) + + all_weight_params, *_ = quantizer.get_nncf_weight_compression_parameters(fx_model, nncf_graph) + + wc_params = _serialize_wc_param(all_weight_params) + + ref_json_path = (FX_PT2E_DIR / quantizer.__class__.__name__ / model_case.model_id / get_wc_param_filename(param_string)) + + if regen_ref_data: + with safe_open(ref_json_path, "w") as file: + json.dump(wc_params, file, indent=4) + + with safe_open(ref_json_path, "r") as f: + ref_data = json.load(f) + + assert wc_params == ref_data, ( + f"Weight compression parameters JSON mismatch for {model_case.model_id} ({param_string}).\n" + f"Ref: {ref_json_path}" + ) diff --git a/tests/torch/test_models/__init__.py b/tests/torch/test_models/__init__.py index 95cba87cc98..f412372978b 100644 --- a/tests/torch/test_models/__init__.py +++ b/tests/torch/test_models/__init__.py @@ -26,3 +26,4 @@ from .sr_small_model import * from .unet import * from .vgg import * +from .llama import * diff --git a/tests/torch/test_models/llama.py b/tests/torch/test_models/llama.py new file mode 100644 index 00000000000..fbf4df50e21 --- /dev/null +++ b/tests/torch/test_models/llama.py @@ -0,0 +1,151 @@ +import math +from typing import Optional, Tuple +import torch +import torch.nn as nn +import torch.nn.functional as F + +EMBED_DIM = 64 +N_HEADS = 4 +HEAD_DIM = EMBED_DIM // N_HEADS +# Same as Llama 3.2 config +ROPE_THETA = 500000.0 +MAX_SEQ = 128 +BIAS = 
False + + +class LlamaRMSNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-6): + """ + Copied from src/transformers/models/llama/modeling_llama.py + LlamaRMSNorm is equivalent to T5LayerNorm + """ + super().__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.variance_epsilon = eps + + def forward(self, hidden_states): + input_dtype = hidden_states.dtype + hidden_states = hidden_states.to(torch.float32) + variance = hidden_states.pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) + return self.weight * hidden_states.to(input_dtype) + +def _rotate_half(x): + """ + Copied from src/transformers/models/llama/modeling_llama.py + Rotates half the hidden dims of the input. + """ + x1 = x[..., : x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2 :] + return torch.cat((-x2, x1), dim=-1) + +class Rotary(nn.Module): + """ + Precompute cos/sin for RoPE and apply to q,k. + Copied from src/transformers/models/llama/modeling_llama.py + Initialize the cos and sin value once in init method + """ + # Llama applies rotary to q,k before attention; see modeling_llama + def __init__(self, head_dim: int, max_seq_len: int = MAX_SEQ, theta: float = ROPE_THETA, device=None): + super().__init__() + dtype = torch.float32 + inv_freq = 1.0 / (theta ** (torch.arange(0, head_dim, 2, dtype=dtype, device=device) / head_dim)) + t = torch.arange(max_seq_len, dtype=dtype, device=device) + freqs = torch.einsum("t,f->tf", t, inv_freq) # (T, Hd/2) + emb = torch.cat((freqs, freqs), dim=-1) # (T, Hd) + self.register_buffer("cos", emb.cos()[None, None, ...], persistent=False) # (1,1,T,Hd) + self.register_buffer("sin", emb.sin()[None, None, ...], persistent=False) + def forward(self, q: torch.Tensor, k: torch.Tensor, pos: torch.Tensor): + cos = self.cos[..., pos, :] + sin = self.sin[..., pos, :] + q_embed = (q * cos) + (_rotate_half(q) * sin) + k_embed = (k * cos) + (_rotate_half(k) * sin) + return q_embed, k_embed + +class 
LlamaMLP(nn.Module): + """ + Copied from src/transformers/models/llama/modeling_llama.py + """ + def __init__(self, dim: int, mult: int = 2): + super().__init__() + # mult is used as a scaling factor of sorts. This is to define the hidden/intermediate layer size + hidden = mult * dim + self.gate_proj = nn.Linear(dim, hidden, bias=BIAS) + self.up_proj = nn.Linear(dim, hidden, bias=BIAS) + self.down_proj = nn.Linear(hidden, dim, bias=BIAS) + def forward(self, x: torch.Tensor) -> torch.Tensor: + down_proj = self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x)) + return down_proj + +class LlamaDecoderOnly(nn.Module): + """ + One Llama-style transformer block (pre-norm attn + MLP) with RoPE and KV cache. + Forward takes embeddings only. + """ + # KV caching + past_key_values flow mirrors HF implementations. :contentReference[oaicite:4]{index=4} + def __init__(self, dim: int = EMBED_DIM, n_heads: int = N_HEADS): + super().__init__() + assert dim % n_heads == 0 + self.n_heads = n_heads + self.head_dim = dim // n_heads + self.scale = 1.0 / math.sqrt(self.head_dim) + + self.attn_norm = LlamaRMSNorm(dim) + self.q_proj = nn.Linear(dim, dim, bias=BIAS) + self.k_proj = nn.Linear(dim, dim, bias=BIAS) + self.v_proj = nn.Linear(dim, dim, bias=BIAS) + self.o_proj = nn.Linear(dim, dim, bias=BIAS) + self.rope = Rotary(self.head_dim, MAX_SEQ, theta=ROPE_THETA) + + self.mlp_norm = LlamaRMSNorm(dim) + self.mlp = LlamaMLP(dim) + + def _attn(self, x: torch.Tensor, pos: torch.Tensor, past_kv: Optional[Tuple[torch.Tensor, torch.Tensor]]): + ''' + Code from LlamaAttention forward method. 
SDPA implementation similar to model.config._attn_implementation="SDPA" + ''' + B, T, C = x.shape + H, Hd = self.n_heads, self.head_dim + + # QKV projections from hidden state x + q = self.q_proj(x).view(B, T, H, Hd).transpose(1, 2) + k = self.k_proj(x).view(B, T, H, Hd).transpose(1, 2) + v = self.v_proj(x).view(B, T, H, Hd).transpose(1, 2) + + # RoPE + q, k = self.rope(q, k, pos) + + # KV cache + if past_kv is not None: + pk, pv = past_kv # (B,H,Tpast,Hd) + k = torch.cat([pk, k], dim=2) + v = torch.cat([pv, v], dim=2) + + y = torch.nn.functional.scaled_dot_product_attention( + q, k, v, + attn_mask=None, + is_causal=True, + dropout_p=0.0 + ) + + y = y.transpose(1, 2).contiguous().view(B, T, C) + y = self.o_proj(y) + return y, (k, v) + + def forward( + self, + x_embed: torch.Tensor, # (B, T_new, C) embeddings only + past_kv: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # (B,H,Tpast,Hd) + ): + # positions for the *new* tokens only + past_len = 0 if past_kv is None else past_kv[0].size(2) + T_new = x_embed.size(1) + pos = torch.arange(past_len, past_len + T_new, device=x_embed.device) + + # pre-norm attention + residual + y, kv = self._attn(self.attn_norm(x_embed), pos, past_kv) + x = x_embed + y + + # pre-norm MLP + residual + x = x + self.mlp(self.mlp_norm(x)) + return x diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot new file mode 100644 index 00000000000..0a9a27fd85b --- /dev/null +++ b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 q_proj_weight_updated_constant0" [id=2, type="get_attr"]; +"3 symmetric_weights_decompressor_q_proj_weight_0" [id=3, 
type="call_module"]; +"4 k_proj_weight_updated_constant0" [id=4, type="get_attr"]; +"5 symmetric_weights_decompressor_k_proj_weight_0" [id=5, type="call_module"]; +"6 v_proj_weight_updated_constant0" [id=6, type="get_attr"]; +"7 symmetric_weights_decompressor_v_proj_weight_0" [id=7, type="call_module"]; +"8 o_proj_weight_updated_constant0" [id=8, type="get_attr"]; +"9 symmetric_weights_decompressor_o_proj_weight_0" [id=9, type="call_module"]; +"10 mlp_gate_proj_weight_updated_constant0" [id=10, type="get_attr"]; +"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=11, type="call_module"]; +"12 mlp_up_proj_weight_updated_constant0" [id=12, type="get_attr"]; +"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=13, type="call_module"]; +"14 mlp_down_proj_weight_updated_constant0" [id=14, type="get_attr"]; +"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=15, type="call_module"]; +"16 rope_cos" [id=16, type="get_attr"]; +"17 rope_sin" [id=17, type="get_attr"]; +"18 x_embed" [id=18, type=input]; +"19 arange" [id=19, type=arange]; +"20 _assert_tensor_metadata_default" [id=20, type="_assert_tensor_metadata"]; +"21 to" [id=21, type=to]; +"22 pow_1" [id=22, type=pow]; +"23 mean" [id=23, type=mean]; +"24 add" [id=24, type=add]; +"25 rsqrt" [id=25, type=rsqrt]; +"26 mul" [id=26, type=mul]; +"27 _assert_tensor_metadata_default_1" [id=27, type="_assert_tensor_metadata"]; +"28 to_1" [id=28, type=to]; +"29 mul_1" [id=29, type=mul]; +"30 linear" [id=30, type=linear]; +"31 view" [id=31, type=view]; +"32 transpose" [id=32, type=transpose]; +"33 linear_1" [id=33, type=linear]; +"34 view_1" [id=34, type=view]; +"35 transpose_1" [id=35, type=transpose]; +"36 linear_2" [id=36, type=linear]; +"37 view_2" [id=37, type=view]; +"38 transpose_2" [id=38, type=transpose]; +"39 index" [id=39, type=index]; +"40 index_1" [id=40, type=index]; +"41 mul_2" [id=41, type=mul]; +"42 slice_1" [id=42, type=slice]; +"43 slice_2" [id=43, type=slice]; +"44 neg" [id=44, 
type=neg]; +"45 cat" [id=45, type=cat]; +"46 mul_3" [id=46, type=mul]; +"47 add_1" [id=47, type=add]; +"48 mul_4" [id=48, type=mul]; +"49 slice_3" [id=49, type=slice]; +"50 slice_4" [id=50, type=slice]; +"51 neg_1" [id=51, type=neg]; +"52 cat_1" [id=52, type=cat]; +"53 mul_5" [id=53, type=mul]; +"54 add_2" [id=54, type=add]; +"55 scaled_dot_product_attention" [id=55, type="scaled_dot_product_attention"]; +"56 transpose_3" [id=56, type=transpose]; +"57 view_3" [id=57, type=view]; +"58 linear_3" [id=58, type=linear]; +"59 add_3" [id=59, type=add]; +"60 _assert_tensor_metadata_default_2" [id=60, type="_assert_tensor_metadata"]; +"61 to_2" [id=61, type=to]; +"62 pow_2" [id=62, type=pow]; +"63 mean_1" [id=63, type=mean]; +"64 add_4" [id=64, type=add]; +"65 rsqrt_1" [id=65, type=rsqrt]; +"66 mul_6" [id=66, type=mul]; +"67 _assert_tensor_metadata_default_3" [id=67, type="_assert_tensor_metadata"]; +"68 to_3" [id=68, type=to]; +"69 mul_7" [id=69, type=mul]; +"70 linear_4" [id=70, type=linear]; +"71 silu" [id=71, type=silu]; +"72 linear_5" [id=72, type=linear]; +"73 mul_8" [id=73, type=mul]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "29 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "69 mul_7" [style=solid, label="(64,)"]; +"2 q_proj_weight_updated_constant0" -> "3 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"3 symmetric_weights_decompressor_q_proj_weight_0" -> "30 linear" [style=solid, label="(64, 64)"]; +"4 k_proj_weight_updated_constant0" -> "5 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"5 symmetric_weights_decompressor_k_proj_weight_0" -> "33 linear_1" [style=solid, label="(64, 64)"]; +"6 v_proj_weight_updated_constant0" -> "7 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"7 symmetric_weights_decompressor_v_proj_weight_0" -> "36 linear_2" [style=solid, 
label="(64, 64)"]; +"8 o_proj_weight_updated_constant0" -> "9 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"9 symmetric_weights_decompressor_o_proj_weight_0" -> "58 linear_3" [style=solid, label="(64, 64)"]; +"10 mlp_gate_proj_weight_updated_constant0" -> "11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "70 linear_4" [style=solid, label="(128, 64)"]; +"12 mlp_up_proj_weight_updated_constant0" -> "13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; +"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "72 linear_5" [style=solid, label="(128, 64)"]; +"14 mlp_down_proj_weight_updated_constant0" -> "15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"16 rope_cos" -> "39 index" [style=solid, label="(1, 1, 128, 16)"]; +"17 rope_sin" -> "40 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"18 x_embed" -> "20 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "21 to" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"19 arange" -> "39 index" [style=solid, label="(3,)"]; +"19 arange" -> "40 index_1" [style=solid, label="(3,)"]; +"21 to" -> "22 pow_1" [style=solid, label="(1, 3, 64)"]; +"21 to" -> "26 mul" [style=solid, label="(1, 3, 64)"]; +"22 pow_1" -> "23 mean" [style=solid, label="(1, 3, 64)"]; +"23 mean" -> "24 add" [style=solid, label="(1, 3, 1)"]; +"24 add" -> "25 rsqrt" [style=solid, label="(1, 3, 1)"]; +"25 rsqrt" -> "26 mul" [style=solid, label="(1, 3, 1)"]; +"26 mul" -> "27 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"26 mul" -> "28 to_1" [style=solid, label="(1, 3, 64)"]; +"28 to_1" -> "29 mul_1" [style=solid, 
label="(1, 3, 64)"]; +"29 mul_1" -> "30 linear" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "33 linear_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "36 linear_2" [style=solid, label="(1, 3, 64)"]; +"30 linear" -> "31 view" [style=solid, label="(1, 3, 64)"]; +"31 view" -> "32 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"32 transpose" -> "41 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "42 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "43 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"33 linear_1" -> "34 view_1" [style=solid, label="(1, 3, 64)"]; +"34 view_1" -> "35 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"35 transpose_1" -> "48 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "49 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "50 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"36 linear_2" -> "37 view_2" [style=solid, label="(1, 3, 64)"]; +"37 view_2" -> "38 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"38 transpose_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"39 index" -> "41 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"39 index" -> "48 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "46 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "53 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"41 mul_2" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"42 slice_1" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"43 slice_2" -> "44 neg" [style=solid, label="(1, 4, 3, 8)"]; +"44 neg" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"45 cat" -> "46 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"46 mul_3" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"47 add_1" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"48 mul_4" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"49 slice_3" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"50 slice_4" -> "51 
neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"51 neg_1" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"52 cat_1" -> "53 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"53 mul_5" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"54 add_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"55 scaled_dot_product_attention" -> "56 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"56 transpose_3" -> "57 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"57 view_3" -> "58 linear_3" [style=solid, label="(1, 3, 64)"]; +"58 linear_3" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "60 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "61 to_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "62 pow_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "66 mul_6" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"62 pow_2" -> "63 mean_1" [style=solid, label="(1, 3, 64)"]; +"63 mean_1" -> "64 add_4" [style=solid, label="(1, 3, 1)"]; +"64 add_4" -> "65 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"65 rsqrt_1" -> "66 mul_6" [style=solid, label="(1, 3, 1)"]; +"66 mul_6" -> "67 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"66 mul_6" -> "68 to_3" [style=solid, label="(1, 3, 64)"]; +"68 to_3" -> "69 mul_7" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "70 linear_4" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "72 linear_5" [style=solid, label="(1, 3, 64)"]; +"70 linear_4" -> "71 silu" [style=solid, label="(1, 3, 128)"]; +"71 silu" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"72 linear_5" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"73 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git 
a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot new file mode 100644 index 00000000000..254abcb9dc0 --- /dev/null +++ b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 q_proj_weight_updated_constant0" [id=2, type="get_attr"]; +"3 symmetric_weights_decompressor_q_proj_weight_0" [id=3, type="call_module"]; +"4 k_proj_weight_updated_constant0" [id=4, type="get_attr"]; +"5 symmetric_weights_decompressor_k_proj_weight_0" [id=5, type="call_module"]; +"6 v_proj_weight_updated_constant0" [id=6, type="get_attr"]; +"7 symmetric_weights_decompressor_v_proj_weight_0" [id=7, type="call_module"]; +"8 o_proj_weight_updated_constant0" [id=8, type="get_attr"]; +"9 symmetric_weights_decompressor_o_proj_weight_0" [id=9, type="call_module"]; +"10 mlp_gate_proj_weight_updated_constant0" [id=10, type="get_attr"]; +"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=11, type="call_module"]; +"12 mlp_up_proj_weight_updated_constant0" [id=12, type="get_attr"]; +"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=13, type="call_module"]; +"14 mlp_down_proj_weight_updated_constant0" [id=14, type="get_attr"]; +"15 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=15, type="call_module"]; +"16 rope_cos" [id=16, type="get_attr"]; +"17 rope_sin" [id=17, type="get_attr"]; +"18 x_embed" [id=18, type=input]; +"19 arange" [id=19, type=arange]; +"20 _assert_tensor_metadata_default" [id=20, type="_assert_tensor_metadata"]; +"21 to" [id=21, type=to]; +"22 pow_1" [id=22, type=pow]; +"23 mean" [id=23, type=mean]; +"24 add" [id=24, type=add]; +"25 rsqrt" [id=25, type=rsqrt]; +"26 mul" [id=26, 
type=mul]; +"27 _assert_tensor_metadata_default_1" [id=27, type="_assert_tensor_metadata"]; +"28 to_1" [id=28, type=to]; +"29 mul_1" [id=29, type=mul]; +"30 linear" [id=30, type=linear]; +"31 view" [id=31, type=view]; +"32 transpose" [id=32, type=transpose]; +"33 linear_1" [id=33, type=linear]; +"34 view_1" [id=34, type=view]; +"35 transpose_1" [id=35, type=transpose]; +"36 linear_2" [id=36, type=linear]; +"37 view_2" [id=37, type=view]; +"38 transpose_2" [id=38, type=transpose]; +"39 index" [id=39, type=index]; +"40 index_1" [id=40, type=index]; +"41 mul_2" [id=41, type=mul]; +"42 slice_1" [id=42, type=slice]; +"43 slice_2" [id=43, type=slice]; +"44 neg" [id=44, type=neg]; +"45 cat" [id=45, type=cat]; +"46 mul_3" [id=46, type=mul]; +"47 add_1" [id=47, type=add]; +"48 mul_4" [id=48, type=mul]; +"49 slice_3" [id=49, type=slice]; +"50 slice_4" [id=50, type=slice]; +"51 neg_1" [id=51, type=neg]; +"52 cat_1" [id=52, type=cat]; +"53 mul_5" [id=53, type=mul]; +"54 add_2" [id=54, type=add]; +"55 scaled_dot_product_attention" [id=55, type="scaled_dot_product_attention"]; +"56 transpose_3" [id=56, type=transpose]; +"57 view_3" [id=57, type=view]; +"58 linear_3" [id=58, type=linear]; +"59 add_3" [id=59, type=add]; +"60 _assert_tensor_metadata_default_2" [id=60, type="_assert_tensor_metadata"]; +"61 to_2" [id=61, type=to]; +"62 pow_2" [id=62, type=pow]; +"63 mean_1" [id=63, type=mean]; +"64 add_4" [id=64, type=add]; +"65 rsqrt_1" [id=65, type=rsqrt]; +"66 mul_6" [id=66, type=mul]; +"67 _assert_tensor_metadata_default_3" [id=67, type="_assert_tensor_metadata"]; +"68 to_3" [id=68, type=to]; +"69 mul_7" [id=69, type=mul]; +"70 linear_4" [id=70, type=linear]; +"71 silu" [id=71, type=silu]; +"72 linear_5" [id=72, type=linear]; +"73 mul_8" [id=73, type=mul]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "29 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "69 mul_7" [style=solid, 
label="(64,)"]; +"2 q_proj_weight_updated_constant0" -> "3 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"3 symmetric_weights_decompressor_q_proj_weight_0" -> "30 linear" [style=solid, label="(64, 64)"]; +"4 k_proj_weight_updated_constant0" -> "5 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"5 symmetric_weights_decompressor_k_proj_weight_0" -> "33 linear_1" [style=solid, label="(64, 64)"]; +"6 v_proj_weight_updated_constant0" -> "7 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"7 symmetric_weights_decompressor_v_proj_weight_0" -> "36 linear_2" [style=solid, label="(64, 64)"]; +"8 o_proj_weight_updated_constant0" -> "9 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"9 symmetric_weights_decompressor_o_proj_weight_0" -> "58 linear_3" [style=solid, label="(64, 64)"]; +"10 mlp_gate_proj_weight_updated_constant0" -> "11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "70 linear_4" [style=solid, label="(128, 64)"]; +"12 mlp_up_proj_weight_updated_constant0" -> "13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; +"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "72 linear_5" [style=solid, label="(128, 64)"]; +"14 mlp_down_proj_weight_updated_constant0" -> "15 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; +"15 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"16 rope_cos" -> "39 index" [style=solid, label="(1, 1, 128, 16)"]; +"17 rope_sin" -> "40 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"18 x_embed" -> "20 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "21 to" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "59 add_3" 
[style=solid, label="(1, 3, 64)"]; +"19 arange" -> "39 index" [style=solid, label="(3,)"]; +"19 arange" -> "40 index_1" [style=solid, label="(3,)"]; +"21 to" -> "22 pow_1" [style=solid, label="(1, 3, 64)"]; +"21 to" -> "26 mul" [style=solid, label="(1, 3, 64)"]; +"22 pow_1" -> "23 mean" [style=solid, label="(1, 3, 64)"]; +"23 mean" -> "24 add" [style=solid, label="(1, 3, 1)"]; +"24 add" -> "25 rsqrt" [style=solid, label="(1, 3, 1)"]; +"25 rsqrt" -> "26 mul" [style=solid, label="(1, 3, 1)"]; +"26 mul" -> "27 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"26 mul" -> "28 to_1" [style=solid, label="(1, 3, 64)"]; +"28 to_1" -> "29 mul_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "30 linear" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "33 linear_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "36 linear_2" [style=solid, label="(1, 3, 64)"]; +"30 linear" -> "31 view" [style=solid, label="(1, 3, 64)"]; +"31 view" -> "32 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"32 transpose" -> "41 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "42 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "43 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"33 linear_1" -> "34 view_1" [style=solid, label="(1, 3, 64)"]; +"34 view_1" -> "35 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"35 transpose_1" -> "48 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "49 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "50 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"36 linear_2" -> "37 view_2" [style=solid, label="(1, 3, 64)"]; +"37 view_2" -> "38 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"38 transpose_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"39 index" -> "41 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"39 index" -> "48 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "46 mul_3" [style=solid, label="(1, 1, 3, 16)"]; 
+"40 index_1" -> "53 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"41 mul_2" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"42 slice_1" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"43 slice_2" -> "44 neg" [style=solid, label="(1, 4, 3, 8)"]; +"44 neg" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"45 cat" -> "46 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"46 mul_3" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"47 add_1" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"48 mul_4" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"49 slice_3" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"50 slice_4" -> "51 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"51 neg_1" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"52 cat_1" -> "53 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"53 mul_5" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"54 add_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"55 scaled_dot_product_attention" -> "56 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"56 transpose_3" -> "57 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"57 view_3" -> "58 linear_3" [style=solid, label="(1, 3, 64)"]; +"58 linear_3" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "60 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "61 to_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "62 pow_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "66 mul_6" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"62 pow_2" -> "63 mean_1" [style=solid, label="(1, 3, 64)"]; +"63 mean_1" -> "64 add_4" [style=solid, label="(1, 3, 1)"]; +"64 add_4" -> "65 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"65 rsqrt_1" -> "66 mul_6" [style=solid, label="(1, 3, 1)"]; +"66 mul_6" -> "67 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"66 mul_6" -> "68 to_3" [style=solid, 
label="(1, 3, 64)"]; +"68 to_3" -> "69 mul_7" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "70 linear_4" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "72 linear_5" [style=solid, label="(1, 3, 64)"]; +"70 linear_4" -> "71 silu" [style=solid, label="(1, 3, 128)"]; +"71 silu" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"72 linear_5" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"73 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot new file mode 100644 index 00000000000..614e06a21ac --- /dev/null +++ b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 q_proj_weight_updated_constant0" [id=2, type="get_attr"]; +"3 asymmetric_weights_decompressor_q_proj_weight_0" [id=3, type="call_module"]; +"4 k_proj_weight_updated_constant0" [id=4, type="get_attr"]; +"5 asymmetric_weights_decompressor_k_proj_weight_0" [id=5, type="call_module"]; +"6 v_proj_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_v_proj_weight_0" [id=7, type="call_module"]; +"8 o_proj_weight_updated_constant0" [id=8, type="get_attr"]; +"9 asymmetric_weights_decompressor_o_proj_weight_0" [id=9, type="call_module"]; +"10 mlp_gate_proj_weight_updated_constant0" [id=10, type="get_attr"]; +"11 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=11, type="call_module"]; +"12 mlp_up_proj_weight_updated_constant0" [id=12, type="get_attr"]; +"13 
asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=13, type="call_module"]; +"14 mlp_down_proj_weight_updated_constant0" [id=14, type="get_attr"]; +"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=15, type="call_module"]; +"16 rope_cos" [id=16, type="get_attr"]; +"17 rope_sin" [id=17, type="get_attr"]; +"18 x_embed" [id=18, type=input]; +"19 arange" [id=19, type=arange]; +"20 _assert_tensor_metadata_default" [id=20, type="_assert_tensor_metadata"]; +"21 to" [id=21, type=to]; +"22 pow_1" [id=22, type=pow]; +"23 mean" [id=23, type=mean]; +"24 add" [id=24, type=add]; +"25 rsqrt" [id=25, type=rsqrt]; +"26 mul" [id=26, type=mul]; +"27 _assert_tensor_metadata_default_1" [id=27, type="_assert_tensor_metadata"]; +"28 to_1" [id=28, type=to]; +"29 mul_1" [id=29, type=mul]; +"30 linear" [id=30, type=linear]; +"31 view" [id=31, type=view]; +"32 transpose" [id=32, type=transpose]; +"33 linear_1" [id=33, type=linear]; +"34 view_1" [id=34, type=view]; +"35 transpose_1" [id=35, type=transpose]; +"36 linear_2" [id=36, type=linear]; +"37 view_2" [id=37, type=view]; +"38 transpose_2" [id=38, type=transpose]; +"39 index" [id=39, type=index]; +"40 index_1" [id=40, type=index]; +"41 mul_2" [id=41, type=mul]; +"42 slice_1" [id=42, type=slice]; +"43 slice_2" [id=43, type=slice]; +"44 neg" [id=44, type=neg]; +"45 cat" [id=45, type=cat]; +"46 mul_3" [id=46, type=mul]; +"47 add_1" [id=47, type=add]; +"48 mul_4" [id=48, type=mul]; +"49 slice_3" [id=49, type=slice]; +"50 slice_4" [id=50, type=slice]; +"51 neg_1" [id=51, type=neg]; +"52 cat_1" [id=52, type=cat]; +"53 mul_5" [id=53, type=mul]; +"54 add_2" [id=54, type=add]; +"55 scaled_dot_product_attention" [id=55, type="scaled_dot_product_attention"]; +"56 transpose_3" [id=56, type=transpose]; +"57 view_3" [id=57, type=view]; +"58 linear_3" [id=58, type=linear]; +"59 add_3" [id=59, type=add]; +"60 _assert_tensor_metadata_default_2" [id=60, type="_assert_tensor_metadata"]; +"61 to_2" [id=61, type=to]; +"62 pow_2" [id=62, 
type=pow]; +"63 mean_1" [id=63, type=mean]; +"64 add_4" [id=64, type=add]; +"65 rsqrt_1" [id=65, type=rsqrt]; +"66 mul_6" [id=66, type=mul]; +"67 _assert_tensor_metadata_default_3" [id=67, type="_assert_tensor_metadata"]; +"68 to_3" [id=68, type=to]; +"69 mul_7" [id=69, type=mul]; +"70 linear_4" [id=70, type=linear]; +"71 silu" [id=71, type=silu]; +"72 linear_5" [id=72, type=linear]; +"73 mul_8" [id=73, type=mul]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "29 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "69 mul_7" [style=solid, label="(64,)"]; +"2 q_proj_weight_updated_constant0" -> "3 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; +"3 asymmetric_weights_decompressor_q_proj_weight_0" -> "30 linear" [style=solid, label="(64, 64)"]; +"4 k_proj_weight_updated_constant0" -> "5 asymmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(64, 64)"]; +"5 asymmetric_weights_decompressor_k_proj_weight_0" -> "33 linear_1" [style=solid, label="(64, 64)"]; +"6 v_proj_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_v_proj_weight_0" -> "36 linear_2" [style=solid, label="(64, 64)"]; +"8 o_proj_weight_updated_constant0" -> "9 asymmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(64, 64)"]; +"9 asymmetric_weights_decompressor_o_proj_weight_0" -> "58 linear_3" [style=solid, label="(64, 64)"]; +"10 mlp_gate_proj_weight_updated_constant0" -> "11 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"11 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "70 linear_4" [style=solid, label="(128, 64)"]; +"12 mlp_up_proj_weight_updated_constant0" -> "13 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"13 
asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "72 linear_5" [style=solid, label="(128, 64)"]; +"14 mlp_down_proj_weight_updated_constant0" -> "15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"16 rope_cos" -> "39 index" [style=solid, label="(1, 1, 128, 16)"]; +"17 rope_sin" -> "40 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"18 x_embed" -> "20 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "21 to" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"19 arange" -> "39 index" [style=solid, label="(3,)"]; +"19 arange" -> "40 index_1" [style=solid, label="(3,)"]; +"21 to" -> "22 pow_1" [style=solid, label="(1, 3, 64)"]; +"21 to" -> "26 mul" [style=solid, label="(1, 3, 64)"]; +"22 pow_1" -> "23 mean" [style=solid, label="(1, 3, 64)"]; +"23 mean" -> "24 add" [style=solid, label="(1, 3, 1)"]; +"24 add" -> "25 rsqrt" [style=solid, label="(1, 3, 1)"]; +"25 rsqrt" -> "26 mul" [style=solid, label="(1, 3, 1)"]; +"26 mul" -> "27 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"26 mul" -> "28 to_1" [style=solid, label="(1, 3, 64)"]; +"28 to_1" -> "29 mul_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "30 linear" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "33 linear_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "36 linear_2" [style=solid, label="(1, 3, 64)"]; +"30 linear" -> "31 view" [style=solid, label="(1, 3, 64)"]; +"31 view" -> "32 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"32 transpose" -> "41 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "42 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "43 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"33 linear_1" -> "34 view_1" [style=solid, label="(1, 3, 64)"]; +"34 view_1" -> "35 
transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"35 transpose_1" -> "48 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "49 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "50 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"36 linear_2" -> "37 view_2" [style=solid, label="(1, 3, 64)"]; +"37 view_2" -> "38 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"38 transpose_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"39 index" -> "41 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"39 index" -> "48 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "46 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "53 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"41 mul_2" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"42 slice_1" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"43 slice_2" -> "44 neg" [style=solid, label="(1, 4, 3, 8)"]; +"44 neg" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"45 cat" -> "46 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"46 mul_3" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"47 add_1" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"48 mul_4" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"49 slice_3" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"50 slice_4" -> "51 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"51 neg_1" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"52 cat_1" -> "53 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"53 mul_5" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"54 add_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"55 scaled_dot_product_attention" -> "56 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"56 transpose_3" -> "57 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"57 view_3" -> "58 linear_3" [style=solid, label="(1, 3, 64)"]; +"58 linear_3" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "60 
_assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "61 to_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "62 pow_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "66 mul_6" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"62 pow_2" -> "63 mean_1" [style=solid, label="(1, 3, 64)"]; +"63 mean_1" -> "64 add_4" [style=solid, label="(1, 3, 1)"]; +"64 add_4" -> "65 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"65 rsqrt_1" -> "66 mul_6" [style=solid, label="(1, 3, 1)"]; +"66 mul_6" -> "67 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"66 mul_6" -> "68 to_3" [style=solid, label="(1, 3, 64)"]; +"68 to_3" -> "69 mul_7" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "70 linear_4" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "72 linear_5" [style=solid, label="(1, 3, 64)"]; +"70 linear_4" -> "71 silu" [style=solid, label="(1, 3, 128)"]; +"71 silu" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"72 linear_5" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"73 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot new file mode 100644 index 00000000000..2841824b5a3 --- /dev/null +++ b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_weight_updated_constant0" [id=0, type="get_attr"]; +"1 symmetric_weights_decompressor_linear_weight_0" [id=1, type="call_module"]; +"2 linear_bias" [id=2, type="get_attr"]; +"3 wte_weight_1_updated_constant0" 
[id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 lm_head_bias" [id=5, type="get_attr"]; +"6 input_ids" [id=6, type=input]; +"7 embedding" [id=7, type=embedding]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_weight_updated_constant0" -> "1 symmetric_weights_decompressor_linear_weight_0" [style=solid, label="(2048, 1)"]; +"1 symmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"2 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "7 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"6 input_ids" -> "7 embedding" [style=solid, label="(5,)"]; +"7 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot new file mode 100644 index 00000000000..0382f7e5934 --- /dev/null +++ b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_weight_updated_constant0" [id=0, type="get_attr"]; +"1 symmetric_weights_decompressor_linear_weight_0" [id=1, type="call_module"]; +"2 linear_bias" [id=2, type="get_attr"]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 
symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 lm_head_bias" [id=5, type="get_attr"]; +"6 input_ids" [id=6, type=input]; +"7 embedding" [id=7, type=embedding]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_weight_updated_constant0" -> "1 symmetric_weights_decompressor_linear_weight_0" [style=solid, label="(2048, 1)"]; +"1 symmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"2 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "7 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"6 input_ids" -> "7 embedding" [style=solid, label="(5,)"]; +"7 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot new file mode 100644 index 00000000000..03fc9e9c6a0 --- /dev/null +++ b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_weight_updated_constant0" [id=0, type="get_attr"]; +"1 asymmetric_weights_decompressor_linear_weight_0" [id=1, type="call_module"]; +"2 linear_bias" [id=2, type="get_attr"]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 
asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 lm_head_bias" [id=5, type="get_attr"]; +"6 input_ids" [id=6, type=input]; +"7 embedding" [id=7, type=embedding]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_weight_updated_constant0" -> "1 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"1 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"2 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "7 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"6 input_ids" -> "7 embedding" [style=solid, label="(5,)"]; +"7 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json new file mode 100644 index 00000000000..7cfdf2719df --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json @@ -0,0 +1,128 @@ +[ + { + "weight_name": "q_proj_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": 
null + } + }, + { + "weight_name": "k_proj_weight", + "node_with_weight": "linear_1", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "v_proj_weight", + "node_with_weight": "linear_2", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "o_proj_weight", + "node_with_weight": "linear_3", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_gate_proj_weight", + "node_with_weight": "linear_4", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_up_proj_weight", + "node_with_weight": "linear_5", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_down_proj_weight", + "node_with_weight": "linear_6", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 128 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + } +] \ No newline at end of file diff --git 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot new file mode 100644 index 00000000000..e04aee29640 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 
index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, 
type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" 
[style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 
mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 
mlp_up_proj_weight_updated_constant0" -> "69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; +"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot new file mode 100644 index 00000000000..076e46114eb --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 
q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, 
type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 
64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; 
+"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" 
[style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot new file mode 100644 index 00000000000..e04aee29640 --- /dev/null +++ 
b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 
neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" 
[style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, 
label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 
o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; +"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 
mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot new file mode 100644 index 00000000000..076e46114eb --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, 
type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 
asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" 
[style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> 
"47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 
mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot new file mode 100644 index 00000000000..6c9cb7c162f --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 
_assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 asymmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 asymmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" 
[id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 
add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 asymmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(64, 64)"]; +"27 asymmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, 
label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 asymmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(64, 64)"]; +"51 asymmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> 
"56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; +"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json new file mode 100644 index 00000000000..e1baa81d0dc --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json @@ -0,0 +1,128 @@ +[ + { + "weight_name": "q_proj_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "k_proj_weight", + "node_with_weight": "linear_1", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "v_proj_weight", + "node_with_weight": "linear_2", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "o_proj_weight", + "node_with_weight": "linear_3", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_gate_proj_weight", + "node_with_weight": "linear_4", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": 
"mlp_up_proj_weight", + "node_with_weight": "linear_5", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_down_proj_weight", + "node_with_weight": "linear_6", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 128 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot new file mode 100644 index 00000000000..31fb9463c88 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" 
[id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 
asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 
q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" 
-> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 
to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot new file mode 100644 index 00000000000..076e46114eb --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, 
type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, 
type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 
pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 
symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> 
"53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" 
[style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot new file mode 100644 index 00000000000..31fb9463c88 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 
linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, 
type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 
4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" 
[style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 
128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot new file mode 100644 index 00000000000..076e46114eb --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; 
+"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 
_assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" 
-> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 
mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, 
label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot new file mode 100644 index 00000000000..99c2d53d916 --- /dev/null +++ 
b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 
neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" 
[style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, 
label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 
o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; +"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" 
-> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json new file mode 100644 index 00000000000..69d4cf0f6a8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json @@ -0,0 +1,128 @@ +[ + { + "weight_name": "q_proj_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "k_proj_weight", + "node_with_weight": "linear_1", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "v_proj_weight", + "node_with_weight": "linear_2", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "o_proj_weight", + "node_with_weight": "linear_3", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": 
null + } + }, + { + "weight_name": "mlp_gate_proj_weight", + "node_with_weight": "linear_4", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "mlp_up_proj_weight", + "node_with_weight": "linear_5", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "mlp_down_proj_weight", + "node_with_weight": "linear_6", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 128 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot new file mode 100644 index 00000000000..29de7b02841 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 
_assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 asymmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 asymmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 asymmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, 
type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 
3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 asymmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(64, 64)"]; +"22 asymmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 asymmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(64, 64)"]; +"27 asymmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 
index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 asymmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(64, 64)"]; +"51 asymmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 
add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json new file mode 100644 index 00000000000..fd8fbda6f54 --- /dev/null 
+++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json @@ -0,0 +1,38 @@ +[ + { + "weight_name": "wte_weight_1", + "node_with_weight": "embedding", + "weight_port_id": 0, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "linear_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, 
label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" 
[id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 
asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 
embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, 
type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json new file mode 100644 index 00000000000..81205ac2ca8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json @@ -0,0 +1,38 @@ +[ + { + "weight_name": "wte_weight_1", + "node_with_weight": "embedding", + "weight_port_id": 0, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + 
"reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "linear_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot new file mode 100644 index 00000000000..0a7bb5fe8f8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" 
[style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot new file mode 100644 index 00000000000..4248f98aec3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 symmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" 
[style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 symmetric_weights_decompressor_linear_weight_0" [style=solid, label="(2048, 1)"]; +"7 symmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot new file mode 100644 index 00000000000..0a7bb5fe8f8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, 
label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot new file mode 100644 index 00000000000..4248f98aec3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 symmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" 
-> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 symmetric_weights_decompressor_linear_weight_0" [style=solid, label="(2048, 1)"]; +"7 symmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot new file mode 100644 index 00000000000..0a7bb5fe8f8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" 
[id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json new file mode 100644 index 00000000000..49d45c1fffb --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json @@ -0,0 +1,38 @@ +[ + { + "weight_name": "wte_weight_1", + "node_with_weight": "embedding", + "weight_port_id": 0, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "linear_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + 
"group_size": -1, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 
linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} From 4278cfd71a4bb69c0a9b9aa37eeff69e46b7ff2b Mon Sep 17 00:00:00 2001 From: anzr299 Date: Fri, 10 Oct 2025 18:25:34 +0400 Subject: [PATCH 49/91] update reference graphs; use more samples for calibration dataset. This is because mixed precision algo has clashes in ranking layer sensitivities with very few samples. --- tests/executorch/test_quantizer.py | 5 ++++- ...sensitivity_metric_hessian_input_activation.dot | 12 ++++++------ ...ensitivity_metric_mean_activation_magnitude.dot | 12 ++++++------ ...ensitivity_metric_weight_quantization_error.dot | 12 ++++++------ ..._sensitivity_metric_max_activation_variance.dot | 12 ++++++------ ...sensitivity_metric_mean_activation_variance.dot | 12 ++++++------ ..._sensitivity_metric_max_activation_variance.dot | 14 +++++++------- ...sensitivity_metric_mean_activation_variance.dot | 14 +++++++------- 8 files changed, 48 insertions(+), 45 deletions(-) diff --git a/tests/executorch/test_quantizer.py b/tests/executorch/test_quantizer.py index 3015a115076..9d2b68b2ba4 100644 --- a/tests/executorch/test_quantizer.py +++ b/tests/executorch/test_quantizer.py @@ -61,9 +61,12 @@ def _build_torch_fx_model(model_case: ModelCase) -> tuple[torch.fx.GraphModule, def _get_calibration_dataset(example_input: torch.Tensor) -> nncf.Dataset: + torch.manual_seed(42) def transform_fn(x): return x.to("cpu") - return nncf.Dataset([example_input], transform_fn) + sample_1 = torch.randint_like(example_input, 0,10) + sample_2 = torch.randint_like(example_input, 0, 10) + return nncf.Dataset([example_input, sample_1, sample_2], transform_fn) def get_openvino_quantizer(*args, **kwargs) -> OpenVINOQuantizer: diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot 
b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot index e04aee29640..076e46114eb 100644 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot @@ -64,11 +64,11 @@ strict digraph { "62 to_3" [id=62, type=to]; "63 mul_7" [id=63, type=mul]; "64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; "66 linear_4" [id=66, type=linear]; "67 silu" [id=67, type=silu]; "68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; "70 linear_5" [id=70, type=linear]; "71 mul_8" [id=71, type=mul]; "72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; @@ -154,12 +154,12 @@ strict digraph { "62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; "63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; "63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" 
-> "66 linear_4" [style=solid, label="(128, 64)"]; "66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; "67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 mlp_up_proj_weight_updated_constant0" -> "69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; -"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; "70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; "71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; "72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot index e04aee29640..076e46114eb 100644 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot @@ -64,11 +64,11 @@ strict digraph { "62 to_3" [id=62, type=to]; "63 mul_7" [id=63, type=mul]; "64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; "66 
linear_4" [id=66, type=linear]; "67 silu" [id=67, type=silu]; "68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; "70 linear_5" [id=70, type=linear]; "71 mul_8" [id=71, type=mul]; "72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; @@ -154,12 +154,12 @@ strict digraph { "62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; "63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; "63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; "66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; "67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 mlp_up_proj_weight_updated_constant0" -> "69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; -"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; "70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; "71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; "72 mlp_down_proj_weight_updated_constant0" -> "73 
asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot index 6c9cb7c162f..c62d8eb460e 100644 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot @@ -16,7 +16,7 @@ strict digraph { "14 to_1" [id=14, type=to]; "15 mul_1" [id=15, type=mul]; "16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; "18 linear" [id=18, type=linear]; "19 view" [id=19, type=view]; "20 transpose" [id=20, type=transpose]; @@ -26,7 +26,7 @@ strict digraph { "24 view_1" [id=24, type=view]; "25 transpose_1" [id=25, type=transpose]; "26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; -"27 asymmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; "28 linear_2" [id=28, type=linear]; "29 view_2" [id=29, type=view]; "30 transpose_2" [id=30, type=transpose]; @@ -97,8 +97,8 @@ strict digraph { "15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; "15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; "15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; -"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, 
label="(2048, 1)"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; "18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; "19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; "20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; @@ -111,8 +111,8 @@ strict digraph { "25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; "25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; "25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"26 v_proj_weight_updated_constant0" -> "27 asymmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(64, 64)"]; -"27 asymmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; "28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; "29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; "30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot index 076e46114eb..31fb9463c88 100644 --- 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot @@ -64,7 +64,7 @@ strict digraph { "62 to_3" [id=62, type=to]; "63 mul_7" [id=63, type=mul]; "64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; "66 linear_4" [id=66, type=linear]; "67 silu" [id=67, type=silu]; "68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; @@ -72,7 +72,7 @@ strict digraph { "70 linear_5" [id=70, type=linear]; "71 mul_8" [id=71, type=mul]; "72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; "74 linear_6" [id=74, type=linear]; "75 add_5" [id=75, type=add]; "76 output" [id=76, type=output]; @@ -154,16 +154,16 @@ strict digraph { "62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; "63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; "63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; "66 linear_4" -> 
"67 silu" [style=solid, label="(1, 3, 128)"]; "67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; "68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; "70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; "71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; "74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; "75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; } diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot index 076e46114eb..31fb9463c88 100644 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot @@ -64,7 +64,7 @@ strict digraph { "62 to_3" [id=62, type=to]; "63 mul_7" [id=63, type=mul]; "64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 
symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; "66 linear_4" [id=66, type=linear]; "67 silu" [id=67, type=silu]; "68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; @@ -72,7 +72,7 @@ strict digraph { "70 linear_5" [id=70, type=linear]; "71 mul_8" [id=71, type=mul]; "72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; "74 linear_6" [id=74, type=linear]; "75 add_5" [id=75, type=add]; "76 output" [id=76, type=output]; @@ -154,16 +154,16 @@ strict digraph { "62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; "63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; "63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; "66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; "67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; "68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; "70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; "71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 
mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; "74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; "75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; } diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot index 4248f98aec3..0a7bb5fe8f8 100644 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot @@ -3,22 +3,22 @@ strict digraph { "1 lm_head_bias" [id=1, type="get_attr"]; "2 input_ids" [id=2, type=input]; "3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; "5 embedding" [id=5, type=embedding]; "6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 symmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; "8 linear" [id=8, type=linear]; "9 linear_1" [id=9, 
type=linear]; "10 output" [id=10, type=output]; "0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; "1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; "2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; "5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 linear_weight_updated_constant0" -> "7 symmetric_weights_decompressor_linear_weight_0" [style=solid, label="(2048, 1)"]; -"7 symmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; "8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; "9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; } diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot index 4248f98aec3..0a7bb5fe8f8 100644 --- 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot @@ -3,22 +3,22 @@ strict digraph { "1 lm_head_bias" [id=1, type="get_attr"]; "2 input_ids" [id=2, type=input]; "3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; "5 embedding" [id=5, type=embedding]; "6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 symmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; "8 linear" [id=8, type=linear]; "9 linear_1" [id=9, type=linear]; "10 output" [id=10, type=output]; "0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; "1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; "2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; "5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 linear_weight_updated_constant0" -> "7 
symmetric_weights_decompressor_linear_weight_0" [style=solid, label="(2048, 1)"]; -"7 symmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; "8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; "9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; } From 6fd52162393bbb6b06ab5d2e2120a860c29dfa9c Mon Sep 17 00:00:00 2001 From: anzr299 Date: Fri, 10 Oct 2025 18:41:06 +0400 Subject: [PATCH 50/91] remove groupsize values as return statement from get_weight_compression_params --- .../algorithms/weight_compression/algorithm.py | 3 +-- .../algorithms/weight_compression/algorithm.py | 13 ++++--------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 562cb1ce202..088a18de5ad 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -114,7 +114,7 @@ def apply( ) -> torch.fx.GraphModule: self._algo.set_backend_entity(model) - all_weight_params, ratio_defining_params, group_size_values, skipped_weight_params = ( + all_weight_params, ratio_defining_params, skipped_weight_params = ( self._quantizer.get_weight_compression_parameters(model, graph) ) @@ -125,7 +125,6 @@ def apply( statistic_points, all_weight_params, ratio_defining_params, - group_size_values, skipped_weight_params, ) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 7145fe30b80..90c4d1d415e 100644 --- 
a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -528,7 +528,6 @@ def _set_weight_compression_config( model: TModel, graph: NNCFGraph, statistics_points: StatisticPointsContainer, - group_size_values: dict[str, int], ) -> None: """ Sets the appropriate compression configuration for weights based on some criteria. @@ -830,7 +829,6 @@ def get_weight_compression_parameters( ) -> tuple[ list[WeightCompressionParameters], list[WeightCompressionParameters], - dict[str, int], list[WeightCompressionParameters], ]: """ @@ -844,8 +842,7 @@ def get_weight_compression_parameters( :param graph: NNCFGraph instance. :return: A tuple consisting of a list of all weight compression parameters, based on the Weight Compression algorithm configuration, list of ratio defining parameters(weights that are used - for ratio calculation between primary and backup precisions), A dictionary mapping weight - names to their group size values and list of weight parameters to skip. + for ratio calculation between primary and backup precisions), and list of weight parameters to skip. 
""" nodes_to_compress = self.get_nodes_to_compress(graph) @@ -921,7 +918,7 @@ def get_weight_compression_parameters( for weight_param in ratio_defining_params: weight_param.compression_config = self._get_primary_config(group_size_values[weight_param.weight_name]) - return all_weight_params, ratio_defining_params, group_size_values, skipped_weight_params + return all_weight_params, ratio_defining_params, skipped_weight_params def apply( self, @@ -933,7 +930,7 @@ def apply( self.set_backend_entity(model) # Get processed weight compression parameters ready for compression - all_weight_params, ratio_defining_params, group_size_values, skipped_weight_params = ( + all_weight_params, ratio_defining_params, skipped_weight_params = ( self.get_weight_compression_parameters(model, graph) ) return self.apply_with_parameters( @@ -943,7 +940,6 @@ def apply( statistic_points, all_weight_params, ratio_defining_params, - group_size_values, skipped_weight_params, ) @@ -955,7 +951,6 @@ def apply_with_parameters( statistic_points: StatisticPointsContainer, all_weight_params: list[WeightCompressionParameters], ratio_defining_params: list[WeightCompressionParameters], - group_size_values: dict[str, int], skipped_weight_params: list[WeightCompressionParameters], ): # Collect statistics for the weights compression @@ -963,7 +958,7 @@ def apply_with_parameters( model, graph, statistic_points, dataset, ratio_defining_params, all_weight_params ) # Set weight compression configuration - self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values) + self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points) # Print statistics nncf_logger.info( From 118b6111dfa8465deaeaff6f601cc36007d5b0d1 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 13 Oct 2025 12:58:25 +0400 Subject: [PATCH 51/91] update algorithm --- src/nncf/quantization/algorithms/weight_compression/algorithm.py | 1 + 1 file changed, 1 insertion(+) diff 
--git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 90c4d1d415e..996040e1687 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -867,6 +867,7 @@ def get_weight_compression_parameters( weight_dtype = self._backend_entity.get_weight_dtype(node, weight_port_id, model, graph) weight_shape = self._backend_entity.get_weight_shape(node, weight_port_id, graph) reduction_axes = self._backend_entity.get_reduction_axes(node, weight_port_id, graph) + wc_config = None if is_target_node and self.is_weight_compression_supported(weight_dtype, self._mode): if ( From e9f3cd4ebe240921fba6c9c1a6a04d34d57dba87 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 13 Oct 2025 13:01:32 +0400 Subject: [PATCH 52/91] change WeightCompression to OriginalWeightCompression in experimental algorithm --- .../quantization/algorithms/weight_compression/algorithm.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 088a18de5ad..8ebdbb35134 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -24,8 +24,7 @@ from nncf.common.utils.backend import BackendType from nncf.experimental.quantization.quantizer import Quantizer from nncf.quantization.algorithms.algorithm import Algorithm -from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression -from nncf.quantization.algorithms.weight_compression.algorithm import get_weight_compression_configuration +from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression as OriginalWeightCompression from nncf import 
CompressWeightsMode from nncf import IgnoredScope from nncf import BackupMode @@ -85,7 +84,7 @@ def __init__( self._sensitivity_metric = sensitivity_metric self._compression_format = compression_format - self._algo = WeightCompression( + self._algo = OriginalWeightCompression( mode=self._mode, ratio=self._ratio, group_size=self._group_size, From a969e583ff6a2c59c1271e9ff156ff9e751a0884 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 13 Oct 2025 13:23:23 +0400 Subject: [PATCH 53/91] update docstrings as discussed offline --- .../weight_compression/algorithm.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 996040e1687..ddab9b261ef 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -543,12 +543,11 @@ def _set_weight_compression_config( primary_precision_weight_params = self._mixed_precision_algo.apply( model, graph, statistics_points, weight_params=ratio_defining_params ) + # ratio_defining_params are all in primary precision. Update parameters + # which need to be set to backup precision for weight_param in ratio_defining_params: - # We already set these in primary precision. Mixed precision algo returns - # layers which should be in primary precision. 
Let it be if weight_param in primary_precision_weight_params: continue - # Set all layers other than the ones returned by mixed precision to backup precision weight_param.compression_config = self._get_backup_config(weight_param.weight_dtype) # Check if group size is valid for each weight in ratio_defining_params failed_nodes = [] @@ -794,7 +793,7 @@ def _collect_statistics_and_statistic_points( dataset: Dataset, ratio_defining_params: list[WeightCompressionParameters], all_weight_params: list[WeightCompressionParameters], - ): + ) -> tuple[dict[str, WCTensorStatistic], StatisticPointsContainer]: """ Collects and computes statistics required for weight compression. @@ -818,9 +817,10 @@ def _collect_statistics_and_statistic_points( if statistic_points is None: statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) - return self._get_statistics_for_weights_compression( + statistics = self._get_statistics_for_weights_compression( matmul_input_to_output_nodes_map, statistic_points - ), statistic_points + ) + return statistics, statistic_points def get_weight_compression_parameters( self, @@ -834,9 +834,10 @@ def get_weight_compression_parameters( """ Generates a list of weight compression parameters based on the Weight Compression algorithm configuration. Determines the appropriate quantization parameters for each node eligible for - weight compression. Also, Generates a mapping of target node names to the collected statistics - based on the provided statistic_points. If statistic_points is None, collects required - compression statistics on the given dataset. + weight compression. Also, returns a list of ratio defining parameters which are a subset of + all_weight_parameters. This is based on parameters like all_layers. 
Lastly, it gives a list + of skipped layers based on parameters like ignored scope or depending on the group size value + adjustment. :param model: Backend-specific input model. :param graph: NNCFGraph instance. From 71d0597e7f625be74579c545f47a3aadb9476d81 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 13 Oct 2025 14:41:47 +0400 Subject: [PATCH 54/91] revert torchaoadapter code --- .../quantizer/torch_ao_adapter.py | 24 +------------------ 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py index 19dfb5314ce..5ae585981c5 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py @@ -34,7 +34,6 @@ from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter from nncf.experimental.torch.fx.node_utils import get_node_args from nncf.tensor.definitions import TensorDataType -from nncf import CompressWeightsMode EdgeOrNode = Union[tuple[torch.fx.Node, torch.fx.Node]] @@ -47,25 +46,6 @@ class TorchAOQuantizerAdapter(Quantizer): def __init__(self, quantizer: TorchAOQuantizer): self._quantizer = quantizer - def _get_compression_mode_from_qconfig(qp: QuantizationPointBase): - if qp.qconfig.num_bits == 4 and qp.qconfig.mode == QuantizationMode.ASYMMETRIC: - return CompressWeightsMode.INT4_ASYM - elif qp.qconfig.num_bits == 4 and qp.qconfig.mode == QuantizationMode.SYMMETRIC: - return CompressWeightsMode.INT4_SYM - elif qp.qconfig.num_bits == 8 and qp.qconfig.mode == QuantizationMode.ASYMMETRIC: - return CompressWeightsMode.INT8_ASYM - elif qp.qconfig.num_bits == 8 and qp.qconfig.mode == QuantizationMode.SYMMETRIC: - return CompressWeightsMode.INT8_SYM - - def get_wc_config_node_map(self, model, nncf_graph): - quantization_setup = self.get_quantization_setup(model, nncf_graph) - qps = 
quantization_setup.quantization_points - print(quantization_setup) - for _,qp in qps.items(): - assert len(qp.directly_quantized_operator_node_names) == 1, "Weights compression does not support shared configs" - qps = {qp.directly_quantized_operator_node_names[0]: self._get_compression_mode_from_qconfig(qp) for id,qp in qps.items()} - return qps - def transform_prior_quantization(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: return self._quantizer.transform_for_annotation(model) @@ -136,7 +116,6 @@ def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) - :return: A SingleConfigQuantizerSetup containing quantization points derived from the annotated model. """ edge_or_node_to_qspec = _get_edge_or_node_to_qspec(annotated) - print(edge_or_node_to_qspec) # Node means all output edges should be quantized. # Edge means only one edge should be quantized. edge_or_node_to_group_id = _get_edge_or_node_to_group_id(edge_or_node_to_qspec) @@ -171,7 +150,6 @@ def get_quantizer_config_from_annotated_model(annotated: torch.fx.GraphModule) - raise nncf.InternalError(msg) dtype = TensorDataType.int8 if qspec.dtype is torch.int8 else TensorDataType.uint8 - dtype = TensorDataType.int4 if qspec.qmax in [15, 8] else dtype mode = ( QuantizationMode.SYMMETRIC if qspec.qscheme in [torch.per_channel_symmetric, torch.per_tensor_symmetric] @@ -233,4 +211,4 @@ def _unwrap_shared_qspec_safe(qspec: QuantizationSpec, edge_or_node_to_qspec: di if i == MAX_DEPTH: msg = f"Shared qspecs referenced to each other more than the limit: {MAX_DEPTH}" raise RuntimeError(msg) - return qspec + return qspec \ No newline at end of file From bf671ff72007de724bf8cb9f24114acbe1cb0d7e Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 14 Oct 2025 11:01:16 +0400 Subject: [PATCH 55/91] precommit fix --- .../weight_compression/algorithm.py | 8 +-- .../torch/fx/quantization/quantize_pt2e.py | 6 +- tests/executorch/test_quantizer.py | 60 +++++++++++-------- 
tests/torch/test_models/__init__.py | 2 +- tests/torch/test_models/llama.py | 59 +++++++++++------- 5 files changed, 80 insertions(+), 55 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 8ebdbb35134..81a2ae9b53d 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -13,10 +13,12 @@ import torch -import nncf from nncf import AdvancedCompressionParameters +from nncf import BackupMode from nncf import CompressionFormat +from nncf import CompressWeightsMode from nncf import Dataset +from nncf import IgnoredScope from nncf import SensitivityMetric from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.graph import NNCFNode @@ -25,9 +27,7 @@ from nncf.experimental.quantization.quantizer import Quantizer from nncf.quantization.algorithms.algorithm import Algorithm from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression as OriginalWeightCompression -from nncf import CompressWeightsMode -from nncf import IgnoredScope -from nncf import BackupMode + class WeightsCompression(Algorithm): """ diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index 1a496eedb2d..90064eedd05 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -37,9 +37,9 @@ from nncf.experimental.torch.fx.transformations import QUANTIZE_NODE_TARGETS from nncf.experimental.torch.fx.transformations import DuplicateDQPassNoAnnotations from nncf.experimental.torch.fx.transformations import compress_post_quantize_transformation -from nncf.quantization.algorithms.weight_compression.algorithm import get_weight_compression_configuration from 
nncf.quantization.advanced_parameters import AdvancedBiasCorrectionParameters from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters +from nncf.quantization.algorithms.weight_compression.algorithm import get_weight_compression_configuration from nncf.quantization.range_estimator import RangeEstimatorParameters @@ -218,7 +218,7 @@ def compress_pt2e( backup_mode = wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM) sensitivity_metric = sensitivity_metric compression_format = compression_format - ignored_scope = nncf.IgnoredScope() # This is already defined in the quantizer object + ignored_scope = nncf.IgnoredScope() # This is already defined in the quantizer object weight_compression_configuration = get_weight_compression_configuration( mode, @@ -240,7 +240,7 @@ def compress_pt2e( quantizer=quantizer, subset_size=subset_size, compression_format=compression_format, - **weight_compression_configuration + **weight_compression_configuration, ) # Here the model is annotated diff --git a/tests/executorch/test_quantizer.py b/tests/executorch/test_quantizer.py index 9d2b68b2ba4..2eb58489db3 100644 --- a/tests/executorch/test_quantizer.py +++ b/tests/executorch/test_quantizer.py @@ -1,40 +1,44 @@ # Copyright (c) 2025 Intel Corporation # Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
-from dataclasses import dataclass -from functools import partial -from typing import Any, Callable, Optional import dataclasses import json +from dataclasses import dataclass from enum import Enum +from functools import partial +from typing import Any, Callable, Optional import pytest import torch import torch.fx +from torch.ao.quantization.quantize_pt2e import convert_pt2e +from torch.ao.quantization.quantize_pt2e import prepare_pt2e import nncf +from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer +from executorch.backends.openvino.quantizer.quantizer import QuantizationMode from nncf.common.graph import NNCFGraph +from nncf.common.graph.graph import NNCFNode from nncf.common.utils.os import safe_open -from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter from nncf.experimental.torch.fx import compress_pt2e - +from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter from tests.cross_fw.shared.nx_graph import compare_nx_graph_with_reference from tests.cross_fw.shared.paths import TEST_ROOT -from tests.torch.test_models.synthetic import ShortTransformer from tests.torch.test_models.llama import LlamaDecoderOnly +from tests.torch.test_models.synthetic import ShortTransformer from tests.torch2.fx.helpers import get_torch_fx_model -from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e, convert_pt2e - -from executorch.backends.openvino.quantizer.quantizer import ( - OpenVINOQuantizer, - QuantizationMode, -) -from nncf.common.graph.graph import NNCFNode - FX_PT2E_DIR = TEST_ROOT / "torch2" / "data" / "fx" / "compress_pt2e" -FX_AO_DIR = TEST_ROOT / "torch2" / "data" / "fx" / "ao_compression_OpenVINOQuantizer" +FX_AO_DIR = TEST_ROOT / "torch2" / "data" / "fx" / "ao_compression_OpenVINOQuantizer" @dataclass @@ -62,9 +66,11 @@ def _build_torch_fx_model(model_case: ModelCase) -> tuple[torch.fx.GraphModule, def _get_calibration_dataset(example_input: torch.Tensor) -> nncf.Dataset: torch.manual_seed(42) + 
def transform_fn(x): return x.to("cpu") - sample_1 = torch.randint_like(example_input, 0,10) + + sample_1 = torch.randint_like(example_input, 0, 10) sample_2 = torch.randint_like(example_input, 0, 10) return nncf.Dataset([example_input, sample_1, sample_2], transform_fn) @@ -78,14 +84,14 @@ def _string_from_quantizer_params(qparams: dict[str, Any], pt2e_param: Optional[ gs = qparams.get("group_size", "-1") ratio = qparams.get("ratio", "1") all_layers = qparams.get("all_layers", "False") - if(pt2e_param is None): + if pt2e_param is None: return f"{mode.value}_gs{gs}_ratio{ratio}_all_layers_{all_layers}" sensitivity_metric = pt2e_param.get("sensitivity_metric", "None") return f"{mode.value}_gs{gs}_ratio{ratio}_all_layers_{all_layers}_sensitivity_metric_{sensitivity_metric}" BASE_MODELS = ( - ModelCase(LlamaDecoderOnly, "LlamaDecoderOnly", [1,3,64]), + ModelCase(LlamaDecoderOnly, "LlamaDecoderOnly", [1, 3, 64]), ModelCase(partial(ShortTransformer, 64, 128, True), "short_transformer_shared", [5]), ) @@ -109,7 +115,7 @@ def _string_from_quantizer_params(qparams: dict[str, Any], pt2e_param: Optional[ for model in BASE_MODELS for qparam in QUANTIZER_PARAMS for pt2e_param in ( - [{}] + [{}] if ( (qparam.get("mode") in {QuantizationMode.INT8WO_ASYM, QuantizationMode.INT8WO_SYM}) or (qparam.get("ratio") is None) @@ -129,13 +135,11 @@ def _string_from_quantizer_params(qparams: dict[str, Any], pt2e_param: Optional[ TEST_MODELS, ids=TEST_MODEL_IDS, ) - @pytest.mark.parametrize( "quantizer_builder", [get_openvino_quantizer], ids=["OpenVINOQuantizer"], ) - def test_compress_pt2e( quantizer_builder: Callable[..., OpenVINOQuantizer], model_case: ModelCase, @@ -165,7 +169,9 @@ def test_compress_pt2e( nncf_graph: NNCFGraph = GraphConverter.create_nncf_graph(quantized_model) nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) param_string = _string_from_quantizer_params(quantizer_params, pt2e_params) - path_to_dot = (FX_PT2E_DIR / quantizer.__class__.__name__ / 
model_case.model_id / get_dot_filename(param_string)).as_posix() + path_to_dot = ( + FX_PT2E_DIR / quantizer.__class__.__name__ / model_case.model_id / get_dot_filename(param_string) + ).as_posix() compare_nx_graph_with_reference(nx_graph, path_to_dot) @@ -217,6 +223,7 @@ def to_json_serializable(obj): return to_json_serializable(wp) + @pytest.mark.parametrize( ("model_case", "quantizer_params", "pt2e_params"), TEST_MODELS, @@ -232,7 +239,7 @@ def test_openvino_wc_params( model_case: ModelCase, quantizer_params, pt2e_params, - regen_ref_data=False, + regen_ref_data, ): fx_model, _ = _build_torch_fx_model(model_case) nncf_graph: NNCFGraph = GraphConverter.create_nncf_graph(fx_model) @@ -244,7 +251,9 @@ def test_openvino_wc_params( wc_params = _serialize_wc_param(all_weight_params) - ref_json_path = (FX_PT2E_DIR / quantizer.__class__.__name__ / model_case.model_id / get_wc_param_filename(param_string)) + ref_json_path = ( + FX_PT2E_DIR / quantizer.__class__.__name__ / model_case.model_id / get_wc_param_filename(param_string) + ) if regen_ref_data: with safe_open(ref_json_path, "w") as file: @@ -254,6 +263,5 @@ def test_openvino_wc_params( ref_data = json.load(f) assert wc_params == ref_data, ( - f"Weight compression parameters JSON mismatch for {model_case.model_id} ({param_string}).\n" - f"Ref: {ref_json_path}" + f"Weight compression parameters JSON mismatch for {model_case.model_id} ({param_string}).\nRef: {ref_json_path}" ) diff --git a/tests/torch/test_models/__init__.py b/tests/torch/test_models/__init__.py index f412372978b..8dcdf092213 100644 --- a/tests/torch/test_models/__init__.py +++ b/tests/torch/test_models/__init__.py @@ -15,6 +15,7 @@ from .googlenet import * from .inceptionv3 import * from .lenet import * +from .llama import * from .pnasnet import * from .preact_resnet import * from .resnet import * @@ -26,4 +27,3 @@ from .sr_small_model import * from .unet import * from .vgg import * -from .llama import * diff --git a/tests/torch/test_models/llama.py 
b/tests/torch/test_models/llama.py index fbf4df50e21..fae6b6be9de 100644 --- a/tests/torch/test_models/llama.py +++ b/tests/torch/test_models/llama.py @@ -1,16 +1,29 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import math -from typing import Optional, Tuple +from typing import Optional + import torch import torch.nn as nn import torch.nn.functional as F -EMBED_DIM = 64 -N_HEADS = 4 -HEAD_DIM = EMBED_DIM // N_HEADS +EMBED_DIM = 64 +N_HEADS = 4 +HEAD_DIM = EMBED_DIM // N_HEADS # Same as Llama 3.2 config ROPE_THETA = 500000.0 -MAX_SEQ = 128 -BIAS = False +MAX_SEQ = 128 +BIAS = False class LlamaRMSNorm(nn.Module): @@ -30,6 +43,7 @@ def forward(self, hidden_states): hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) return self.weight * hidden_states.to(input_dtype) + def _rotate_half(x): """ Copied from src/transformers/models/llama/modeling_llama.py @@ -39,22 +53,25 @@ def _rotate_half(x): x2 = x[..., x.shape[-1] // 2 :] return torch.cat((-x2, x1), dim=-1) + class Rotary(nn.Module): """ Precompute cos/sin for RoPE and apply to q,k. 
Copied from src/transformers/models/llama/modeling_llama.py Initialize the cos and sin value once in init method """ + # Llama applies rotary to q,k before attention; see modeling_llama def __init__(self, head_dim: int, max_seq_len: int = MAX_SEQ, theta: float = ROPE_THETA, device=None): super().__init__() dtype = torch.float32 inv_freq = 1.0 / (theta ** (torch.arange(0, head_dim, 2, dtype=dtype, device=device) / head_dim)) t = torch.arange(max_seq_len, dtype=dtype, device=device) - freqs = torch.einsum("t,f->tf", t, inv_freq) # (T, Hd/2) - emb = torch.cat((freqs, freqs), dim=-1) # (T, Hd) - self.register_buffer("cos", emb.cos()[None, None, ...], persistent=False) # (1,1,T,Hd) + freqs = torch.einsum("t,f->tf", t, inv_freq) # (T, Hd/2) + emb = torch.cat((freqs, freqs), dim=-1) # (T, Hd) + self.register_buffer("cos", emb.cos()[None, None, ...], persistent=False) # (1,1,T,Hd) self.register_buffer("sin", emb.sin()[None, None, ...], persistent=False) + def forward(self, q: torch.Tensor, k: torch.Tensor, pos: torch.Tensor): cos = self.cos[..., pos, :] sin = self.sin[..., pos, :] @@ -62,10 +79,12 @@ def forward(self, q: torch.Tensor, k: torch.Tensor, pos: torch.Tensor): k_embed = (k * cos) + (_rotate_half(k) * sin) return q_embed, k_embed + class LlamaMLP(nn.Module): """ Copied from src/transformers/models/llama/modeling_llama.py """ + def __init__(self, dim: int, mult: int = 2): super().__init__() # mult is used as a scaling factor of sorts. This is to define the hidden/intermediate layer size @@ -73,15 +92,18 @@ def __init__(self, dim: int, mult: int = 2): self.gate_proj = nn.Linear(dim, hidden, bias=BIAS) self.up_proj = nn.Linear(dim, hidden, bias=BIAS) self.down_proj = nn.Linear(hidden, dim, bias=BIAS) + def forward(self, x: torch.Tensor) -> torch.Tensor: down_proj = self.down_proj(F.silu(self.gate_proj(x)) * self.up_proj(x)) return down_proj + class LlamaDecoderOnly(nn.Module): """ One Llama-style transformer block (pre-norm attn + MLP) with RoPE and KV cache. 
Forward takes embeddings only. """ + # KV caching + past_key_values flow mirrors HF implementations. :contentReference[oaicite:4]{index=4} def __init__(self, dim: int = EMBED_DIM, n_heads: int = N_HEADS): super().__init__() @@ -100,10 +122,10 @@ def __init__(self, dim: int = EMBED_DIM, n_heads: int = N_HEADS): self.mlp_norm = LlamaRMSNorm(dim) self.mlp = LlamaMLP(dim) - def _attn(self, x: torch.Tensor, pos: torch.Tensor, past_kv: Optional[Tuple[torch.Tensor, torch.Tensor]]): - ''' + def _attn(self, x: torch.Tensor, pos: torch.Tensor, past_kv: Optional[tuple[torch.Tensor, torch.Tensor]]): + """ Code from LlamaAttention forward method. SDPA implementation similar to model.config._attn_implementation="SDPA" - ''' + """ B, T, C = x.shape H, Hd = self.n_heads, self.head_dim @@ -121,12 +143,7 @@ def _attn(self, x: torch.Tensor, pos: torch.Tensor, past_kv: Optional[Tuple[torc k = torch.cat([pk, k], dim=2) v = torch.cat([pv, v], dim=2) - y = torch.nn.functional.scaled_dot_product_attention( - q, k, v, - attn_mask=None, - is_causal=True, - dropout_p=0.0 - ) + y = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, is_causal=True, dropout_p=0.0) y = y.transpose(1, 2).contiguous().view(B, T, C) y = self.o_proj(y) @@ -134,8 +151,8 @@ def _attn(self, x: torch.Tensor, pos: torch.Tensor, past_kv: Optional[Tuple[torc def forward( self, - x_embed: torch.Tensor, # (B, T_new, C) embeddings only - past_kv: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # (B,H,Tpast,Hd) + x_embed: torch.Tensor, # (B, T_new, C) embeddings only + past_kv: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # (B,H,Tpast,Hd) ): # positions for the *new* tokens only past_len = 0 if past_kv is None else past_kv[0].size(2) @@ -143,7 +160,7 @@ def forward( pos = torch.arange(past_len, past_len + T_new, device=x_embed.device) # pre-norm attention + residual - y, kv = self._attn(self.attn_norm(x_embed), pos, past_kv) + y, _kv = self._attn(self.attn_norm(x_embed), pos, past_kv) x = 
x_embed + y # pre-norm MLP + residual From 5f1c2de8dba6b0073ba3d6a6d9e021906e5fb311 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 14 Oct 2025 11:03:26 +0400 Subject: [PATCH 56/91] rename test_quantizer to test_quantizer_compression.py --- .../{test_quantizer.py => test_quantizer_compression.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/executorch/{test_quantizer.py => test_quantizer_compression.py} (100%) diff --git a/tests/executorch/test_quantizer.py b/tests/executorch/test_quantizer_compression.py similarity index 100% rename from tests/executorch/test_quantizer.py rename to tests/executorch/test_quantizer_compression.py From 6f81879e458565270791b00786a21b6e6052071c Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 14 Oct 2025 12:47:30 +0400 Subject: [PATCH 57/91] review changes --- .../weight_compression/algorithm.py | 25 +++++++++++ .../torch/fx/quantization/quantize_pt2e.py | 2 +- .../weight_compression/algorithm.py | 44 +++++++++++++------ 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py index 81a2ae9b53d..7d541421d2a 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/algorithm.py @@ -56,13 +56,38 @@ def __init__( advanced_parameters: AdvancedCompressionParameters, ) -> torch.fx.GraphModule: """ + :param mode: Defines a mode for weight compression. + INT8_SYM stands for 8-bit integer symmetric quantization of all weights. + Weights are quantized symmetrically without zero point. + INT8_ASYM is the same as INT8_SYM mode, but weights are quantized to a primary precision asymmetrically + with a typical non-fixed zero point. + INT4_SYM stands for a mixed-precision weights quantization with 4-bit integer as a primary precision. 
+ Weights are quantized to a primary precision symmetrically without zero point. + All embeddings and the last layer are always compressed to a backup_mode, which is INT8_ASYM, + by default. All others are quantized either to 4-bit integer or to a backup_mode depending on + criteria and the given ratio. + INT4_ASYM is the same as INT4_SYM mode, but weights are quantized to a primary precision asymmetrically + with a typical non-fixed zero point. :param quantizer: Quantizer to use in WeightCompression algorithm. + :param ratio: the ratio between primary and backup precisions (e.g. 0.9 means 90% of layers quantized to INT4 + and the rest to backup_mode). + :param group_size: number of weights (e.g. 128) in the channel dimension + that share quantization parameters (scale). The value -1 means no grouping. + :param ignored_scope: An ignored scope that defines the list of model control + flow graph nodes to be ignored during quantization. + :param all_layers: Indicates whether embeddings and last MatMul layers should be compressed to a primary + precision. By default, the backup precision is assigned for the embeddings and last MatMul layers. :param subset_size: Number of data samples to calculate activation statistics used for assigning different quantization precision. :param awq: determines whether to use or not modified AWQ algorithm. :param scale_estimation: determines whether to use or not scale estimation for 4 bit layers. :param gptq: determines whether to use or not GPTQ algorithm. :param lora_correction: determines whether to use or not LoRA Correction algorithm. + :param backup_mode: Defines a backup mode for mixed-precision weight compression. + NONE stands for original floating-point precision of the model weights. + In this mode, weights are retained in their original precision without any quantization. + INT8_SYM stands for 8-bit integer symmetric quantization without zero point. 
+ INT8_ASYM stands for 8-bit integer asymmetric quantization with a typical non-fixed zero point. :param sensitivity_metric: The sensitivity metric for assigning quantization precision to layers. In order to preserve the accuracy of the model, the more sensitive layers receives a higher precision. :param compression_format: Describes the format in which the model is saved after weight compression. diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index 90064eedd05..c8906573f30 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -172,7 +172,7 @@ def compress_pt2e( scale_estimation: bool = False, gptq: bool = False, lora_correction: bool = False, - subset_size: int = 128, # Dataset size to use + subset_size: int = 128, sensitivity_metric: Optional[SensitivityMetric] = None, advanced_parameters: Optional[AdvancedCompressionParameters] = None, ) -> torch.fx.GraphModule: diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index ddab9b261ef..61a684de126 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -799,7 +799,7 @@ def _collect_statistics_and_statistic_points( :param model: Backend-specific model instance. :param graph: Corresponding NNCFGraph of the model. - :param statistic_points: Container with pre-collected statistics, if available. + :param statistic_points: Statistic points. :param dataset: Dataset used for collecting statistics when not provided. :param ratio_defining_params: List of parameters defining compression ratios. :param all_weight_params: List of all weight compression parameters. 
@@ -832,18 +832,20 @@ def get_weight_compression_parameters( list[WeightCompressionParameters], ]: """ - Generates a list of weight compression parameters based on the Weight Compression algorithm - configuration. Determines the appropriate quantization parameters for each node eligible for - weight compression. Also, returns a list of ratio defining parameters which are a subset of - all_weight_parameters. This is based on parameters like all_layers. Lastly, it gives a list - of skipped layers based on parameters like ignored scope or depending on the group size value - adjustment. + This Function: + 1. Generates a list of weight compression parameters based on the algorithm configuration. + 2. Determines the appropriate quantization parameters for each node eligible for weight compression. + 3. Generates a subset of parameters that can be compressed in both primary and backup precisions, + called ratio-defining parameters. All ratio-defining parameters are set to the primary precision. + 4. Generates a subset of parameters that will not be compressed, based on the ignored scope or + compression configuration restrictions. :param model: Backend-specific input model. :param graph: NNCFGraph instance. - :return: A tuple consisting of a list of all weight compression parameters, based on the Weight - Compression algorithm configuration, list of ratio defining parameters(weights that are used - for ratio calculation between primary and backup precisions), and list of weight parameters to skip. + :return: A tuple consisting a list of weight compression parameters that can be compressed, + a list of ratio-defining parameters, which is a subset of compressible weight parameters + that are allowed to be set to mixed precisions, and a list of weight compression parameters + that can not be compressed. 
""" nodes_to_compress = self.get_nodes_to_compress(graph) @@ -916,7 +918,7 @@ def get_weight_compression_parameters( else: group_size_values = {w_params.weight_name: self._group_size for w_params in ratio_defining_params} - # Set these layers to primary config. Later we will set layers to backup precision according to Mixed precision + # Set each ratio defining parameter to primary config for weight_param in ratio_defining_params: weight_param.compression_config = self._get_primary_config(group_size_values[weight_param.weight_name]) @@ -954,7 +956,23 @@ def apply_with_parameters( all_weight_params: list[WeightCompressionParameters], ratio_defining_params: list[WeightCompressionParameters], skipped_weight_params: list[WeightCompressionParameters], - ): + ) -> TModel: + """ + Applies the Weight Compression algorithm using precomputed parameters and optional + algorithms (AWQ, GPTQ, scale estimation, LoRA correction). The method collects + statistics, configures the weight compression parameters for mixed precision algorithm, + and performs the model transformation with appropriate decompression operations + + :param model: Backend-specific model to be compressed. + :param graph: NNCFGraph instance. + :param dataset: Dataset to collect statistics. + :param statistic_points: Statistics points object. + :param all_weight_params: List of all weight parameters. + :param ratio_defining_params: Subset of all_weight_params that determine mixed-precision ratios. + :param skipped_weight_params: List of parameters corresponding to weights intentionally skipped + from compression (e.g., due to ignored scopes or group size adjustments). + :return: Transformed model with compressed weights and inserted backend-specific decompressor. 
+ """ # Collect statistics for the weights compression statistics, statistic_points = self._collect_statistics_and_statistic_points( model, graph, statistic_points, dataset, ratio_defining_params, all_weight_params @@ -1179,7 +1197,7 @@ def _get_statistics_for_weights_compression( :param matmul_input_to_output_nodes_map: A mapping from activation node and a port id to corresponding matmul nodes which accept this activation as an input. - :param statistic_points: Statistic points object. + :param statistic_points: Statistic points. :return: Collected statistics. """ # For each node we store statistics in a WCTensorStatistics data-class. It contains the following fields: From eb0ff164ed0c70e626614f85802d9453ef7884cf Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 14 Oct 2025 14:24:57 +0400 Subject: [PATCH 58/91] review changes --- .../weight_compression/algorithm.py | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 61a684de126..c76f5188c94 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -543,7 +543,7 @@ def _set_weight_compression_config( primary_precision_weight_params = self._mixed_precision_algo.apply( model, graph, statistics_points, weight_params=ratio_defining_params ) - # ratio_defining_params are all in primary precision. Update parameters + # ratio_defining_params are all in primary precision. Update parameters # which need to be set to backup precision for weight_param in ratio_defining_params: if weight_param in primary_precision_weight_params: @@ -799,7 +799,7 @@ def _collect_statistics_and_statistic_points( :param model: Backend-specific model instance. :param graph: Corresponding NNCFGraph of the model. - :param statistic_points: Statistic points. 
+ :param Container with pre-collected statistics, if available.. :param dataset: Dataset used for collecting statistics when not provided. :param ratio_defining_params: List of parameters defining compression ratios. :param all_weight_params: List of all weight compression parameters. @@ -817,9 +817,7 @@ def _collect_statistics_and_statistic_points( if statistic_points is None: statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) - statistics = self._get_statistics_for_weights_compression( - matmul_input_to_output_nodes_map, statistic_points - ) + statistics = self._get_statistics_for_weights_compression(matmul_input_to_output_nodes_map, statistic_points) return statistics, statistic_points def get_weight_compression_parameters( @@ -832,20 +830,21 @@ def get_weight_compression_parameters( list[WeightCompressionParameters], ]: """ - This Function: - 1. Generates a list of weight compression parameters based on the algorithm configuration. - 2. Determines the appropriate quantization parameters for each node eligible for weight compression. - 3. Generates a subset of parameters that can be compressed in both primary and backup precisions, + This Function does the following: + + * Generates a list of weight compression parameters based on the algorithm configuration. + * Determines the appropriate quantization parameters for each node eligible for weight compression. + * Generates a subset of parameters that can be compressed in both primary and backup precisions, called ratio-defining parameters. All ratio-defining parameters are set to the primary precision. - 4. Generates a subset of parameters that will not be compressed, based on the ignored scope or + * Generates a subset of parameters that will not be compressed, based on the ignored scope or compression configuration restrictions. :param model: Backend-specific input model. 
:param graph: NNCFGraph instance. :return: A tuple consisting a list of weight compression parameters that can be compressed, - a list of ratio-defining parameters, which is a subset of compressible weight parameters - that are allowed to be set to mixed precisions, and a list of weight compression parameters - that can not be compressed. + a list of ratio-defining parameters, which is a subset of compressible weight parameters + that are allowed to be set to mixed precisions, and a list of weight compression parameters + that can not be compressed. """ nodes_to_compress = self.get_nodes_to_compress(graph) @@ -934,8 +933,8 @@ def apply( self.set_backend_entity(model) # Get processed weight compression parameters ready for compression - all_weight_params, ratio_defining_params, skipped_weight_params = ( - self.get_weight_compression_parameters(model, graph) + all_weight_params, ratio_defining_params, skipped_weight_params = self.get_weight_compression_parameters( + model, graph ) return self.apply_with_parameters( model, @@ -960,7 +959,7 @@ def apply_with_parameters( """ Applies the Weight Compression algorithm using precomputed parameters and optional algorithms (AWQ, GPTQ, scale estimation, LoRA correction). The method collects - statistics, configures the weight compression parameters for mixed precision algorithm, + statistics, configures the weight compression parameters for mixed precision algorithm, and performs the model transformation with appropriate decompression operations :param model: Backend-specific model to be compressed. 
@@ -988,7 +987,6 @@ def apply_with_parameters( # Filter all_weight_params and by excluding nodes that should remain in their original floating-point precision all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) - if self._awq: model = self.awq_algo.apply(model, graph, all_weight_params, statistics, self._backend_entity) # After applying AWQ we need to update statistics since AWQ alters the activations From 8afeb9db2768220f88535b782515dae71e8d98f2 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 14 Oct 2025 14:27:52 +0400 Subject: [PATCH 59/91] precommit fix --- .../executorch/test_quantizer_compression.py | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/tests/executorch/test_quantizer_compression.py b/tests/executorch/test_quantizer_compression.py index 2eb58489db3..1287672b5e7 100644 --- a/tests/executorch/test_quantizer_compression.py +++ b/tests/executorch/test_quantizer_compression.py @@ -90,6 +90,20 @@ def _string_from_quantizer_params(qparams: dict[str, Any], pt2e_param: Optional[ return f"{mode.value}_gs{gs}_ratio{ratio}_all_layers_{all_layers}_sensitivity_metric_{sensitivity_metric}" +def get_test_cases(): + test_cases = () + for model in BASE_MODELS: + for qparam in QUANTIZER_PARAMS: + pt2e_params = PT2E_PARAMS + if (qparam.get("mode") in {QuantizationMode.INT8WO_ASYM, QuantizationMode.INT8WO_SYM}) or ( + qparam.get("ratio") is None + ): + pt2e_params = [{}] + for pt2e_param in pt2e_params: + test_cases.append(model, qparam, pt2e_param) + return test_cases + + BASE_MODELS = ( ModelCase(LlamaDecoderOnly, "LlamaDecoderOnly", [1, 3, 64]), ModelCase(partial(ShortTransformer, 64, 128, True), "short_transformer_shared", [5]), @@ -110,19 +124,7 @@ def _string_from_quantizer_params(qparams: dict[str, Any], pt2e_param: Optional[ ) -TEST_MODELS = tuple( - (model, qparam, pt2e_param) - for model in BASE_MODELS - for qparam in QUANTIZER_PARAMS - for pt2e_param in ( - [{}] 
- if ( - (qparam.get("mode") in {QuantizationMode.INT8WO_ASYM, QuantizationMode.INT8WO_SYM}) - or (qparam.get("ratio") is None) - ) - else PT2E_PARAMS - ) -) +TEST_MODELS = get_test_cases() TEST_MODEL_IDS = [ From f491c8deda8663ebfeceb2bba1329e75ed71a81d Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 11:56:43 +0400 Subject: [PATCH 60/91] update quantizer test to include scales; remve sensitivity metric from the testing scope --- .../executorch/test_quantizer_compression.py | 100 +++++++++-- ...4wo_sym_gs32_ratio0.8_all_layers_False.dot | 169 ------------------ ...t4wo_sym_gs32_ratio0.8_all_layers_True.dot | 169 ------------------ ...t8wo_asym_gs-1_ratio1_all_layers_False.dot | 169 ------------------ ...4wo_sym_gs32_ratio0.8_all_layers_False.dot | 24 --- ...t4wo_sym_gs32_ratio0.8_all_layers_True.dot | 24 --- ...t8wo_asym_gs-1_ratio1_all_layers_False.dot | 24 --- ...atio0.8_all_layers_False_ref_wc_param.json | 128 ------------- ...tivity_metric_hessian_input_activation.dot | 169 ------------------ ...itivity_metric_max_activation_variance.dot | 169 ------------------ ...ivity_metric_mean_activation_magnitude.dot | 169 ------------------ ...tivity_metric_mean_activation_variance.dot | 169 ------------------ ...ivity_metric_weight_quantization_error.dot | 169 ------------------ ...ratio0.8_all_layers_True_ref_wc_param.json | 128 ------------- ...tivity_metric_hessian_input_activation.dot | 169 ------------------ ...itivity_metric_max_activation_variance.dot | 169 ------------------ ...ivity_metric_mean_activation_magnitude.dot | 169 ------------------ ...tivity_metric_mean_activation_variance.dot | 169 ------------------ ...ivity_metric_weight_quantization_error.dot | 169 ------------------ ..._ratio1_all_layers_False_ref_wc_param.json | 128 ------------- ...l_layers_False_sensitivity_metric_None.dot | 169 ------------------ ...atio0.8_all_layers_False_ref_wc_param.json | 38 ---- ...tivity_metric_hessian_input_activation.dot | 24 --- 
...itivity_metric_max_activation_variance.dot | 24 --- ...ivity_metric_mean_activation_magnitude.dot | 24 --- ...tivity_metric_mean_activation_variance.dot | 24 --- ...ivity_metric_weight_quantization_error.dot | 24 --- ...ratio0.8_all_layers_True_ref_wc_param.json | 38 ---- ...tivity_metric_hessian_input_activation.dot | 24 --- ...itivity_metric_max_activation_variance.dot | 24 --- ...ivity_metric_mean_activation_magnitude.dot | 24 --- ...tivity_metric_mean_activation_variance.dot | 24 --- ...ivity_metric_weight_quantization_error.dot | 24 --- ..._ratio1_all_layers_False_ref_wc_param.json | 38 ---- ...l_layers_False_sensitivity_metric_None.dot | 24 --- 35 files changed, 84 insertions(+), 3216 deletions(-) delete mode 100644 tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot delete mode 100644 tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot delete mode 100644 tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot delete mode 100644 tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot delete mode 100644 tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot delete mode 100644 tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot delete mode 100644 
tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json delete mode 100644 
tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot delete mode 100644 
tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json delete mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot diff --git a/tests/executorch/test_quantizer_compression.py b/tests/executorch/test_quantizer_compression.py index 1287672b5e7..9cc817d5292 100644 --- a/tests/executorch/test_quantizer_compression.py +++ b/tests/executorch/test_quantizer_compression.py @@ -31,6 +31,10 @@ from nncf.common.utils.os import safe_open from nncf.experimental.torch.fx import compress_pt2e from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter +from nncf.torch.quantization.layers import INT4AsymmetricWeightsDecompressor +from nncf.torch.quantization.layers import INT4SymmetricWeightsDecompressor +from nncf.torch.quantization.layers import INT8AsymmetricWeightsDecompressor +from nncf.torch.quantization.layers import INT8SymmetricWeightsDecompressor from tests.cross_fw.shared.nx_graph import compare_nx_graph_with_reference from tests.cross_fw.shared.paths import TEST_ROOT from tests.torch.test_models.llama import LlamaDecoderOnly @@ -56,6 +60,10 @@ def get_wc_param_filename(model_name: str) -> str: return model_name + "_ref_wc_param.json" +def get_wc_scales_filename(model_name: str) -> str: + return model_name + "_ref_wc_scales.json" + + def _build_torch_fx_model(model_case: ModelCase) -> tuple[torch.fx.GraphModule, torch.Tensor]: model = model_case.model_builder() # ShortTransformer takes token ids; match 
prior synthetic tests (int32) @@ -86,8 +94,32 @@ def _string_from_quantizer_params(qparams: dict[str, Any], pt2e_param: Optional[ all_layers = qparams.get("all_layers", "False") if pt2e_param is None: return f"{mode.value}_gs{gs}_ratio{ratio}_all_layers_{all_layers}" - sensitivity_metric = pt2e_param.get("sensitivity_metric", "None") - return f"{mode.value}_gs{gs}_ratio{ratio}_all_layers_{all_layers}_sensitivity_metric_{sensitivity_metric}" + awq = pt2e_param.get("awq", "False") + scale_estimation = pt2e_param.get("scale_estimation", "False") + return f"{mode.value}_gs{gs}_ratio{ratio}_all_layers_{all_layers}_awq_{awq}_scale_estimation_{scale_estimation}" + + +def check_multiple_isinstance(object_to_check: Any, objects: list[Any]): + for obj in objects: + if not isinstance(object_to_check, obj): + return False + return True + + +def get_scale_values_from_model(model: torch.fx.GraphModule): + node_to_scale_mapping = {} + decompressor_modules = [ + INT4AsymmetricWeightsDecompressor, + INT4SymmetricWeightsDecompressor, + INT8AsymmetricWeightsDecompressor, + INT8SymmetricWeightsDecompressor, + ] + for node in model.graph.nodes: + if not check_multiple_isinstance(node.taget, decompressor_modules): + continue + node_to_scale_mapping[node.name] = model.state_dict()[node.name] + + return node_to_scale_mapping def get_test_cases(): @@ -115,13 +147,7 @@ def get_test_cases(): {"mode": QuantizationMode.INT4WO_SYM, "group_size": 32, "ratio": 0.8, "all_layers": True}, ) -PT2E_PARAMS = ( - {"sensitivity_metric": nncf.SensitivityMetric.HESSIAN_INPUT_ACTIVATION}, - {"sensitivity_metric": nncf.SensitivityMetric.MAX_ACTIVATION_VARIANCE}, - {"sensitivity_metric": nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR}, - {"sensitivity_metric": nncf.SensitivityMetric.MEAN_ACTIVATION_VARIANCE}, - {"sensitivity_metric": nncf.SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE}, -) +PT2E_PARAMS = ({"awq": True, "scale_estimation": True},) TEST_MODELS = get_test_cases() @@ -157,12 +183,7 @@ def 
test_compress_pt2e( # Build quantizer directly from quantizer_params (already includes mode/group_size) quantizer = quantizer_builder(**quantizer_params) - quantized_model = compress_pt2e( - fx_model, - quantizer=quantizer, - dataset=calibration_dataset, - **pt2e_params, - ) + quantized_model = compress_pt2e(fx_model, quantizer=quantizer, dataset=calibration_dataset) with torch.no_grad(): out = quantized_model(example_input) @@ -170,13 +191,60 @@ def test_compress_pt2e( nncf_graph: NNCFGraph = GraphConverter.create_nncf_graph(quantized_model) nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) - param_string = _string_from_quantizer_params(quantizer_params, pt2e_params) + param_string = _string_from_quantizer_params(quantizer_params, pt2e_params=None) path_to_dot = ( FX_PT2E_DIR / quantizer.__class__.__name__ / model_case.model_id / get_dot_filename(param_string) ).as_posix() compare_nx_graph_with_reference(nx_graph, path_to_dot) +@pytest.mark.parametrize( + ("model_case", "quantizer_params", "pt2e_params"), + TEST_MODELS, + ids=TEST_MODEL_IDS, +) +@pytest.mark.parametrize( + "quantizer_builder", + [get_openvino_quantizer], + ids=["OpenVINOQuantizer"], +) +def test_compress_pt2e_scales( + quantizer_builder: Callable[..., OpenVINOQuantizer], + model_case: ModelCase, + quantizer_params, + pt2e_params, + regen_ref_data, +): + fx_model, example_input = _build_torch_fx_model(model_case) + with torch.no_grad(): + ref_out = fx_model(example_input) + + calibration_dataset = _get_calibration_dataset(example_input) + + # Build quantizer directly from quantizer_params (already includes mode/group_size) + quantizer = quantizer_builder(**quantizer_params) + + quantized_model = compress_pt2e(fx_model, quantizer=quantizer, dataset=calibration_dataset, **pt2e_params) + + with torch.no_grad(): + out = quantized_model(example_input) + assert out.shape == ref_out.shape, "Compressed model output shape mismatch." 
+ + param_string = _string_from_quantizer_params(quantizer_params, pt2e_params) + ref_json_path = ( + FX_PT2E_DIR / quantizer.__class__.__name__ / model_case.model_id / get_wc_scales_filename(param_string) + ).as_posix() + + scales_list = get_scale_values_from_model(quantized_model) + + if regen_ref_data: + with safe_open(ref_json_path, "w") as file: + json.dump(scales_list, file, indent=4) + + with safe_open(ref_json_path, "r") as f: + json.load(f) + + @pytest.mark.parametrize( ("model_case", "quantizer_params", "pt2e_params"), TEST_MODELS, diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot deleted file mode 100644 index 0a9a27fd85b..00000000000 --- a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 q_proj_weight_updated_constant0" [id=2, type="get_attr"]; -"3 symmetric_weights_decompressor_q_proj_weight_0" [id=3, type="call_module"]; -"4 k_proj_weight_updated_constant0" [id=4, type="get_attr"]; -"5 symmetric_weights_decompressor_k_proj_weight_0" [id=5, type="call_module"]; -"6 v_proj_weight_updated_constant0" [id=6, type="get_attr"]; -"7 symmetric_weights_decompressor_v_proj_weight_0" [id=7, type="call_module"]; -"8 o_proj_weight_updated_constant0" [id=8, type="get_attr"]; -"9 symmetric_weights_decompressor_o_proj_weight_0" [id=9, type="call_module"]; -"10 mlp_gate_proj_weight_updated_constant0" [id=10, type="get_attr"]; -"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=11, type="call_module"]; -"12 mlp_up_proj_weight_updated_constant0" [id=12, type="get_attr"]; -"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=13, 
type="call_module"]; -"14 mlp_down_proj_weight_updated_constant0" [id=14, type="get_attr"]; -"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=15, type="call_module"]; -"16 rope_cos" [id=16, type="get_attr"]; -"17 rope_sin" [id=17, type="get_attr"]; -"18 x_embed" [id=18, type=input]; -"19 arange" [id=19, type=arange]; -"20 _assert_tensor_metadata_default" [id=20, type="_assert_tensor_metadata"]; -"21 to" [id=21, type=to]; -"22 pow_1" [id=22, type=pow]; -"23 mean" [id=23, type=mean]; -"24 add" [id=24, type=add]; -"25 rsqrt" [id=25, type=rsqrt]; -"26 mul" [id=26, type=mul]; -"27 _assert_tensor_metadata_default_1" [id=27, type="_assert_tensor_metadata"]; -"28 to_1" [id=28, type=to]; -"29 mul_1" [id=29, type=mul]; -"30 linear" [id=30, type=linear]; -"31 view" [id=31, type=view]; -"32 transpose" [id=32, type=transpose]; -"33 linear_1" [id=33, type=linear]; -"34 view_1" [id=34, type=view]; -"35 transpose_1" [id=35, type=transpose]; -"36 linear_2" [id=36, type=linear]; -"37 view_2" [id=37, type=view]; -"38 transpose_2" [id=38, type=transpose]; -"39 index" [id=39, type=index]; -"40 index_1" [id=40, type=index]; -"41 mul_2" [id=41, type=mul]; -"42 slice_1" [id=42, type=slice]; -"43 slice_2" [id=43, type=slice]; -"44 neg" [id=44, type=neg]; -"45 cat" [id=45, type=cat]; -"46 mul_3" [id=46, type=mul]; -"47 add_1" [id=47, type=add]; -"48 mul_4" [id=48, type=mul]; -"49 slice_3" [id=49, type=slice]; -"50 slice_4" [id=50, type=slice]; -"51 neg_1" [id=51, type=neg]; -"52 cat_1" [id=52, type=cat]; -"53 mul_5" [id=53, type=mul]; -"54 add_2" [id=54, type=add]; -"55 scaled_dot_product_attention" [id=55, type="scaled_dot_product_attention"]; -"56 transpose_3" [id=56, type=transpose]; -"57 view_3" [id=57, type=view]; -"58 linear_3" [id=58, type=linear]; -"59 add_3" [id=59, type=add]; -"60 _assert_tensor_metadata_default_2" [id=60, type="_assert_tensor_metadata"]; -"61 to_2" [id=61, type=to]; -"62 pow_2" [id=62, type=pow]; -"63 mean_1" [id=63, type=mean]; -"64 add_4" [id=64, 
type=add]; -"65 rsqrt_1" [id=65, type=rsqrt]; -"66 mul_6" [id=66, type=mul]; -"67 _assert_tensor_metadata_default_3" [id=67, type="_assert_tensor_metadata"]; -"68 to_3" [id=68, type=to]; -"69 mul_7" [id=69, type=mul]; -"70 linear_4" [id=70, type=linear]; -"71 silu" [id=71, type=silu]; -"72 linear_5" [id=72, type=linear]; -"73 mul_8" [id=73, type=mul]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "29 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "69 mul_7" [style=solid, label="(64,)"]; -"2 q_proj_weight_updated_constant0" -> "3 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; -"3 symmetric_weights_decompressor_q_proj_weight_0" -> "30 linear" [style=solid, label="(64, 64)"]; -"4 k_proj_weight_updated_constant0" -> "5 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; -"5 symmetric_weights_decompressor_k_proj_weight_0" -> "33 linear_1" [style=solid, label="(64, 64)"]; -"6 v_proj_weight_updated_constant0" -> "7 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; -"7 symmetric_weights_decompressor_v_proj_weight_0" -> "36 linear_2" [style=solid, label="(64, 64)"]; -"8 o_proj_weight_updated_constant0" -> "9 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; -"9 symmetric_weights_decompressor_o_proj_weight_0" -> "58 linear_3" [style=solid, label="(64, 64)"]; -"10 mlp_gate_proj_weight_updated_constant0" -> "11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; -"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "70 linear_4" [style=solid, label="(128, 64)"]; -"12 mlp_up_proj_weight_updated_constant0" -> "13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; -"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "72 linear_5" [style=solid, label="(128, 64)"]; -"14 
mlp_down_proj_weight_updated_constant0" -> "15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; -"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"16 rope_cos" -> "39 index" [style=solid, label="(1, 1, 128, 16)"]; -"17 rope_sin" -> "40 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"18 x_embed" -> "20 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"18 x_embed" -> "21 to" [style=solid, label="(1, 3, 64)"]; -"18 x_embed" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; -"19 arange" -> "39 index" [style=solid, label="(3,)"]; -"19 arange" -> "40 index_1" [style=solid, label="(3,)"]; -"21 to" -> "22 pow_1" [style=solid, label="(1, 3, 64)"]; -"21 to" -> "26 mul" [style=solid, label="(1, 3, 64)"]; -"22 pow_1" -> "23 mean" [style=solid, label="(1, 3, 64)"]; -"23 mean" -> "24 add" [style=solid, label="(1, 3, 1)"]; -"24 add" -> "25 rsqrt" [style=solid, label="(1, 3, 1)"]; -"25 rsqrt" -> "26 mul" [style=solid, label="(1, 3, 1)"]; -"26 mul" -> "27 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"26 mul" -> "28 to_1" [style=solid, label="(1, 3, 64)"]; -"28 to_1" -> "29 mul_1" [style=solid, label="(1, 3, 64)"]; -"29 mul_1" -> "30 linear" [style=solid, label="(1, 3, 64)"]; -"29 mul_1" -> "33 linear_1" [style=solid, label="(1, 3, 64)"]; -"29 mul_1" -> "36 linear_2" [style=solid, label="(1, 3, 64)"]; -"30 linear" -> "31 view" [style=solid, label="(1, 3, 64)"]; -"31 view" -> "32 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"32 transpose" -> "41 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"32 transpose" -> "42 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"32 transpose" -> "43 slice_2" [style=solid, label="(1, 4, 3, 16)"]; -"33 linear_1" -> "34 view_1" [style=solid, label="(1, 3, 64)"]; -"34 view_1" -> "35 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"35 transpose_1" -> "48 mul_4" [style=solid, label="(1, 4, 3, 16)"]; 
-"35 transpose_1" -> "49 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"35 transpose_1" -> "50 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"36 linear_2" -> "37 view_2" [style=solid, label="(1, 3, 64)"]; -"37 view_2" -> "38 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"38 transpose_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"39 index" -> "41 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"39 index" -> "48 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"40 index_1" -> "46 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"40 index_1" -> "53 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"41 mul_2" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"42 slice_1" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; -"43 slice_2" -> "44 neg" [style=solid, label="(1, 4, 3, 8)"]; -"44 neg" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; -"45 cat" -> "46 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"46 mul_3" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"47 add_1" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"48 mul_4" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"49 slice_3" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"50 slice_4" -> "51 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"51 neg_1" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"52 cat_1" -> "53 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"53 mul_5" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"54 add_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"55 scaled_dot_product_attention" -> "56 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"56 transpose_3" -> "57 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"57 view_3" -> "58 linear_3" [style=solid, label="(1, 3, 64)"]; -"58 linear_3" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; -"59 add_3" -> "60 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"59 add_3" -> "61 to_2" [style=solid, label="(1, 
3, 64)"]; -"61 to_2" -> "62 pow_2" [style=solid, label="(1, 3, 64)"]; -"61 to_2" -> "66 mul_6" [style=solid, label="(1, 3, 64)"]; -"61 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"62 pow_2" -> "63 mean_1" [style=solid, label="(1, 3, 64)"]; -"63 mean_1" -> "64 add_4" [style=solid, label="(1, 3, 1)"]; -"64 add_4" -> "65 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"65 rsqrt_1" -> "66 mul_6" [style=solid, label="(1, 3, 1)"]; -"66 mul_6" -> "67 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"66 mul_6" -> "68 to_3" [style=solid, label="(1, 3, 64)"]; -"68 to_3" -> "69 mul_7" [style=solid, label="(1, 3, 64)"]; -"69 mul_7" -> "70 linear_4" [style=solid, label="(1, 3, 64)"]; -"69 mul_7" -> "72 linear_5" [style=solid, label="(1, 3, 64)"]; -"70 linear_4" -> "71 silu" [style=solid, label="(1, 3, 128)"]; -"71 silu" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; -"72 linear_5" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; -"73 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot deleted file mode 100644 index 254abcb9dc0..00000000000 --- a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 q_proj_weight_updated_constant0" [id=2, type="get_attr"]; -"3 symmetric_weights_decompressor_q_proj_weight_0" [id=3, type="call_module"]; -"4 k_proj_weight_updated_constant0" [id=4, type="get_attr"]; -"5 symmetric_weights_decompressor_k_proj_weight_0" [id=5, 
type="call_module"]; -"6 v_proj_weight_updated_constant0" [id=6, type="get_attr"]; -"7 symmetric_weights_decompressor_v_proj_weight_0" [id=7, type="call_module"]; -"8 o_proj_weight_updated_constant0" [id=8, type="get_attr"]; -"9 symmetric_weights_decompressor_o_proj_weight_0" [id=9, type="call_module"]; -"10 mlp_gate_proj_weight_updated_constant0" [id=10, type="get_attr"]; -"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=11, type="call_module"]; -"12 mlp_up_proj_weight_updated_constant0" [id=12, type="get_attr"]; -"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=13, type="call_module"]; -"14 mlp_down_proj_weight_updated_constant0" [id=14, type="get_attr"]; -"15 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=15, type="call_module"]; -"16 rope_cos" [id=16, type="get_attr"]; -"17 rope_sin" [id=17, type="get_attr"]; -"18 x_embed" [id=18, type=input]; -"19 arange" [id=19, type=arange]; -"20 _assert_tensor_metadata_default" [id=20, type="_assert_tensor_metadata"]; -"21 to" [id=21, type=to]; -"22 pow_1" [id=22, type=pow]; -"23 mean" [id=23, type=mean]; -"24 add" [id=24, type=add]; -"25 rsqrt" [id=25, type=rsqrt]; -"26 mul" [id=26, type=mul]; -"27 _assert_tensor_metadata_default_1" [id=27, type="_assert_tensor_metadata"]; -"28 to_1" [id=28, type=to]; -"29 mul_1" [id=29, type=mul]; -"30 linear" [id=30, type=linear]; -"31 view" [id=31, type=view]; -"32 transpose" [id=32, type=transpose]; -"33 linear_1" [id=33, type=linear]; -"34 view_1" [id=34, type=view]; -"35 transpose_1" [id=35, type=transpose]; -"36 linear_2" [id=36, type=linear]; -"37 view_2" [id=37, type=view]; -"38 transpose_2" [id=38, type=transpose]; -"39 index" [id=39, type=index]; -"40 index_1" [id=40, type=index]; -"41 mul_2" [id=41, type=mul]; -"42 slice_1" [id=42, type=slice]; -"43 slice_2" [id=43, type=slice]; -"44 neg" [id=44, type=neg]; -"45 cat" [id=45, type=cat]; -"46 mul_3" [id=46, type=mul]; -"47 add_1" [id=47, type=add]; -"48 mul_4" [id=48, type=mul]; -"49 slice_3" 
[id=49, type=slice]; -"50 slice_4" [id=50, type=slice]; -"51 neg_1" [id=51, type=neg]; -"52 cat_1" [id=52, type=cat]; -"53 mul_5" [id=53, type=mul]; -"54 add_2" [id=54, type=add]; -"55 scaled_dot_product_attention" [id=55, type="scaled_dot_product_attention"]; -"56 transpose_3" [id=56, type=transpose]; -"57 view_3" [id=57, type=view]; -"58 linear_3" [id=58, type=linear]; -"59 add_3" [id=59, type=add]; -"60 _assert_tensor_metadata_default_2" [id=60, type="_assert_tensor_metadata"]; -"61 to_2" [id=61, type=to]; -"62 pow_2" [id=62, type=pow]; -"63 mean_1" [id=63, type=mean]; -"64 add_4" [id=64, type=add]; -"65 rsqrt_1" [id=65, type=rsqrt]; -"66 mul_6" [id=66, type=mul]; -"67 _assert_tensor_metadata_default_3" [id=67, type="_assert_tensor_metadata"]; -"68 to_3" [id=68, type=to]; -"69 mul_7" [id=69, type=mul]; -"70 linear_4" [id=70, type=linear]; -"71 silu" [id=71, type=silu]; -"72 linear_5" [id=72, type=linear]; -"73 mul_8" [id=73, type=mul]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "29 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "69 mul_7" [style=solid, label="(64,)"]; -"2 q_proj_weight_updated_constant0" -> "3 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; -"3 symmetric_weights_decompressor_q_proj_weight_0" -> "30 linear" [style=solid, label="(64, 64)"]; -"4 k_proj_weight_updated_constant0" -> "5 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; -"5 symmetric_weights_decompressor_k_proj_weight_0" -> "33 linear_1" [style=solid, label="(64, 64)"]; -"6 v_proj_weight_updated_constant0" -> "7 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; -"7 symmetric_weights_decompressor_v_proj_weight_0" -> "36 linear_2" [style=solid, label="(64, 64)"]; -"8 o_proj_weight_updated_constant0" -> "9 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; -"9 
symmetric_weights_decompressor_o_proj_weight_0" -> "58 linear_3" [style=solid, label="(64, 64)"]; -"10 mlp_gate_proj_weight_updated_constant0" -> "11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; -"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "70 linear_4" [style=solid, label="(128, 64)"]; -"12 mlp_up_proj_weight_updated_constant0" -> "13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; -"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "72 linear_5" [style=solid, label="(128, 64)"]; -"14 mlp_down_proj_weight_updated_constant0" -> "15 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; -"15 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"16 rope_cos" -> "39 index" [style=solid, label="(1, 1, 128, 16)"]; -"17 rope_sin" -> "40 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"18 x_embed" -> "20 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"18 x_embed" -> "21 to" [style=solid, label="(1, 3, 64)"]; -"18 x_embed" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; -"19 arange" -> "39 index" [style=solid, label="(3,)"]; -"19 arange" -> "40 index_1" [style=solid, label="(3,)"]; -"21 to" -> "22 pow_1" [style=solid, label="(1, 3, 64)"]; -"21 to" -> "26 mul" [style=solid, label="(1, 3, 64)"]; -"22 pow_1" -> "23 mean" [style=solid, label="(1, 3, 64)"]; -"23 mean" -> "24 add" [style=solid, label="(1, 3, 1)"]; -"24 add" -> "25 rsqrt" [style=solid, label="(1, 3, 1)"]; -"25 rsqrt" -> "26 mul" [style=solid, label="(1, 3, 1)"]; -"26 mul" -> "27 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"26 mul" -> "28 to_1" [style=solid, label="(1, 3, 64)"]; -"28 to_1" -> "29 mul_1" [style=solid, label="(1, 3, 64)"]; -"29 mul_1" -> "30 linear" [style=solid, label="(1, 3, 64)"]; -"29 mul_1" -> "33 linear_1" [style=solid, label="(1, 3, 64)"]; -"29 
mul_1" -> "36 linear_2" [style=solid, label="(1, 3, 64)"]; -"30 linear" -> "31 view" [style=solid, label="(1, 3, 64)"]; -"31 view" -> "32 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"32 transpose" -> "41 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"32 transpose" -> "42 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"32 transpose" -> "43 slice_2" [style=solid, label="(1, 4, 3, 16)"]; -"33 linear_1" -> "34 view_1" [style=solid, label="(1, 3, 64)"]; -"34 view_1" -> "35 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"35 transpose_1" -> "48 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"35 transpose_1" -> "49 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"35 transpose_1" -> "50 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"36 linear_2" -> "37 view_2" [style=solid, label="(1, 3, 64)"]; -"37 view_2" -> "38 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"38 transpose_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"39 index" -> "41 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"39 index" -> "48 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"40 index_1" -> "46 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"40 index_1" -> "53 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"41 mul_2" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"42 slice_1" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; -"43 slice_2" -> "44 neg" [style=solid, label="(1, 4, 3, 8)"]; -"44 neg" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; -"45 cat" -> "46 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"46 mul_3" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"47 add_1" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"48 mul_4" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"49 slice_3" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"50 slice_4" -> "51 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"51 neg_1" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"52 cat_1" -> "53 mul_5" [style=solid, 
label="(1, 4, 3, 16)"]; -"53 mul_5" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"54 add_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"55 scaled_dot_product_attention" -> "56 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"56 transpose_3" -> "57 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"57 view_3" -> "58 linear_3" [style=solid, label="(1, 3, 64)"]; -"58 linear_3" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; -"59 add_3" -> "60 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"59 add_3" -> "61 to_2" [style=solid, label="(1, 3, 64)"]; -"61 to_2" -> "62 pow_2" [style=solid, label="(1, 3, 64)"]; -"61 to_2" -> "66 mul_6" [style=solid, label="(1, 3, 64)"]; -"61 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"62 pow_2" -> "63 mean_1" [style=solid, label="(1, 3, 64)"]; -"63 mean_1" -> "64 add_4" [style=solid, label="(1, 3, 1)"]; -"64 add_4" -> "65 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"65 rsqrt_1" -> "66 mul_6" [style=solid, label="(1, 3, 1)"]; -"66 mul_6" -> "67 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"66 mul_6" -> "68 to_3" [style=solid, label="(1, 3, 64)"]; -"68 to_3" -> "69 mul_7" [style=solid, label="(1, 3, 64)"]; -"69 mul_7" -> "70 linear_4" [style=solid, label="(1, 3, 64)"]; -"69 mul_7" -> "72 linear_5" [style=solid, label="(1, 3, 64)"]; -"70 linear_4" -> "71 silu" [style=solid, label="(1, 3, 128)"]; -"71 silu" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; -"72 linear_5" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; -"73 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot 
b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot deleted file mode 100644 index 614e06a21ac..00000000000 --- a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 q_proj_weight_updated_constant0" [id=2, type="get_attr"]; -"3 asymmetric_weights_decompressor_q_proj_weight_0" [id=3, type="call_module"]; -"4 k_proj_weight_updated_constant0" [id=4, type="get_attr"]; -"5 asymmetric_weights_decompressor_k_proj_weight_0" [id=5, type="call_module"]; -"6 v_proj_weight_updated_constant0" [id=6, type="get_attr"]; -"7 asymmetric_weights_decompressor_v_proj_weight_0" [id=7, type="call_module"]; -"8 o_proj_weight_updated_constant0" [id=8, type="get_attr"]; -"9 asymmetric_weights_decompressor_o_proj_weight_0" [id=9, type="call_module"]; -"10 mlp_gate_proj_weight_updated_constant0" [id=10, type="get_attr"]; -"11 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=11, type="call_module"]; -"12 mlp_up_proj_weight_updated_constant0" [id=12, type="get_attr"]; -"13 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=13, type="call_module"]; -"14 mlp_down_proj_weight_updated_constant0" [id=14, type="get_attr"]; -"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=15, type="call_module"]; -"16 rope_cos" [id=16, type="get_attr"]; -"17 rope_sin" [id=17, type="get_attr"]; -"18 x_embed" [id=18, type=input]; -"19 arange" [id=19, type=arange]; -"20 _assert_tensor_metadata_default" [id=20, type="_assert_tensor_metadata"]; -"21 to" [id=21, type=to]; -"22 pow_1" [id=22, type=pow]; -"23 mean" [id=23, type=mean]; -"24 add" [id=24, type=add]; -"25 rsqrt" [id=25, type=rsqrt]; -"26 mul" [id=26, type=mul]; -"27 _assert_tensor_metadata_default_1" [id=27, type="_assert_tensor_metadata"]; -"28 to_1" 
[id=28, type=to]; -"29 mul_1" [id=29, type=mul]; -"30 linear" [id=30, type=linear]; -"31 view" [id=31, type=view]; -"32 transpose" [id=32, type=transpose]; -"33 linear_1" [id=33, type=linear]; -"34 view_1" [id=34, type=view]; -"35 transpose_1" [id=35, type=transpose]; -"36 linear_2" [id=36, type=linear]; -"37 view_2" [id=37, type=view]; -"38 transpose_2" [id=38, type=transpose]; -"39 index" [id=39, type=index]; -"40 index_1" [id=40, type=index]; -"41 mul_2" [id=41, type=mul]; -"42 slice_1" [id=42, type=slice]; -"43 slice_2" [id=43, type=slice]; -"44 neg" [id=44, type=neg]; -"45 cat" [id=45, type=cat]; -"46 mul_3" [id=46, type=mul]; -"47 add_1" [id=47, type=add]; -"48 mul_4" [id=48, type=mul]; -"49 slice_3" [id=49, type=slice]; -"50 slice_4" [id=50, type=slice]; -"51 neg_1" [id=51, type=neg]; -"52 cat_1" [id=52, type=cat]; -"53 mul_5" [id=53, type=mul]; -"54 add_2" [id=54, type=add]; -"55 scaled_dot_product_attention" [id=55, type="scaled_dot_product_attention"]; -"56 transpose_3" [id=56, type=transpose]; -"57 view_3" [id=57, type=view]; -"58 linear_3" [id=58, type=linear]; -"59 add_3" [id=59, type=add]; -"60 _assert_tensor_metadata_default_2" [id=60, type="_assert_tensor_metadata"]; -"61 to_2" [id=61, type=to]; -"62 pow_2" [id=62, type=pow]; -"63 mean_1" [id=63, type=mean]; -"64 add_4" [id=64, type=add]; -"65 rsqrt_1" [id=65, type=rsqrt]; -"66 mul_6" [id=66, type=mul]; -"67 _assert_tensor_metadata_default_3" [id=67, type="_assert_tensor_metadata"]; -"68 to_3" [id=68, type=to]; -"69 mul_7" [id=69, type=mul]; -"70 linear_4" [id=70, type=linear]; -"71 silu" [id=71, type=silu]; -"72 linear_5" [id=72, type=linear]; -"73 mul_8" [id=73, type=mul]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "29 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "69 mul_7" [style=solid, label="(64,)"]; -"2 q_proj_weight_updated_constant0" -> "3 asymmetric_weights_decompressor_q_proj_weight_0" 
[style=solid, label="(64, 64)"]; -"3 asymmetric_weights_decompressor_q_proj_weight_0" -> "30 linear" [style=solid, label="(64, 64)"]; -"4 k_proj_weight_updated_constant0" -> "5 asymmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(64, 64)"]; -"5 asymmetric_weights_decompressor_k_proj_weight_0" -> "33 linear_1" [style=solid, label="(64, 64)"]; -"6 v_proj_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(64, 64)"]; -"7 asymmetric_weights_decompressor_v_proj_weight_0" -> "36 linear_2" [style=solid, label="(64, 64)"]; -"8 o_proj_weight_updated_constant0" -> "9 asymmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(64, 64)"]; -"9 asymmetric_weights_decompressor_o_proj_weight_0" -> "58 linear_3" [style=solid, label="(64, 64)"]; -"10 mlp_gate_proj_weight_updated_constant0" -> "11 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; -"11 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "70 linear_4" [style=solid, label="(128, 64)"]; -"12 mlp_up_proj_weight_updated_constant0" -> "13 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; -"13 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "72 linear_5" [style=solid, label="(128, 64)"]; -"14 mlp_down_proj_weight_updated_constant0" -> "15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; -"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"16 rope_cos" -> "39 index" [style=solid, label="(1, 1, 128, 16)"]; -"17 rope_sin" -> "40 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"18 x_embed" -> "20 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"18 x_embed" -> "21 to" [style=solid, label="(1, 3, 64)"]; -"18 x_embed" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; -"19 arange" -> "39 index" [style=solid, label="(3,)"]; -"19 arange" -> 
"40 index_1" [style=solid, label="(3,)"]; -"21 to" -> "22 pow_1" [style=solid, label="(1, 3, 64)"]; -"21 to" -> "26 mul" [style=solid, label="(1, 3, 64)"]; -"22 pow_1" -> "23 mean" [style=solid, label="(1, 3, 64)"]; -"23 mean" -> "24 add" [style=solid, label="(1, 3, 1)"]; -"24 add" -> "25 rsqrt" [style=solid, label="(1, 3, 1)"]; -"25 rsqrt" -> "26 mul" [style=solid, label="(1, 3, 1)"]; -"26 mul" -> "27 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"26 mul" -> "28 to_1" [style=solid, label="(1, 3, 64)"]; -"28 to_1" -> "29 mul_1" [style=solid, label="(1, 3, 64)"]; -"29 mul_1" -> "30 linear" [style=solid, label="(1, 3, 64)"]; -"29 mul_1" -> "33 linear_1" [style=solid, label="(1, 3, 64)"]; -"29 mul_1" -> "36 linear_2" [style=solid, label="(1, 3, 64)"]; -"30 linear" -> "31 view" [style=solid, label="(1, 3, 64)"]; -"31 view" -> "32 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"32 transpose" -> "41 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"32 transpose" -> "42 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"32 transpose" -> "43 slice_2" [style=solid, label="(1, 4, 3, 16)"]; -"33 linear_1" -> "34 view_1" [style=solid, label="(1, 3, 64)"]; -"34 view_1" -> "35 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"35 transpose_1" -> "48 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"35 transpose_1" -> "49 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"35 transpose_1" -> "50 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"36 linear_2" -> "37 view_2" [style=solid, label="(1, 3, 64)"]; -"37 view_2" -> "38 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"38 transpose_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"39 index" -> "41 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"39 index" -> "48 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"40 index_1" -> "46 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"40 index_1" -> "53 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"41 mul_2" -> "47 add_1" [style=solid, 
label="(1, 4, 3, 16)"]; -"42 slice_1" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; -"43 slice_2" -> "44 neg" [style=solid, label="(1, 4, 3, 8)"]; -"44 neg" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; -"45 cat" -> "46 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"46 mul_3" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"47 add_1" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"48 mul_4" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"49 slice_3" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"50 slice_4" -> "51 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"51 neg_1" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"52 cat_1" -> "53 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"53 mul_5" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"54 add_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"55 scaled_dot_product_attention" -> "56 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"56 transpose_3" -> "57 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"57 view_3" -> "58 linear_3" [style=solid, label="(1, 3, 64)"]; -"58 linear_3" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; -"59 add_3" -> "60 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"59 add_3" -> "61 to_2" [style=solid, label="(1, 3, 64)"]; -"61 to_2" -> "62 pow_2" [style=solid, label="(1, 3, 64)"]; -"61 to_2" -> "66 mul_6" [style=solid, label="(1, 3, 64)"]; -"61 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"62 pow_2" -> "63 mean_1" [style=solid, label="(1, 3, 64)"]; -"63 mean_1" -> "64 add_4" [style=solid, label="(1, 3, 1)"]; -"64 add_4" -> "65 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"65 rsqrt_1" -> "66 mul_6" [style=solid, label="(1, 3, 1)"]; -"66 mul_6" -> "67 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"66 mul_6" -> "68 to_3" [style=solid, label="(1, 3, 64)"]; -"68 to_3" -> "69 mul_7" [style=solid, label="(1, 3, 64)"]; -"69 mul_7" -> "70 
linear_4" [style=solid, label="(1, 3, 64)"]; -"69 mul_7" -> "72 linear_5" [style=solid, label="(1, 3, 64)"]; -"70 linear_4" -> "71 silu" [style=solid, label="(1, 3, 128)"]; -"71 silu" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; -"72 linear_5" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; -"73 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot deleted file mode 100644 index 2841824b5a3..00000000000 --- a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_weight_updated_constant0" [id=0, type="get_attr"]; -"1 symmetric_weights_decompressor_linear_weight_0" [id=1, type="call_module"]; -"2 linear_bias" [id=2, type="get_attr"]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 lm_head_bias" [id=5, type="get_attr"]; -"6 input_ids" [id=6, type=input]; -"7 embedding" [id=7, type=embedding]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_weight_updated_constant0" -> "1 symmetric_weights_decompressor_linear_weight_0" [style=solid, label="(2048, 1)"]; -"1 symmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"2 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" 
-> "7 embedding" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; -"5 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"6 input_ids" -> "7 embedding" [style=solid, label="(5,)"]; -"7 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot deleted file mode 100644 index 0382f7e5934..00000000000 --- a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_weight_updated_constant0" [id=0, type="get_attr"]; -"1 symmetric_weights_decompressor_linear_weight_0" [id=1, type="call_module"]; -"2 linear_bias" [id=2, type="get_attr"]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 lm_head_bias" [id=5, type="get_attr"]; -"6 input_ids" [id=6, type=input]; -"7 embedding" [id=7, type=embedding]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_weight_updated_constant0" -> "1 symmetric_weights_decompressor_linear_weight_0" [style=solid, label="(2048, 1)"]; -"1 symmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"2 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" -> "7 embedding" [style=solid, 
label="(128, 64)"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; -"5 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"6 input_ids" -> "7 embedding" [style=solid, label="(5,)"]; -"7 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot deleted file mode 100644 index 03fc9e9c6a0..00000000000 --- a/tests/torch2/data/fx/ao_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_weight_updated_constant0" [id=0, type="get_attr"]; -"1 asymmetric_weights_decompressor_linear_weight_0" [id=1, type="call_module"]; -"2 linear_bias" [id=2, type="get_attr"]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 lm_head_bias" [id=5, type="get_attr"]; -"6 input_ids" [id=6, type=input]; -"7 embedding" [id=7, type=embedding]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_weight_updated_constant0" -> "1 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; -"1 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"2 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "7 embedding" [style=solid, label="(128, 64)"]; -"4 
asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; -"5 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"6 input_ids" -> "7 embedding" [style=solid, label="(5,)"]; -"7 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json deleted file mode 100644 index 7cfdf2719df..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json +++ /dev/null @@ -1,128 +0,0 @@ -[ - { - "weight_name": "q_proj_weight", - "node_with_weight": "linear", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "k_proj_weight", - "node_with_weight": "linear_1", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "v_proj_weight", - "node_with_weight": "linear_2", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "o_proj_weight", - "node_with_weight": "linear_3", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - 
"compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "mlp_gate_proj_weight", - "node_with_weight": "linear_4", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 128, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "mlp_up_proj_weight", - "node_with_weight": "linear_5", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 128, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "mlp_down_proj_weight", - "node_with_weight": "linear_6", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 128 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int8_asym", - "group_size": -1, - "codebook_values": null - } - } -] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot deleted file mode 100644 index 076e46114eb..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 rope_cos" [id=2, type="get_attr"]; -"3 rope_sin" [id=3, type="get_attr"]; -"4 x_embed" [id=4, type=input]; -"5 arange" [id=5, type=arange]; -"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; -"7 to" [id=7, type=to]; -"8 pow_1" [id=8, 
type=pow]; -"9 mean" [id=9, type=mean]; -"10 add" [id=10, type=add]; -"11 rsqrt" [id=11, type=rsqrt]; -"12 mul" [id=12, type=mul]; -"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; -"14 to_1" [id=14, type=to]; -"15 mul_1" [id=15, type=mul]; -"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; -"18 linear" [id=18, type=linear]; -"19 view" [id=19, type=view]; -"20 transpose" [id=20, type=transpose]; -"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; -"23 linear_1" [id=23, type=linear]; -"24 view_1" [id=24, type=view]; -"25 transpose_1" [id=25, type=transpose]; -"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; -"28 linear_2" [id=28, type=linear]; -"29 view_2" [id=29, type=view]; -"30 transpose_2" [id=30, type=transpose]; -"31 index" [id=31, type=index]; -"32 index_1" [id=32, type=index]; -"33 mul_2" [id=33, type=mul]; -"34 slice_1" [id=34, type=slice]; -"35 slice_2" [id=35, type=slice]; -"36 neg" [id=36, type=neg]; -"37 cat" [id=37, type=cat]; -"38 mul_3" [id=38, type=mul]; -"39 add_1" [id=39, type=add]; -"40 mul_4" [id=40, type=mul]; -"41 slice_3" [id=41, type=slice]; -"42 slice_4" [id=42, type=slice]; -"43 neg_1" [id=43, type=neg]; -"44 cat_1" [id=44, type=cat]; -"45 mul_5" [id=45, type=mul]; -"46 add_2" [id=46, type=add]; -"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; -"48 transpose_3" [id=48, type=transpose]; -"49 view_3" [id=49, type=view]; -"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; -"52 linear_3" [id=52, type=linear]; -"53 add_3" [id=53, type=add]; -"54 _assert_tensor_metadata_default_2" [id=54, 
type="_assert_tensor_metadata"]; -"55 to_2" [id=55, type=to]; -"56 pow_2" [id=56, type=pow]; -"57 mean_1" [id=57, type=mean]; -"58 add_4" [id=58, type=add]; -"59 rsqrt_1" [id=59, type=rsqrt]; -"60 mul_6" [id=60, type=mul]; -"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; -"62 to_3" [id=62, type=to]; -"63 mul_7" [id=63, type=mul]; -"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; -"66 linear_4" [id=66, type=linear]; -"67 silu" [id=67, type=silu]; -"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; -"70 linear_5" [id=70, type=linear]; -"71 mul_8" [id=71, type=mul]; -"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; -"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; -"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"5 arange" -> "31 index" [style=solid, label="(3,)"]; -"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; -"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; -"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; -"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; -"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; -"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; -"11 rsqrt" -> "12 mul" [style=solid, 
label="(1, 3, 1)"]; -"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; -"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; -"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; -"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; -"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; -"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; -"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; -"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; -"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; -"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, 
label="(1, 4, 3, 16)"]; -"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; -"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; -"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; -"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "60 mul_6" [style=solid, label="(1, 
3, 64)"]; -"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; -"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; -"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; -"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; -"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; -"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; -"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; -"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot 
b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot deleted file mode 100644 index 076e46114eb..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 rope_cos" [id=2, type="get_attr"]; -"3 rope_sin" [id=3, type="get_attr"]; -"4 x_embed" [id=4, type=input]; -"5 arange" [id=5, type=arange]; -"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; -"7 to" [id=7, type=to]; -"8 pow_1" [id=8, type=pow]; -"9 mean" [id=9, type=mean]; -"10 add" [id=10, type=add]; -"11 rsqrt" [id=11, type=rsqrt]; -"12 mul" [id=12, type=mul]; -"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; -"14 to_1" [id=14, type=to]; -"15 mul_1" [id=15, type=mul]; -"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; -"18 linear" [id=18, type=linear]; -"19 view" [id=19, type=view]; -"20 transpose" [id=20, type=transpose]; -"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; -"23 linear_1" [id=23, type=linear]; -"24 view_1" [id=24, type=view]; -"25 transpose_1" [id=25, type=transpose]; -"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; -"28 linear_2" [id=28, type=linear]; -"29 view_2" [id=29, type=view]; -"30 transpose_2" [id=30, type=transpose]; -"31 index" [id=31, type=index]; -"32 index_1" [id=32, type=index]; -"33 mul_2" [id=33, type=mul]; -"34 slice_1" [id=34, type=slice]; -"35 slice_2" [id=35, 
type=slice]; -"36 neg" [id=36, type=neg]; -"37 cat" [id=37, type=cat]; -"38 mul_3" [id=38, type=mul]; -"39 add_1" [id=39, type=add]; -"40 mul_4" [id=40, type=mul]; -"41 slice_3" [id=41, type=slice]; -"42 slice_4" [id=42, type=slice]; -"43 neg_1" [id=43, type=neg]; -"44 cat_1" [id=44, type=cat]; -"45 mul_5" [id=45, type=mul]; -"46 add_2" [id=46, type=add]; -"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; -"48 transpose_3" [id=48, type=transpose]; -"49 view_3" [id=49, type=view]; -"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; -"52 linear_3" [id=52, type=linear]; -"53 add_3" [id=53, type=add]; -"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; -"55 to_2" [id=55, type=to]; -"56 pow_2" [id=56, type=pow]; -"57 mean_1" [id=57, type=mean]; -"58 add_4" [id=58, type=add]; -"59 rsqrt_1" [id=59, type=rsqrt]; -"60 mul_6" [id=60, type=mul]; -"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; -"62 to_3" [id=62, type=to]; -"63 mul_7" [id=63, type=mul]; -"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; -"66 linear_4" [id=66, type=linear]; -"67 silu" [id=67, type=silu]; -"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; -"70 linear_5" [id=70, type=linear]; -"71 mul_8" [id=71, type=mul]; -"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; -"2 
rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; -"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"5 arange" -> "31 index" [style=solid, label="(3,)"]; -"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; -"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; -"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; -"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; -"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; -"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; -"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; -"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; -"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; -"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; -"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; -"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; -"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; -"23 linear_1" 
-> "24 view_1" [style=solid, label="(1, 3, 64)"]; -"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; -"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; -"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; -"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"47 
scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; -"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; -"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; -"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; -"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; -"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; -"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; -"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; -"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; -"69 
asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; -"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot deleted file mode 100644 index 076e46114eb..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 rope_cos" [id=2, type="get_attr"]; -"3 rope_sin" [id=3, type="get_attr"]; -"4 x_embed" [id=4, type=input]; -"5 arange" [id=5, type=arange]; -"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; -"7 to" [id=7, type=to]; -"8 pow_1" [id=8, type=pow]; -"9 mean" [id=9, type=mean]; -"10 add" [id=10, type=add]; -"11 rsqrt" [id=11, type=rsqrt]; -"12 mul" [id=12, type=mul]; -"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; -"14 to_1" [id=14, type=to]; -"15 mul_1" [id=15, type=mul]; -"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, 
type="call_module"]; -"18 linear" [id=18, type=linear]; -"19 view" [id=19, type=view]; -"20 transpose" [id=20, type=transpose]; -"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; -"23 linear_1" [id=23, type=linear]; -"24 view_1" [id=24, type=view]; -"25 transpose_1" [id=25, type=transpose]; -"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; -"28 linear_2" [id=28, type=linear]; -"29 view_2" [id=29, type=view]; -"30 transpose_2" [id=30, type=transpose]; -"31 index" [id=31, type=index]; -"32 index_1" [id=32, type=index]; -"33 mul_2" [id=33, type=mul]; -"34 slice_1" [id=34, type=slice]; -"35 slice_2" [id=35, type=slice]; -"36 neg" [id=36, type=neg]; -"37 cat" [id=37, type=cat]; -"38 mul_3" [id=38, type=mul]; -"39 add_1" [id=39, type=add]; -"40 mul_4" [id=40, type=mul]; -"41 slice_3" [id=41, type=slice]; -"42 slice_4" [id=42, type=slice]; -"43 neg_1" [id=43, type=neg]; -"44 cat_1" [id=44, type=cat]; -"45 mul_5" [id=45, type=mul]; -"46 add_2" [id=46, type=add]; -"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; -"48 transpose_3" [id=48, type=transpose]; -"49 view_3" [id=49, type=view]; -"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; -"52 linear_3" [id=52, type=linear]; -"53 add_3" [id=53, type=add]; -"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; -"55 to_2" [id=55, type=to]; -"56 pow_2" [id=56, type=pow]; -"57 mean_1" [id=57, type=mean]; -"58 add_4" [id=58, type=add]; -"59 rsqrt_1" [id=59, type=rsqrt]; -"60 mul_6" [id=60, type=mul]; -"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; -"62 to_3" [id=62, type=to]; -"63 mul_7" [id=63, type=mul]; -"64 mlp_gate_proj_weight_updated_constant0" [id=64, 
type="get_attr"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; -"66 linear_4" [id=66, type=linear]; -"67 silu" [id=67, type=silu]; -"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; -"70 linear_5" [id=70, type=linear]; -"71 mul_8" [id=71, type=mul]; -"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; -"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; -"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"5 arange" -> "31 index" [style=solid, label="(3,)"]; -"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; -"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; -"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; -"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; -"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; -"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; -"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; -"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; -"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; -"16 
q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; -"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; -"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; -"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; -"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; -"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; -"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; -"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"35 slice_2" 
-> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; -"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; -"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; -"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; -"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; -"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; -"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"60 mul_6" -> "62 
to_3" [style=solid, label="(1, 3, 64)"]; -"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; -"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; -"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; -"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot deleted file mode 100644 index 076e46114eb..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" 
[id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 rope_cos" [id=2, type="get_attr"]; -"3 rope_sin" [id=3, type="get_attr"]; -"4 x_embed" [id=4, type=input]; -"5 arange" [id=5, type=arange]; -"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; -"7 to" [id=7, type=to]; -"8 pow_1" [id=8, type=pow]; -"9 mean" [id=9, type=mean]; -"10 add" [id=10, type=add]; -"11 rsqrt" [id=11, type=rsqrt]; -"12 mul" [id=12, type=mul]; -"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; -"14 to_1" [id=14, type=to]; -"15 mul_1" [id=15, type=mul]; -"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; -"18 linear" [id=18, type=linear]; -"19 view" [id=19, type=view]; -"20 transpose" [id=20, type=transpose]; -"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; -"23 linear_1" [id=23, type=linear]; -"24 view_1" [id=24, type=view]; -"25 transpose_1" [id=25, type=transpose]; -"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; -"28 linear_2" [id=28, type=linear]; -"29 view_2" [id=29, type=view]; -"30 transpose_2" [id=30, type=transpose]; -"31 index" [id=31, type=index]; -"32 index_1" [id=32, type=index]; -"33 mul_2" [id=33, type=mul]; -"34 slice_1" [id=34, type=slice]; -"35 slice_2" [id=35, type=slice]; -"36 neg" [id=36, type=neg]; -"37 cat" [id=37, type=cat]; -"38 mul_3" [id=38, type=mul]; -"39 add_1" [id=39, type=add]; -"40 mul_4" [id=40, type=mul]; -"41 slice_3" [id=41, type=slice]; -"42 slice_4" [id=42, type=slice]; -"43 neg_1" [id=43, type=neg]; -"44 cat_1" [id=44, type=cat]; -"45 mul_5" [id=45, type=mul]; -"46 add_2" [id=46, type=add]; -"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; -"48 transpose_3" [id=48, 
type=transpose]; -"49 view_3" [id=49, type=view]; -"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; -"52 linear_3" [id=52, type=linear]; -"53 add_3" [id=53, type=add]; -"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; -"55 to_2" [id=55, type=to]; -"56 pow_2" [id=56, type=pow]; -"57 mean_1" [id=57, type=mean]; -"58 add_4" [id=58, type=add]; -"59 rsqrt_1" [id=59, type=rsqrt]; -"60 mul_6" [id=60, type=mul]; -"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; -"62 to_3" [id=62, type=to]; -"63 mul_7" [id=63, type=mul]; -"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; -"66 linear_4" [id=66, type=linear]; -"67 silu" [id=67, type=silu]; -"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; -"70 linear_5" [id=70, type=linear]; -"71 mul_8" [id=71, type=mul]; -"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; -"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; -"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"5 arange" -> "31 index" [style=solid, label="(3,)"]; -"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; -"7 to" -> "8 
pow_1" [style=solid, label="(1, 3, 64)"]; -"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; -"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; -"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; -"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; -"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; -"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; -"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; -"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; -"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; -"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; -"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; -"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; -"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; -"27 
symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; -"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; -"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; -"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; -"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; -"52 linear_3" -> 
"53 add_3" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; -"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; -"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; -"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; -"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; -"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; -"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; -"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"74 linear_6" -> "75 add_5" 
[style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot deleted file mode 100644 index c62d8eb460e..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 rope_cos" [id=2, type="get_attr"]; -"3 rope_sin" [id=3, type="get_attr"]; -"4 x_embed" [id=4, type=input]; -"5 arange" [id=5, type=arange]; -"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; -"7 to" [id=7, type=to]; -"8 pow_1" [id=8, type=pow]; -"9 mean" [id=9, type=mean]; -"10 add" [id=10, type=add]; -"11 rsqrt" [id=11, type=rsqrt]; -"12 mul" [id=12, type=mul]; -"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; -"14 to_1" [id=14, type=to]; -"15 mul_1" [id=15, type=mul]; -"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; -"17 asymmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; -"18 linear" [id=18, type=linear]; -"19 view" [id=19, type=view]; -"20 transpose" [id=20, type=transpose]; -"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; -"23 linear_1" [id=23, type=linear]; -"24 view_1" [id=24, type=view]; -"25 transpose_1" [id=25, type=transpose]; -"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; 
-"28 linear_2" [id=28, type=linear]; -"29 view_2" [id=29, type=view]; -"30 transpose_2" [id=30, type=transpose]; -"31 index" [id=31, type=index]; -"32 index_1" [id=32, type=index]; -"33 mul_2" [id=33, type=mul]; -"34 slice_1" [id=34, type=slice]; -"35 slice_2" [id=35, type=slice]; -"36 neg" [id=36, type=neg]; -"37 cat" [id=37, type=cat]; -"38 mul_3" [id=38, type=mul]; -"39 add_1" [id=39, type=add]; -"40 mul_4" [id=40, type=mul]; -"41 slice_3" [id=41, type=slice]; -"42 slice_4" [id=42, type=slice]; -"43 neg_1" [id=43, type=neg]; -"44 cat_1" [id=44, type=cat]; -"45 mul_5" [id=45, type=mul]; -"46 add_2" [id=46, type=add]; -"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; -"48 transpose_3" [id=48, type=transpose]; -"49 view_3" [id=49, type=view]; -"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; -"51 asymmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; -"52 linear_3" [id=52, type=linear]; -"53 add_3" [id=53, type=add]; -"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; -"55 to_2" [id=55, type=to]; -"56 pow_2" [id=56, type=pow]; -"57 mean_1" [id=57, type=mean]; -"58 add_4" [id=58, type=add]; -"59 rsqrt_1" [id=59, type=rsqrt]; -"60 mul_6" [id=60, type=mul]; -"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; -"62 to_3" [id=62, type=to]; -"63 mul_7" [id=63, type=mul]; -"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; -"66 linear_4" [id=66, type=linear]; -"67 silu" [id=67, type=silu]; -"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; -"70 linear_5" [id=70, type=linear]; -"71 mul_8" [id=71, type=mul]; -"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" 
[id=73, type="call_module"]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; -"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; -"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"5 arange" -> "31 index" [style=solid, label="(3,)"]; -"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; -"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; -"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; -"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; -"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; -"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; -"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; -"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; -"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; -"16 q_proj_weight_updated_constant0" -> "17 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; -"17 asymmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; -"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; -"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "35 slice_2" [style=solid, 
label="(1, 4, 3, 16)"]; -"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; -"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; -"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; -"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; -"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; -"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"43 neg_1" -> "44 
cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; -"50 o_proj_weight_updated_constant0" -> "51 asymmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(64, 64)"]; -"51 asymmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; -"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; -"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; -"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; -"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; -"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; -"66 linear_4" -> "67 silu" [style=solid, 
label="(1, 3, 128)"]; -"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 mlp_up_proj_weight_updated_constant0" -> "69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; -"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; -"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json deleted file mode 100644 index e1baa81d0dc..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json +++ /dev/null @@ -1,128 +0,0 @@ -[ - { - "weight_name": "q_proj_weight", - "node_with_weight": "linear", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "k_proj_weight", - "node_with_weight": "linear_1", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "v_proj_weight", - "node_with_weight": "linear_2", 
- "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "o_proj_weight", - "node_with_weight": "linear_3", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "mlp_gate_proj_weight", - "node_with_weight": "linear_4", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 128, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "mlp_up_proj_weight", - "node_with_weight": "linear_5", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 128, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "mlp_down_proj_weight", - "node_with_weight": "linear_6", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 128 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - } -] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot deleted file mode 100644 index 31fb9463c88..00000000000 --- 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 rope_cos" [id=2, type="get_attr"]; -"3 rope_sin" [id=3, type="get_attr"]; -"4 x_embed" [id=4, type=input]; -"5 arange" [id=5, type=arange]; -"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; -"7 to" [id=7, type=to]; -"8 pow_1" [id=8, type=pow]; -"9 mean" [id=9, type=mean]; -"10 add" [id=10, type=add]; -"11 rsqrt" [id=11, type=rsqrt]; -"12 mul" [id=12, type=mul]; -"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; -"14 to_1" [id=14, type=to]; -"15 mul_1" [id=15, type=mul]; -"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; -"18 linear" [id=18, type=linear]; -"19 view" [id=19, type=view]; -"20 transpose" [id=20, type=transpose]; -"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; -"23 linear_1" [id=23, type=linear]; -"24 view_1" [id=24, type=view]; -"25 transpose_1" [id=25, type=transpose]; -"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; -"28 linear_2" [id=28, type=linear]; -"29 view_2" [id=29, type=view]; -"30 transpose_2" [id=30, type=transpose]; -"31 index" [id=31, type=index]; -"32 index_1" [id=32, type=index]; -"33 mul_2" [id=33, type=mul]; -"34 slice_1" [id=34, type=slice]; -"35 slice_2" [id=35, type=slice]; -"36 neg" [id=36, type=neg]; -"37 cat" [id=37, type=cat]; -"38 mul_3" [id=38, type=mul]; -"39 add_1" [id=39, type=add]; -"40 mul_4" [id=40, type=mul]; -"41 slice_3" [id=41, type=slice]; -"42 slice_4" [id=42, 
type=slice]; -"43 neg_1" [id=43, type=neg]; -"44 cat_1" [id=44, type=cat]; -"45 mul_5" [id=45, type=mul]; -"46 add_2" [id=46, type=add]; -"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; -"48 transpose_3" [id=48, type=transpose]; -"49 view_3" [id=49, type=view]; -"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; -"52 linear_3" [id=52, type=linear]; -"53 add_3" [id=53, type=add]; -"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; -"55 to_2" [id=55, type=to]; -"56 pow_2" [id=56, type=pow]; -"57 mean_1" [id=57, type=mean]; -"58 add_4" [id=58, type=add]; -"59 rsqrt_1" [id=59, type=rsqrt]; -"60 mul_6" [id=60, type=mul]; -"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; -"62 to_3" [id=62, type=to]; -"63 mul_7" [id=63, type=mul]; -"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; -"66 linear_4" [id=66, type=linear]; -"67 silu" [id=67, type=silu]; -"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; -"70 linear_5" [id=70, type=linear]; -"71 mul_8" [id=71, type=mul]; -"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; -"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; -"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"4 
x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"5 arange" -> "31 index" [style=solid, label="(3,)"]; -"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; -"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; -"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; -"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; -"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; -"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; -"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; -"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; -"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; -"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; -"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; -"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; -"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; -"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; -"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "41 slice_3" 
[style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; -"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; -"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; -"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; -"50 
o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; -"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; -"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; -"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; -"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; -"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; -"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; -"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; -"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 
mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; -"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot deleted file mode 100644 index 31fb9463c88..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 rope_cos" [id=2, type="get_attr"]; -"3 rope_sin" [id=3, type="get_attr"]; -"4 x_embed" [id=4, type=input]; -"5 arange" [id=5, type=arange]; -"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; -"7 to" [id=7, type=to]; -"8 pow_1" [id=8, type=pow]; -"9 mean" [id=9, type=mean]; -"10 add" [id=10, type=add]; -"11 rsqrt" [id=11, type=rsqrt]; -"12 mul" [id=12, type=mul]; -"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; -"14 to_1" [id=14, type=to]; -"15 mul_1" [id=15, type=mul]; -"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; -"18 linear" [id=18, type=linear]; -"19 view" [id=19, type=view]; -"20 transpose" [id=20, type=transpose]; -"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, 
type="call_module"]; -"23 linear_1" [id=23, type=linear]; -"24 view_1" [id=24, type=view]; -"25 transpose_1" [id=25, type=transpose]; -"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; -"28 linear_2" [id=28, type=linear]; -"29 view_2" [id=29, type=view]; -"30 transpose_2" [id=30, type=transpose]; -"31 index" [id=31, type=index]; -"32 index_1" [id=32, type=index]; -"33 mul_2" [id=33, type=mul]; -"34 slice_1" [id=34, type=slice]; -"35 slice_2" [id=35, type=slice]; -"36 neg" [id=36, type=neg]; -"37 cat" [id=37, type=cat]; -"38 mul_3" [id=38, type=mul]; -"39 add_1" [id=39, type=add]; -"40 mul_4" [id=40, type=mul]; -"41 slice_3" [id=41, type=slice]; -"42 slice_4" [id=42, type=slice]; -"43 neg_1" [id=43, type=neg]; -"44 cat_1" [id=44, type=cat]; -"45 mul_5" [id=45, type=mul]; -"46 add_2" [id=46, type=add]; -"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; -"48 transpose_3" [id=48, type=transpose]; -"49 view_3" [id=49, type=view]; -"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; -"52 linear_3" [id=52, type=linear]; -"53 add_3" [id=53, type=add]; -"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; -"55 to_2" [id=55, type=to]; -"56 pow_2" [id=56, type=pow]; -"57 mean_1" [id=57, type=mean]; -"58 add_4" [id=58, type=add]; -"59 rsqrt_1" [id=59, type=rsqrt]; -"60 mul_6" [id=60, type=mul]; -"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; -"62 to_3" [id=62, type=to]; -"63 mul_7" [id=63, type=mul]; -"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; -"66 linear_4" [id=66, type=linear]; -"67 silu" [id=67, type=silu]; -"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 
asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; -"70 linear_5" [id=70, type=linear]; -"71 mul_8" [id=71, type=mul]; -"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; -"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; -"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"5 arange" -> "31 index" [style=solid, label="(3,)"]; -"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; -"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; -"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; -"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; -"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; -"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; -"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; -"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; -"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; -"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; -"18 linear" -> "19 view" 
[style=solid, label="(1, 3, 64)"]; -"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; -"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; -"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; -"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; -"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; -"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; -"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"39 add_1" -> 
"47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; -"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; -"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; -"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; -"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; -"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; -"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 
mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; -"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; -"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; -"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; -"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot deleted file mode 100644 index 31fb9463c88..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 rope_cos" [id=2, type="get_attr"]; -"3 rope_sin" [id=3, type="get_attr"]; -"4 x_embed" [id=4, type=input]; -"5 arange" [id=5, type=arange]; -"6 
_assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; -"7 to" [id=7, type=to]; -"8 pow_1" [id=8, type=pow]; -"9 mean" [id=9, type=mean]; -"10 add" [id=10, type=add]; -"11 rsqrt" [id=11, type=rsqrt]; -"12 mul" [id=12, type=mul]; -"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; -"14 to_1" [id=14, type=to]; -"15 mul_1" [id=15, type=mul]; -"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; -"18 linear" [id=18, type=linear]; -"19 view" [id=19, type=view]; -"20 transpose" [id=20, type=transpose]; -"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; -"23 linear_1" [id=23, type=linear]; -"24 view_1" [id=24, type=view]; -"25 transpose_1" [id=25, type=transpose]; -"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; -"28 linear_2" [id=28, type=linear]; -"29 view_2" [id=29, type=view]; -"30 transpose_2" [id=30, type=transpose]; -"31 index" [id=31, type=index]; -"32 index_1" [id=32, type=index]; -"33 mul_2" [id=33, type=mul]; -"34 slice_1" [id=34, type=slice]; -"35 slice_2" [id=35, type=slice]; -"36 neg" [id=36, type=neg]; -"37 cat" [id=37, type=cat]; -"38 mul_3" [id=38, type=mul]; -"39 add_1" [id=39, type=add]; -"40 mul_4" [id=40, type=mul]; -"41 slice_3" [id=41, type=slice]; -"42 slice_4" [id=42, type=slice]; -"43 neg_1" [id=43, type=neg]; -"44 cat_1" [id=44, type=cat]; -"45 mul_5" [id=45, type=mul]; -"46 add_2" [id=46, type=add]; -"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; -"48 transpose_3" [id=48, type=transpose]; -"49 view_3" [id=49, type=view]; -"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; -"52 linear_3" [id=52, 
type=linear]; -"53 add_3" [id=53, type=add]; -"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; -"55 to_2" [id=55, type=to]; -"56 pow_2" [id=56, type=pow]; -"57 mean_1" [id=57, type=mean]; -"58 add_4" [id=58, type=add]; -"59 rsqrt_1" [id=59, type=rsqrt]; -"60 mul_6" [id=60, type=mul]; -"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; -"62 to_3" [id=62, type=to]; -"63 mul_7" [id=63, type=mul]; -"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; -"66 linear_4" [id=66, type=linear]; -"67 silu" [id=67, type=silu]; -"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; -"70 linear_5" [id=70, type=linear]; -"71 mul_8" [id=71, type=mul]; -"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; -"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; -"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"5 arange" -> "31 index" [style=solid, label="(3,)"]; -"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; -"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; -"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; -"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; -"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; -"10 add" -> 
"11 rsqrt" [style=solid, label="(1, 3, 1)"]; -"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; -"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; -"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; -"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; -"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; -"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; -"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; -"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; -"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; -"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; -"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 
4, 16)"]; -"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; -"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; -"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; -"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "56 pow_2" 
[style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; -"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; -"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; -"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; -"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; -"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; -"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; -"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; -"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot deleted file mode 100644 index 31fb9463c88..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 rope_cos" [id=2, type="get_attr"]; -"3 rope_sin" [id=3, type="get_attr"]; -"4 x_embed" [id=4, type=input]; -"5 arange" [id=5, type=arange]; -"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; -"7 to" [id=7, type=to]; -"8 pow_1" [id=8, type=pow]; -"9 mean" [id=9, type=mean]; -"10 add" [id=10, type=add]; -"11 rsqrt" [id=11, type=rsqrt]; -"12 mul" [id=12, type=mul]; -"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; -"14 to_1" [id=14, type=to]; -"15 mul_1" [id=15, type=mul]; -"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; -"18 linear" [id=18, type=linear]; -"19 view" [id=19, type=view]; -"20 transpose" [id=20, type=transpose]; -"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; -"23 linear_1" [id=23, type=linear]; -"24 view_1" [id=24, type=view]; -"25 transpose_1" [id=25, type=transpose]; -"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; -"28 linear_2" [id=28, type=linear]; -"29 view_2" [id=29, type=view]; -"30 transpose_2" [id=30, type=transpose]; -"31 
index" [id=31, type=index]; -"32 index_1" [id=32, type=index]; -"33 mul_2" [id=33, type=mul]; -"34 slice_1" [id=34, type=slice]; -"35 slice_2" [id=35, type=slice]; -"36 neg" [id=36, type=neg]; -"37 cat" [id=37, type=cat]; -"38 mul_3" [id=38, type=mul]; -"39 add_1" [id=39, type=add]; -"40 mul_4" [id=40, type=mul]; -"41 slice_3" [id=41, type=slice]; -"42 slice_4" [id=42, type=slice]; -"43 neg_1" [id=43, type=neg]; -"44 cat_1" [id=44, type=cat]; -"45 mul_5" [id=45, type=mul]; -"46 add_2" [id=46, type=add]; -"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; -"48 transpose_3" [id=48, type=transpose]; -"49 view_3" [id=49, type=view]; -"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; -"52 linear_3" [id=52, type=linear]; -"53 add_3" [id=53, type=add]; -"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; -"55 to_2" [id=55, type=to]; -"56 pow_2" [id=56, type=pow]; -"57 mean_1" [id=57, type=mean]; -"58 add_4" [id=58, type=add]; -"59 rsqrt_1" [id=59, type=rsqrt]; -"60 mul_6" [id=60, type=mul]; -"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; -"62 to_3" [id=62, type=to]; -"63 mul_7" [id=63, type=mul]; -"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; -"66 linear_4" [id=66, type=linear]; -"67 silu" [id=67, type=silu]; -"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; -"70 linear_5" [id=70, type=linear]; -"71 mul_8" [id=71, type=mul]; -"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, 
type=output]; -"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; -"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; -"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"5 arange" -> "31 index" [style=solid, label="(3,)"]; -"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; -"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; -"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; -"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; -"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; -"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; -"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; -"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; -"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; -"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; -"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; -"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; -"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; -"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" 
[style=solid, label="(2048, 1)"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; -"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; -"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; -"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; -"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; -"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"45 
mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; -"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; -"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; -"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; -"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; -"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; -"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; -"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; -"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 
mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; -"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; -"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot deleted file mode 100644 index 99c2d53d916..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 rope_cos" [id=2, type="get_attr"]; -"3 rope_sin" [id=3, type="get_attr"]; -"4 x_embed" [id=4, type=input]; -"5 arange" [id=5, type=arange]; -"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; -"7 to" [id=7, type=to]; -"8 pow_1" [id=8, type=pow]; -"9 mean" [id=9, type=mean]; -"10 add" [id=10, type=add]; -"11 rsqrt" [id=11, type=rsqrt]; -"12 mul" [id=12, type=mul]; -"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; -"14 to_1" [id=14, type=to]; -"15 mul_1" [id=15, type=mul]; -"16 
q_proj_weight_updated_constant0" [id=16, type="get_attr"]; -"17 asymmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; -"18 linear" [id=18, type=linear]; -"19 view" [id=19, type=view]; -"20 transpose" [id=20, type=transpose]; -"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; -"23 linear_1" [id=23, type=linear]; -"24 view_1" [id=24, type=view]; -"25 transpose_1" [id=25, type=transpose]; -"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; -"28 linear_2" [id=28, type=linear]; -"29 view_2" [id=29, type=view]; -"30 transpose_2" [id=30, type=transpose]; -"31 index" [id=31, type=index]; -"32 index_1" [id=32, type=index]; -"33 mul_2" [id=33, type=mul]; -"34 slice_1" [id=34, type=slice]; -"35 slice_2" [id=35, type=slice]; -"36 neg" [id=36, type=neg]; -"37 cat" [id=37, type=cat]; -"38 mul_3" [id=38, type=mul]; -"39 add_1" [id=39, type=add]; -"40 mul_4" [id=40, type=mul]; -"41 slice_3" [id=41, type=slice]; -"42 slice_4" [id=42, type=slice]; -"43 neg_1" [id=43, type=neg]; -"44 cat_1" [id=44, type=cat]; -"45 mul_5" [id=45, type=mul]; -"46 add_2" [id=46, type=add]; -"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; -"48 transpose_3" [id=48, type=transpose]; -"49 view_3" [id=49, type=view]; -"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; -"52 linear_3" [id=52, type=linear]; -"53 add_3" [id=53, type=add]; -"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; -"55 to_2" [id=55, type=to]; -"56 pow_2" [id=56, type=pow]; -"57 mean_1" [id=57, type=mean]; -"58 add_4" [id=58, type=add]; -"59 rsqrt_1" [id=59, type=rsqrt]; -"60 mul_6" [id=60, type=mul]; -"61 _assert_tensor_metadata_default_3" [id=61, 
type="_assert_tensor_metadata"]; -"62 to_3" [id=62, type=to]; -"63 mul_7" [id=63, type=mul]; -"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; -"66 linear_4" [id=66, type=linear]; -"67 silu" [id=67, type=silu]; -"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; -"70 linear_5" [id=70, type=linear]; -"71 mul_8" [id=71, type=mul]; -"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; -"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; -"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"5 arange" -> "31 index" [style=solid, label="(3,)"]; -"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; -"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; -"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; -"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; -"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; -"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; -"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; -"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; -"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 
64)"]; -"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; -"16 q_proj_weight_updated_constant0" -> "17 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; -"17 asymmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; -"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; -"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; -"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; -"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; -"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; -"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; -"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; -"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; -"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; 
-"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; -"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; -"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; -"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; -"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; -"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; -"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"59 rsqrt_1" -> "60 mul_6" 
[style=solid, label="(1, 3, 1)"]; -"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; -"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; -"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; -"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; -"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 mlp_up_proj_weight_updated_constant0" -> "69 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; -"69 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; -"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json deleted file mode 100644 index 69d4cf0f6a8..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json +++ /dev/null @@ -1,128 +0,0 
@@ -[ - { - "weight_name": "q_proj_weight", - "node_with_weight": "linear", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int8_asym", - "group_size": -1, - "codebook_values": null - } - }, - { - "weight_name": "k_proj_weight", - "node_with_weight": "linear_1", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int8_asym", - "group_size": -1, - "codebook_values": null - } - }, - { - "weight_name": "v_proj_weight", - "node_with_weight": "linear_2", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int8_asym", - "group_size": -1, - "codebook_values": null - } - }, - { - "weight_name": "o_proj_weight", - "node_with_weight": "linear_3", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int8_asym", - "group_size": -1, - "codebook_values": null - } - }, - { - "weight_name": "mlp_gate_proj_weight", - "node_with_weight": "linear_4", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 128, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int8_asym", - "group_size": -1, - "codebook_values": null - } - }, - { - "weight_name": "mlp_up_proj_weight", - "node_with_weight": "linear_5", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 128, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int8_asym", - "group_size": -1, - "codebook_values": null - } - }, - { - "weight_name": "mlp_down_proj_weight", - "node_with_weight": "linear_6", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 128 - ], - "reduction_axes": [ - 1 - ], - 
"compression_config": { - "mode": "int8_asym", - "group_size": -1, - "codebook_values": null - } - } -] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot deleted file mode 100644 index 29de7b02841..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot +++ /dev/null @@ -1,169 +0,0 @@ -strict digraph { -"0 attn_norm_weight" [id=0, type="get_attr"]; -"1 mlp_norm_weight" [id=1, type="get_attr"]; -"2 rope_cos" [id=2, type="get_attr"]; -"3 rope_sin" [id=3, type="get_attr"]; -"4 x_embed" [id=4, type=input]; -"5 arange" [id=5, type=arange]; -"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; -"7 to" [id=7, type=to]; -"8 pow_1" [id=8, type=pow]; -"9 mean" [id=9, type=mean]; -"10 add" [id=10, type=add]; -"11 rsqrt" [id=11, type=rsqrt]; -"12 mul" [id=12, type=mul]; -"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; -"14 to_1" [id=14, type=to]; -"15 mul_1" [id=15, type=mul]; -"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; -"17 asymmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; -"18 linear" [id=18, type=linear]; -"19 view" [id=19, type=view]; -"20 transpose" [id=20, type=transpose]; -"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; -"22 asymmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; -"23 linear_1" [id=23, type=linear]; -"24 view_1" [id=24, type=view]; -"25 transpose_1" [id=25, type=transpose]; -"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; -"27 asymmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; -"28 linear_2" [id=28, 
type=linear]; -"29 view_2" [id=29, type=view]; -"30 transpose_2" [id=30, type=transpose]; -"31 index" [id=31, type=index]; -"32 index_1" [id=32, type=index]; -"33 mul_2" [id=33, type=mul]; -"34 slice_1" [id=34, type=slice]; -"35 slice_2" [id=35, type=slice]; -"36 neg" [id=36, type=neg]; -"37 cat" [id=37, type=cat]; -"38 mul_3" [id=38, type=mul]; -"39 add_1" [id=39, type=add]; -"40 mul_4" [id=40, type=mul]; -"41 slice_3" [id=41, type=slice]; -"42 slice_4" [id=42, type=slice]; -"43 neg_1" [id=43, type=neg]; -"44 cat_1" [id=44, type=cat]; -"45 mul_5" [id=45, type=mul]; -"46 add_2" [id=46, type=add]; -"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; -"48 transpose_3" [id=48, type=transpose]; -"49 view_3" [id=49, type=view]; -"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; -"51 asymmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; -"52 linear_3" [id=52, type=linear]; -"53 add_3" [id=53, type=add]; -"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; -"55 to_2" [id=55, type=to]; -"56 pow_2" [id=56, type=pow]; -"57 mean_1" [id=57, type=mean]; -"58 add_4" [id=58, type=add]; -"59 rsqrt_1" [id=59, type=rsqrt]; -"60 mul_6" [id=60, type=mul]; -"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; -"62 to_3" [id=62, type=to]; -"63 mul_7" [id=63, type=mul]; -"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; -"66 linear_4" [id=66, type=linear]; -"67 silu" [id=67, type=silu]; -"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; -"70 linear_5" [id=70, type=linear]; -"71 mul_8" [id=71, type=mul]; -"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, 
type="call_module"]; -"74 linear_6" [id=74, type=linear]; -"75 add_5" [id=75, type=add]; -"76 output" [id=76, type=output]; -"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; -"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; -"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; -"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; -"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; -"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"5 arange" -> "31 index" [style=solid, label="(3,)"]; -"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; -"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; -"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; -"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; -"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; -"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; -"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; -"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; -"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; -"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; -"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; -"16 q_proj_weight_updated_constant0" -> "17 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; -"17 asymmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; -"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; -"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; -"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; -"20 transpose" -> "35 slice_2" [style=solid, label="(1, 
4, 3, 16)"]; -"21 k_proj_weight_updated_constant0" -> "22 asymmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(64, 64)"]; -"22 asymmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; -"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; -"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; -"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; -"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; -"26 v_proj_weight_updated_constant0" -> "27 asymmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(64, 64)"]; -"27 asymmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; -"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; -"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; -"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; -"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; -"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; -"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; -"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; -"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; -"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; -"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; -"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; -"43 neg_1" -> "44 cat_1" 
[style=solid, label="(1, 4, 3, 8)"]; -"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; -"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; -"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; -"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; -"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; -"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; -"50 o_proj_weight_updated_constant0" -> "51 asymmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(64, 64)"]; -"51 asymmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; -"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; -"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; -"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; -"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; -"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; -"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; -"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; -"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; -"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; -"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; -"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; -"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; -"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 
128)"]; -"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; -"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; -"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; -"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; -"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; -"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; -"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; -"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json deleted file mode 100644 index fd8fbda6f54..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "weight_name": "wte_weight_1", - "node_with_weight": "embedding", - "weight_port_id": 0, - "weight_dtype": "float32", - "weight_shape": [ - 128, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int8_asym", - "group_size": -1, - "codebook_values": null - } - }, - { - "weight_name": "linear_weight", - "node_with_weight": "linear", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - } -] \ No newline at end of file diff --git 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot deleted file mode 100644 index b249fdf7ce3..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_hessian_input_activation.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_bias" [id=0, type="get_attr"]; -"1 lm_head_bias" [id=1, type="get_attr"]; -"2 input_ids" [id=2, type=input]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 embedding" [id=5, type=embedding]; -"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; -"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; -"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"8 linear" -> "9 linear_1" 
[style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot deleted file mode 100644 index b249fdf7ce3..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_max_activation_variance.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_bias" [id=0, type="get_attr"]; -"1 lm_head_bias" [id=1, type="get_attr"]; -"2 input_ids" [id=2, type=input]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 embedding" [id=5, type=embedding]; -"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; -"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; -"7 
asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot deleted file mode 100644 index b249fdf7ce3..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_magnitude.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_bias" [id=0, type="get_attr"]; -"1 lm_head_bias" [id=1, type="get_attr"]; -"2 input_ids" [id=2, type=input]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 embedding" [id=5, type=embedding]; -"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; -"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 
linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; -"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot deleted file mode 100644 index b249fdf7ce3..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_mean_activation_variance.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_bias" [id=0, type="get_attr"]; -"1 lm_head_bias" [id=1, type="get_attr"]; -"2 input_ids" [id=2, type=input]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 embedding" [id=5, type=embedding]; -"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 
linear_1" [style=solid, label="(128, 64)"]; -"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; -"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot deleted file mode 100644 index b249fdf7ce3..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_sensitivity_metric_weight_quantization_error.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_bias" [id=0, type="get_attr"]; -"1 lm_head_bias" [id=1, type="get_attr"]; -"2 input_ids" [id=2, type=input]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 embedding" [id=5, type=embedding]; -"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> 
"5 embedding" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; -"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; -"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json deleted file mode 100644 index 81205ac2ca8..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "weight_name": "wte_weight_1", - "node_with_weight": "embedding", - "weight_port_id": 0, - "weight_dtype": "float32", - "weight_shape": [ - 128, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - }, - { - "weight_name": "linear_weight", - "node_with_weight": "linear", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int4_sym", - "group_size": 32, - "codebook_values": null - } - } -] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot 
b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot deleted file mode 100644 index 0a7bb5fe8f8..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_hessian_input_activation.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_bias" [id=0, type="get_attr"]; -"1 lm_head_bias" [id=1, type="get_attr"]; -"2 input_ids" [id=2, type=input]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 embedding" [id=5, type=embedding]; -"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; -"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; -"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot deleted file mode 100644 index 0a7bb5fe8f8..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_max_activation_variance.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_bias" [id=0, type="get_attr"]; -"1 lm_head_bias" [id=1, type="get_attr"]; -"2 input_ids" [id=2, type=input]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 embedding" [id=5, type=embedding]; -"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; -"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; -"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, 
label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot deleted file mode 100644 index 0a7bb5fe8f8..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_magnitude.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_bias" [id=0, type="get_attr"]; -"1 lm_head_bias" [id=1, type="get_attr"]; -"2 input_ids" [id=2, type=input]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 embedding" [id=5, type=embedding]; -"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; -"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; -"7 
asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot deleted file mode 100644 index 0a7bb5fe8f8..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_mean_activation_variance.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_bias" [id=0, type="get_attr"]; -"1 lm_head_bias" [id=1, type="get_attr"]; -"2 input_ids" [id=2, type=input]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 embedding" [id=5, type=embedding]; -"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; -"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 
linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; -"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot deleted file mode 100644 index 0a7bb5fe8f8..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_sensitivity_metric_weight_quantization_error.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_bias" [id=0, type="get_attr"]; -"1 lm_head_bias" [id=1, type="get_attr"]; -"2 input_ids" [id=2, type=input]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 embedding" [id=5, type=embedding]; -"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; -"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" 
[style=solid, label="(128, 64)"]; -"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; -"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json deleted file mode 100644 index 49d45c1fffb..00000000000 --- a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "weight_name": "wte_weight_1", - "node_with_weight": "embedding", - "weight_port_id": 0, - "weight_dtype": "float32", - "weight_shape": [ - 128, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int8_asym", - "group_size": -1, - "codebook_values": null - } - }, - { - "weight_name": "linear_weight", - "node_with_weight": "linear", - "weight_port_id": 1, - "weight_dtype": "float32", - "weight_shape": [ - 64, - 64 - ], - "reduction_axes": [ - 1 - ], - "compression_config": { - "mode": "int8_asym", - "group_size": -1, - "codebook_values": null - } - } -] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot deleted file mode 100644 index b249fdf7ce3..00000000000 --- 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_sensitivity_metric_None.dot +++ /dev/null @@ -1,24 +0,0 @@ -strict digraph { -"0 linear_bias" [id=0, type="get_attr"]; -"1 lm_head_bias" [id=1, type="get_attr"]; -"2 input_ids" [id=2, type=input]; -"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; -"5 embedding" [id=5, type=embedding]; -"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; -"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; -"8 linear" [id=8, type=linear]; -"9 linear_1" [id=9, type=linear]; -"10 output" [id=10, type=output]; -"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; -"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; -"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; -"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; -"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; -"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; -"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; -"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; -"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; -"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; -} From b9f3eff651382d3344a569ebfcd317ba3116f3e5 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 16:30:58 +0400 Subject: [PATCH 61/91] update test and references --- .../executorch/test_quantizer_compression.py | 91 +- ...4wo_sym_gs32_ratio0.8_all_layers_False.dot | 169 + 
...t4wo_sym_gs32_ratio0.8_all_layers_True.dot | 169 + ...t8wo_asym_gs-1_ratio1_all_layers_False.dot | 169 + ...4wo_sym_gs32_ratio0.8_all_layers_False.dot | 24 + ...t4wo_sym_gs32_ratio0.8_all_layers_True.dot | 24 + ...t8wo_asym_gs-1_ratio1_all_layers_False.dot | 24 + ...4wo_sym_gs32_ratio0.8_all_layers_False.dot | 169 + ...e_scale_estimation_True_ref_wc_scales.json | 3664 ++++++++++++++++ ...atio0.8_all_layers_False_ref_wc_param.json | 128 + ...t4wo_sym_gs32_ratio0.8_all_layers_True.dot | 169 + ...e_scale_estimation_True_ref_wc_scales.json | 3728 +++++++++++++++++ ...ratio0.8_all_layers_True_ref_wc_param.json | 128 + ...t8wo_asym_gs-1_ratio1_all_layers_False.dot | 169 + ..._scale_estimation_False_ref_wc_scales.json | 1744 ++++++++ ..._ratio1_all_layers_False_ref_wc_param.json | 128 + ...4wo_sym_gs32_ratio0.8_all_layers_False.dot | 24 + ...e_scale_estimation_True_ref_wc_scales.json | 582 +++ ...atio0.8_all_layers_False_ref_wc_param.json | 38 + ...t4wo_sym_gs32_ratio0.8_all_layers_True.dot | 24 + ...e_scale_estimation_True_ref_wc_scales.json | 1222 ++++++ ...ratio0.8_all_layers_True_ref_wc_param.json | 38 + ...t8wo_asym_gs-1_ratio1_all_layers_False.dot | 24 + ..._scale_estimation_False_ref_wc_scales.json | 582 +++ ..._ratio1_all_layers_False_ref_wc_param.json | 38 + 25 files changed, 13229 insertions(+), 40 deletions(-) create mode 100644 tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot create mode 100644 tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot create mode 100644 tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot create mode 100644 tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot create mode 100644 
tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot create mode 100644 tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json create 
mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json create mode 100644 tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json diff --git a/tests/executorch/test_quantizer_compression.py b/tests/executorch/test_quantizer_compression.py index 9cc817d5292..180ba2510f7 100644 --- a/tests/executorch/test_quantizer_compression.py +++ b/tests/executorch/test_quantizer_compression.py @@ -20,8 +20,8 @@ import pytest import torch import torch.fx -from torch.ao.quantization.quantize_pt2e import convert_pt2e -from torch.ao.quantization.quantize_pt2e import prepare_pt2e +from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e +from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e import nncf from executorch.backends.openvino.quantizer.quantizer import OpenVINOQuantizer @@ -42,7 +42,7 @@ from tests.torch2.fx.helpers import get_torch_fx_model FX_PT2E_DIR = TEST_ROOT / "torch2" / "data" / "fx" / "compress_pt2e" -FX_AO_DIR = TEST_ROOT / "torch2" / "data" / "fx" / 
"ao_compression_OpenVINOQuantizer" +FX_AO_DIR = TEST_ROOT / "torch2" / "data" / "fx" / "ao_compression" @dataclass @@ -64,7 +64,7 @@ def get_wc_scales_filename(model_name: str) -> str: return model_name + "_ref_wc_scales.json" -def _build_torch_fx_model(model_case: ModelCase) -> tuple[torch.fx.GraphModule, torch.Tensor]: +def build_torch_fx_model(model_case: ModelCase) -> tuple[torch.fx.GraphModule, torch.Tensor]: model = model_case.model_builder() # ShortTransformer takes token ids; match prior synthetic tests (int32) example_input = torch.ones(model_case.input_shape, dtype=torch.int32) @@ -100,10 +100,12 @@ def _string_from_quantizer_params(qparams: dict[str, Any], pt2e_param: Optional[ def check_multiple_isinstance(object_to_check: Any, objects: list[Any]): + if not object_to_check: + return False for obj in objects: - if not isinstance(object_to_check, obj): - return False - return True + if isinstance(object_to_check, obj): + return True + return False def get_scale_values_from_model(model: torch.fx.GraphModule): @@ -115,24 +117,31 @@ def get_scale_values_from_model(model: torch.fx.GraphModule): INT8SymmetricWeightsDecompressor, ] for node in model.graph.nodes: - if not check_multiple_isinstance(node.taget, decompressor_modules): + # print(node.name, node.target, node.meta) + node_module = getattr(model, node.target) if node.op == "call_module" else None + if not check_multiple_isinstance(node_module, decompressor_modules): continue - node_to_scale_mapping[node.name] = model.state_dict()[node.name] + state_dict_scale_name = f"{node.target}._scale" + node_to_scale_mapping[node.name] = model.state_dict()[state_dict_scale_name] return node_to_scale_mapping def get_test_cases(): - test_cases = () + test_cases = [] for model in BASE_MODELS: for qparam in QUANTIZER_PARAMS: pt2e_params = PT2E_PARAMS - if (qparam.get("mode") in {QuantizationMode.INT8WO_ASYM, QuantizationMode.INT8WO_SYM}) or ( - qparam.get("ratio") is None - ): + if qparam.get("mode") in 
{QuantizationMode.INT8WO_ASYM, QuantizationMode.INT8WO_SYM}: pt2e_params = [{}] for pt2e_param in pt2e_params: - test_cases.append(model, qparam, pt2e_param) + test_cases.append( + ( + model, + qparam, + pt2e_param, + ) + ) return test_cases @@ -174,7 +183,7 @@ def test_compress_pt2e( quantizer_params, pt2e_params, ): - fx_model, example_input = _build_torch_fx_model(model_case) + fx_model, example_input = build_torch_fx_model(model_case) with torch.no_grad(): ref_out = fx_model(example_input) @@ -191,7 +200,7 @@ def test_compress_pt2e( nncf_graph: NNCFGraph = GraphConverter.create_nncf_graph(quantized_model) nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) - param_string = _string_from_quantizer_params(quantizer_params, pt2e_params=None) + param_string = _string_from_quantizer_params(quantizer_params) path_to_dot = ( FX_PT2E_DIR / quantizer.__class__.__name__ / model_case.model_id / get_dot_filename(param_string) ).as_posix() @@ -213,9 +222,9 @@ def test_compress_pt2e_scales( model_case: ModelCase, quantizer_params, pt2e_params, - regen_ref_data, + regen_ref_data=True, ): - fx_model, example_input = _build_torch_fx_model(model_case) + fx_model, example_input = build_torch_fx_model(model_case) with torch.no_grad(): ref_out = fx_model(example_input) @@ -233,9 +242,10 @@ def test_compress_pt2e_scales( param_string = _string_from_quantizer_params(quantizer_params, pt2e_params) ref_json_path = ( FX_PT2E_DIR / quantizer.__class__.__name__ / model_case.model_id / get_wc_scales_filename(param_string) - ).as_posix() + ) scales_list = get_scale_values_from_model(quantized_model) + scales_list = to_json_serializable(scales_list) if regen_ref_data: with safe_open(ref_json_path, "w") as file: @@ -261,7 +271,7 @@ def test_openvino_quantizer( quantizer_builder: Callable[..., OpenVINOQuantizer], pt2e_params, ): - fx_model, example_input = _build_torch_fx_model(model_case) + fx_model, example_input = build_torch_fx_model(model_case) quantizer = 
quantizer_builder(**quantizer_params) prepared = prepare_pt2e(fx_model, quantizer) @@ -272,26 +282,27 @@ def test_openvino_quantizer( nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) param_string = _string_from_quantizer_params(quantizer_params) - path_to_dot = (FX_AO_DIR / model_case.model_id / get_dot_filename(param_string)).as_posix() + path_to_dot = ( + FX_AO_DIR / quantizer.__class__.__name__ / model_case.model_id / get_dot_filename(param_string) + ).as_posix() compare_nx_graph_with_reference(nx_graph, path_to_dot) -def _serialize_wc_param(wp) -> dict[str, Any]: - def to_json_serializable(obj): - if dataclasses.is_dataclass(obj): - return {k: to_json_serializable(v) for k, v in dataclasses.asdict(obj).items()} - elif isinstance(obj, Enum): - return obj.value - elif isinstance(obj, (list, tuple)): - return [to_json_serializable(x) for x in obj] - elif isinstance(obj, dict): - return {k: to_json_serializable(v) for k, v in obj.items()} - elif isinstance(obj, NNCFNode): - return obj.node_name - else: - return obj - - return to_json_serializable(wp) +def to_json_serializable(obj: Any) -> dict[Any, Any]: + if dataclasses.is_dataclass(obj): + return {k: to_json_serializable(v) for k, v in dataclasses.asdict(obj).items()} + elif isinstance(obj, Enum): + return obj.value + elif isinstance(obj, (list, tuple)): + return [to_json_serializable(x) for x in obj] + elif isinstance(obj, torch.Tensor): + return obj.detach().cpu().tolist() + elif isinstance(obj, dict): + return {k: to_json_serializable(v) for k, v in obj.items()} + elif isinstance(obj, NNCFNode): + return obj.node_name + else: + return obj @pytest.mark.parametrize( @@ -309,9 +320,9 @@ def test_openvino_wc_params( model_case: ModelCase, quantizer_params, pt2e_params, - regen_ref_data, + regen_ref_data=True, ): - fx_model, _ = _build_torch_fx_model(model_case) + fx_model, _ = build_torch_fx_model(model_case) nncf_graph: NNCFGraph = GraphConverter.create_nncf_graph(fx_model) param_string = 
_string_from_quantizer_params(quantizer_params) @@ -319,7 +330,7 @@ def test_openvino_wc_params( all_weight_params, *_ = quantizer.get_nncf_weight_compression_parameters(fx_model, nncf_graph) - wc_params = _serialize_wc_param(all_weight_params) + wc_params = to_json_serializable(all_weight_params) ref_json_path = ( FX_PT2E_DIR / quantizer.__class__.__name__ / model_case.model_id / get_wc_param_filename(param_string) diff --git a/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot new file mode 100644 index 00000000000..0a9a27fd85b --- /dev/null +++ b/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 q_proj_weight_updated_constant0" [id=2, type="get_attr"]; +"3 symmetric_weights_decompressor_q_proj_weight_0" [id=3, type="call_module"]; +"4 k_proj_weight_updated_constant0" [id=4, type="get_attr"]; +"5 symmetric_weights_decompressor_k_proj_weight_0" [id=5, type="call_module"]; +"6 v_proj_weight_updated_constant0" [id=6, type="get_attr"]; +"7 symmetric_weights_decompressor_v_proj_weight_0" [id=7, type="call_module"]; +"8 o_proj_weight_updated_constant0" [id=8, type="get_attr"]; +"9 symmetric_weights_decompressor_o_proj_weight_0" [id=9, type="call_module"]; +"10 mlp_gate_proj_weight_updated_constant0" [id=10, type="get_attr"]; +"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=11, type="call_module"]; +"12 mlp_up_proj_weight_updated_constant0" [id=12, type="get_attr"]; +"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=13, type="call_module"]; +"14 mlp_down_proj_weight_updated_constant0" [id=14, type="get_attr"]; +"15 
asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=15, type="call_module"]; +"16 rope_cos" [id=16, type="get_attr"]; +"17 rope_sin" [id=17, type="get_attr"]; +"18 x_embed" [id=18, type=input]; +"19 arange" [id=19, type=arange]; +"20 _assert_tensor_metadata_default" [id=20, type="_assert_tensor_metadata"]; +"21 to" [id=21, type=to]; +"22 pow_1" [id=22, type=pow]; +"23 mean" [id=23, type=mean]; +"24 add" [id=24, type=add]; +"25 rsqrt" [id=25, type=rsqrt]; +"26 mul" [id=26, type=mul]; +"27 _assert_tensor_metadata_default_1" [id=27, type="_assert_tensor_metadata"]; +"28 to_1" [id=28, type=to]; +"29 mul_1" [id=29, type=mul]; +"30 linear" [id=30, type=linear]; +"31 view" [id=31, type=view]; +"32 transpose" [id=32, type=transpose]; +"33 linear_1" [id=33, type=linear]; +"34 view_1" [id=34, type=view]; +"35 transpose_1" [id=35, type=transpose]; +"36 linear_2" [id=36, type=linear]; +"37 view_2" [id=37, type=view]; +"38 transpose_2" [id=38, type=transpose]; +"39 index" [id=39, type=index]; +"40 index_1" [id=40, type=index]; +"41 mul_2" [id=41, type=mul]; +"42 slice_1" [id=42, type=slice]; +"43 slice_2" [id=43, type=slice]; +"44 neg" [id=44, type=neg]; +"45 cat" [id=45, type=cat]; +"46 mul_3" [id=46, type=mul]; +"47 add_1" [id=47, type=add]; +"48 mul_4" [id=48, type=mul]; +"49 slice_3" [id=49, type=slice]; +"50 slice_4" [id=50, type=slice]; +"51 neg_1" [id=51, type=neg]; +"52 cat_1" [id=52, type=cat]; +"53 mul_5" [id=53, type=mul]; +"54 add_2" [id=54, type=add]; +"55 scaled_dot_product_attention" [id=55, type="scaled_dot_product_attention"]; +"56 transpose_3" [id=56, type=transpose]; +"57 view_3" [id=57, type=view]; +"58 linear_3" [id=58, type=linear]; +"59 add_3" [id=59, type=add]; +"60 _assert_tensor_metadata_default_2" [id=60, type="_assert_tensor_metadata"]; +"61 to_2" [id=61, type=to]; +"62 pow_2" [id=62, type=pow]; +"63 mean_1" [id=63, type=mean]; +"64 add_4" [id=64, type=add]; +"65 rsqrt_1" [id=65, type=rsqrt]; +"66 mul_6" [id=66, type=mul]; +"67 
_assert_tensor_metadata_default_3" [id=67, type="_assert_tensor_metadata"]; +"68 to_3" [id=68, type=to]; +"69 mul_7" [id=69, type=mul]; +"70 linear_4" [id=70, type=linear]; +"71 silu" [id=71, type=silu]; +"72 linear_5" [id=72, type=linear]; +"73 mul_8" [id=73, type=mul]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "29 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "69 mul_7" [style=solid, label="(64,)"]; +"2 q_proj_weight_updated_constant0" -> "3 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"3 symmetric_weights_decompressor_q_proj_weight_0" -> "30 linear" [style=solid, label="(64, 64)"]; +"4 k_proj_weight_updated_constant0" -> "5 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"5 symmetric_weights_decompressor_k_proj_weight_0" -> "33 linear_1" [style=solid, label="(64, 64)"]; +"6 v_proj_weight_updated_constant0" -> "7 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"7 symmetric_weights_decompressor_v_proj_weight_0" -> "36 linear_2" [style=solid, label="(64, 64)"]; +"8 o_proj_weight_updated_constant0" -> "9 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"9 symmetric_weights_decompressor_o_proj_weight_0" -> "58 linear_3" [style=solid, label="(64, 64)"]; +"10 mlp_gate_proj_weight_updated_constant0" -> "11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "70 linear_4" [style=solid, label="(128, 64)"]; +"12 mlp_up_proj_weight_updated_constant0" -> "13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; +"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "72 linear_5" [style=solid, label="(128, 64)"]; +"14 mlp_down_proj_weight_updated_constant0" -> "15 
asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"16 rope_cos" -> "39 index" [style=solid, label="(1, 1, 128, 16)"]; +"17 rope_sin" -> "40 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"18 x_embed" -> "20 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "21 to" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"19 arange" -> "39 index" [style=solid, label="(3,)"]; +"19 arange" -> "40 index_1" [style=solid, label="(3,)"]; +"21 to" -> "22 pow_1" [style=solid, label="(1, 3, 64)"]; +"21 to" -> "26 mul" [style=solid, label="(1, 3, 64)"]; +"22 pow_1" -> "23 mean" [style=solid, label="(1, 3, 64)"]; +"23 mean" -> "24 add" [style=solid, label="(1, 3, 1)"]; +"24 add" -> "25 rsqrt" [style=solid, label="(1, 3, 1)"]; +"25 rsqrt" -> "26 mul" [style=solid, label="(1, 3, 1)"]; +"26 mul" -> "27 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"26 mul" -> "28 to_1" [style=solid, label="(1, 3, 64)"]; +"28 to_1" -> "29 mul_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "30 linear" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "33 linear_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "36 linear_2" [style=solid, label="(1, 3, 64)"]; +"30 linear" -> "31 view" [style=solid, label="(1, 3, 64)"]; +"31 view" -> "32 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"32 transpose" -> "41 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "42 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "43 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"33 linear_1" -> "34 view_1" [style=solid, label="(1, 3, 64)"]; +"34 view_1" -> "35 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"35 transpose_1" -> "48 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "49 slice_3" [style=solid, 
label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "50 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"36 linear_2" -> "37 view_2" [style=solid, label="(1, 3, 64)"]; +"37 view_2" -> "38 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"38 transpose_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"39 index" -> "41 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"39 index" -> "48 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "46 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "53 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"41 mul_2" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"42 slice_1" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"43 slice_2" -> "44 neg" [style=solid, label="(1, 4, 3, 8)"]; +"44 neg" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"45 cat" -> "46 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"46 mul_3" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"47 add_1" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"48 mul_4" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"49 slice_3" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"50 slice_4" -> "51 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"51 neg_1" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"52 cat_1" -> "53 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"53 mul_5" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"54 add_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"55 scaled_dot_product_attention" -> "56 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"56 transpose_3" -> "57 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"57 view_3" -> "58 linear_3" [style=solid, label="(1, 3, 64)"]; +"58 linear_3" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "60 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "61 to_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "62 pow_2" [style=solid, 
label="(1, 3, 64)"]; +"61 to_2" -> "66 mul_6" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"62 pow_2" -> "63 mean_1" [style=solid, label="(1, 3, 64)"]; +"63 mean_1" -> "64 add_4" [style=solid, label="(1, 3, 1)"]; +"64 add_4" -> "65 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"65 rsqrt_1" -> "66 mul_6" [style=solid, label="(1, 3, 1)"]; +"66 mul_6" -> "67 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"66 mul_6" -> "68 to_3" [style=solid, label="(1, 3, 64)"]; +"68 to_3" -> "69 mul_7" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "70 linear_4" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "72 linear_5" [style=solid, label="(1, 3, 64)"]; +"70 linear_4" -> "71 silu" [style=solid, label="(1, 3, 128)"]; +"71 silu" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"72 linear_5" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"73 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot new file mode 100644 index 00000000000..254abcb9dc0 --- /dev/null +++ b/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 q_proj_weight_updated_constant0" [id=2, type="get_attr"]; +"3 symmetric_weights_decompressor_q_proj_weight_0" [id=3, type="call_module"]; +"4 k_proj_weight_updated_constant0" [id=4, type="get_attr"]; +"5 symmetric_weights_decompressor_k_proj_weight_0" [id=5, type="call_module"]; +"6 v_proj_weight_updated_constant0" 
[id=6, type="get_attr"]; +"7 symmetric_weights_decompressor_v_proj_weight_0" [id=7, type="call_module"]; +"8 o_proj_weight_updated_constant0" [id=8, type="get_attr"]; +"9 symmetric_weights_decompressor_o_proj_weight_0" [id=9, type="call_module"]; +"10 mlp_gate_proj_weight_updated_constant0" [id=10, type="get_attr"]; +"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=11, type="call_module"]; +"12 mlp_up_proj_weight_updated_constant0" [id=12, type="get_attr"]; +"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [id=13, type="call_module"]; +"14 mlp_down_proj_weight_updated_constant0" [id=14, type="get_attr"]; +"15 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=15, type="call_module"]; +"16 rope_cos" [id=16, type="get_attr"]; +"17 rope_sin" [id=17, type="get_attr"]; +"18 x_embed" [id=18, type=input]; +"19 arange" [id=19, type=arange]; +"20 _assert_tensor_metadata_default" [id=20, type="_assert_tensor_metadata"]; +"21 to" [id=21, type=to]; +"22 pow_1" [id=22, type=pow]; +"23 mean" [id=23, type=mean]; +"24 add" [id=24, type=add]; +"25 rsqrt" [id=25, type=rsqrt]; +"26 mul" [id=26, type=mul]; +"27 _assert_tensor_metadata_default_1" [id=27, type="_assert_tensor_metadata"]; +"28 to_1" [id=28, type=to]; +"29 mul_1" [id=29, type=mul]; +"30 linear" [id=30, type=linear]; +"31 view" [id=31, type=view]; +"32 transpose" [id=32, type=transpose]; +"33 linear_1" [id=33, type=linear]; +"34 view_1" [id=34, type=view]; +"35 transpose_1" [id=35, type=transpose]; +"36 linear_2" [id=36, type=linear]; +"37 view_2" [id=37, type=view]; +"38 transpose_2" [id=38, type=transpose]; +"39 index" [id=39, type=index]; +"40 index_1" [id=40, type=index]; +"41 mul_2" [id=41, type=mul]; +"42 slice_1" [id=42, type=slice]; +"43 slice_2" [id=43, type=slice]; +"44 neg" [id=44, type=neg]; +"45 cat" [id=45, type=cat]; +"46 mul_3" [id=46, type=mul]; +"47 add_1" [id=47, type=add]; +"48 mul_4" [id=48, type=mul]; +"49 slice_3" [id=49, type=slice]; +"50 slice_4" [id=50, type=slice]; 
+"51 neg_1" [id=51, type=neg]; +"52 cat_1" [id=52, type=cat]; +"53 mul_5" [id=53, type=mul]; +"54 add_2" [id=54, type=add]; +"55 scaled_dot_product_attention" [id=55, type="scaled_dot_product_attention"]; +"56 transpose_3" [id=56, type=transpose]; +"57 view_3" [id=57, type=view]; +"58 linear_3" [id=58, type=linear]; +"59 add_3" [id=59, type=add]; +"60 _assert_tensor_metadata_default_2" [id=60, type="_assert_tensor_metadata"]; +"61 to_2" [id=61, type=to]; +"62 pow_2" [id=62, type=pow]; +"63 mean_1" [id=63, type=mean]; +"64 add_4" [id=64, type=add]; +"65 rsqrt_1" [id=65, type=rsqrt]; +"66 mul_6" [id=66, type=mul]; +"67 _assert_tensor_metadata_default_3" [id=67, type="_assert_tensor_metadata"]; +"68 to_3" [id=68, type=to]; +"69 mul_7" [id=69, type=mul]; +"70 linear_4" [id=70, type=linear]; +"71 silu" [id=71, type=silu]; +"72 linear_5" [id=72, type=linear]; +"73 mul_8" [id=73, type=mul]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "29 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "69 mul_7" [style=solid, label="(64,)"]; +"2 q_proj_weight_updated_constant0" -> "3 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"3 symmetric_weights_decompressor_q_proj_weight_0" -> "30 linear" [style=solid, label="(64, 64)"]; +"4 k_proj_weight_updated_constant0" -> "5 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"5 symmetric_weights_decompressor_k_proj_weight_0" -> "33 linear_1" [style=solid, label="(64, 64)"]; +"6 v_proj_weight_updated_constant0" -> "7 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"7 symmetric_weights_decompressor_v_proj_weight_0" -> "36 linear_2" [style=solid, label="(64, 64)"]; +"8 o_proj_weight_updated_constant0" -> "9 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"9 symmetric_weights_decompressor_o_proj_weight_0" -> "58 
linear_3" [style=solid, label="(64, 64)"]; +"10 mlp_gate_proj_weight_updated_constant0" -> "11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"11 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "70 linear_4" [style=solid, label="(128, 64)"]; +"12 mlp_up_proj_weight_updated_constant0" -> "13 symmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(4096, 1)"]; +"13 symmetric_weights_decompressor_mlp_up_proj_weight_0" -> "72 linear_5" [style=solid, label="(128, 64)"]; +"14 mlp_down_proj_weight_updated_constant0" -> "15 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; +"15 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"16 rope_cos" -> "39 index" [style=solid, label="(1, 1, 128, 16)"]; +"17 rope_sin" -> "40 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"18 x_embed" -> "20 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "21 to" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"19 arange" -> "39 index" [style=solid, label="(3,)"]; +"19 arange" -> "40 index_1" [style=solid, label="(3,)"]; +"21 to" -> "22 pow_1" [style=solid, label="(1, 3, 64)"]; +"21 to" -> "26 mul" [style=solid, label="(1, 3, 64)"]; +"22 pow_1" -> "23 mean" [style=solid, label="(1, 3, 64)"]; +"23 mean" -> "24 add" [style=solid, label="(1, 3, 1)"]; +"24 add" -> "25 rsqrt" [style=solid, label="(1, 3, 1)"]; +"25 rsqrt" -> "26 mul" [style=solid, label="(1, 3, 1)"]; +"26 mul" -> "27 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"26 mul" -> "28 to_1" [style=solid, label="(1, 3, 64)"]; +"28 to_1" -> "29 mul_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "30 linear" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "33 linear_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "36 linear_2" [style=solid, label="(1, 3, 
64)"]; +"30 linear" -> "31 view" [style=solid, label="(1, 3, 64)"]; +"31 view" -> "32 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"32 transpose" -> "41 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "42 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "43 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"33 linear_1" -> "34 view_1" [style=solid, label="(1, 3, 64)"]; +"34 view_1" -> "35 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"35 transpose_1" -> "48 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "49 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "50 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"36 linear_2" -> "37 view_2" [style=solid, label="(1, 3, 64)"]; +"37 view_2" -> "38 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"38 transpose_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"39 index" -> "41 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"39 index" -> "48 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "46 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "53 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"41 mul_2" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"42 slice_1" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"43 slice_2" -> "44 neg" [style=solid, label="(1, 4, 3, 8)"]; +"44 neg" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"45 cat" -> "46 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"46 mul_3" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"47 add_1" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"48 mul_4" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"49 slice_3" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"50 slice_4" -> "51 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"51 neg_1" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"52 cat_1" -> "53 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"53 mul_5" -> "54 add_2" 
[style=solid, label="(1, 4, 3, 16)"]; +"54 add_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"55 scaled_dot_product_attention" -> "56 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"56 transpose_3" -> "57 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"57 view_3" -> "58 linear_3" [style=solid, label="(1, 3, 64)"]; +"58 linear_3" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "60 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "61 to_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "62 pow_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "66 mul_6" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"62 pow_2" -> "63 mean_1" [style=solid, label="(1, 3, 64)"]; +"63 mean_1" -> "64 add_4" [style=solid, label="(1, 3, 1)"]; +"64 add_4" -> "65 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"65 rsqrt_1" -> "66 mul_6" [style=solid, label="(1, 3, 1)"]; +"66 mul_6" -> "67 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"66 mul_6" -> "68 to_3" [style=solid, label="(1, 3, 64)"]; +"68 to_3" -> "69 mul_7" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "70 linear_4" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "72 linear_5" [style=solid, label="(1, 3, 64)"]; +"70 linear_4" -> "71 silu" [style=solid, label="(1, 3, 128)"]; +"71 silu" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"72 linear_5" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"73 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot new file mode 100644 index 
00000000000..614e06a21ac --- /dev/null +++ b/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 q_proj_weight_updated_constant0" [id=2, type="get_attr"]; +"3 asymmetric_weights_decompressor_q_proj_weight_0" [id=3, type="call_module"]; +"4 k_proj_weight_updated_constant0" [id=4, type="get_attr"]; +"5 asymmetric_weights_decompressor_k_proj_weight_0" [id=5, type="call_module"]; +"6 v_proj_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_v_proj_weight_0" [id=7, type="call_module"]; +"8 o_proj_weight_updated_constant0" [id=8, type="get_attr"]; +"9 asymmetric_weights_decompressor_o_proj_weight_0" [id=9, type="call_module"]; +"10 mlp_gate_proj_weight_updated_constant0" [id=10, type="get_attr"]; +"11 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=11, type="call_module"]; +"12 mlp_up_proj_weight_updated_constant0" [id=12, type="get_attr"]; +"13 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=13, type="call_module"]; +"14 mlp_down_proj_weight_updated_constant0" [id=14, type="get_attr"]; +"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=15, type="call_module"]; +"16 rope_cos" [id=16, type="get_attr"]; +"17 rope_sin" [id=17, type="get_attr"]; +"18 x_embed" [id=18, type=input]; +"19 arange" [id=19, type=arange]; +"20 _assert_tensor_metadata_default" [id=20, type="_assert_tensor_metadata"]; +"21 to" [id=21, type=to]; +"22 pow_1" [id=22, type=pow]; +"23 mean" [id=23, type=mean]; +"24 add" [id=24, type=add]; +"25 rsqrt" [id=25, type=rsqrt]; +"26 mul" [id=26, type=mul]; +"27 _assert_tensor_metadata_default_1" [id=27, type="_assert_tensor_metadata"]; +"28 to_1" [id=28, type=to]; +"29 mul_1" [id=29, type=mul]; +"30 linear" [id=30, type=linear]; +"31 view" [id=31, type=view]; +"32 transpose" [id=32, type=transpose]; 
+"33 linear_1" [id=33, type=linear]; +"34 view_1" [id=34, type=view]; +"35 transpose_1" [id=35, type=transpose]; +"36 linear_2" [id=36, type=linear]; +"37 view_2" [id=37, type=view]; +"38 transpose_2" [id=38, type=transpose]; +"39 index" [id=39, type=index]; +"40 index_1" [id=40, type=index]; +"41 mul_2" [id=41, type=mul]; +"42 slice_1" [id=42, type=slice]; +"43 slice_2" [id=43, type=slice]; +"44 neg" [id=44, type=neg]; +"45 cat" [id=45, type=cat]; +"46 mul_3" [id=46, type=mul]; +"47 add_1" [id=47, type=add]; +"48 mul_4" [id=48, type=mul]; +"49 slice_3" [id=49, type=slice]; +"50 slice_4" [id=50, type=slice]; +"51 neg_1" [id=51, type=neg]; +"52 cat_1" [id=52, type=cat]; +"53 mul_5" [id=53, type=mul]; +"54 add_2" [id=54, type=add]; +"55 scaled_dot_product_attention" [id=55, type="scaled_dot_product_attention"]; +"56 transpose_3" [id=56, type=transpose]; +"57 view_3" [id=57, type=view]; +"58 linear_3" [id=58, type=linear]; +"59 add_3" [id=59, type=add]; +"60 _assert_tensor_metadata_default_2" [id=60, type="_assert_tensor_metadata"]; +"61 to_2" [id=61, type=to]; +"62 pow_2" [id=62, type=pow]; +"63 mean_1" [id=63, type=mean]; +"64 add_4" [id=64, type=add]; +"65 rsqrt_1" [id=65, type=rsqrt]; +"66 mul_6" [id=66, type=mul]; +"67 _assert_tensor_metadata_default_3" [id=67, type="_assert_tensor_metadata"]; +"68 to_3" [id=68, type=to]; +"69 mul_7" [id=69, type=mul]; +"70 linear_4" [id=70, type=linear]; +"71 silu" [id=71, type=silu]; +"72 linear_5" [id=72, type=linear]; +"73 mul_8" [id=73, type=mul]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "29 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "69 mul_7" [style=solid, label="(64,)"]; +"2 q_proj_weight_updated_constant0" -> "3 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; +"3 asymmetric_weights_decompressor_q_proj_weight_0" -> "30 linear" [style=solid, label="(64, 64)"]; +"4 
k_proj_weight_updated_constant0" -> "5 asymmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(64, 64)"]; +"5 asymmetric_weights_decompressor_k_proj_weight_0" -> "33 linear_1" [style=solid, label="(64, 64)"]; +"6 v_proj_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_v_proj_weight_0" -> "36 linear_2" [style=solid, label="(64, 64)"]; +"8 o_proj_weight_updated_constant0" -> "9 asymmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(64, 64)"]; +"9 asymmetric_weights_decompressor_o_proj_weight_0" -> "58 linear_3" [style=solid, label="(64, 64)"]; +"10 mlp_gate_proj_weight_updated_constant0" -> "11 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"11 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "70 linear_4" [style=solid, label="(128, 64)"]; +"12 mlp_up_proj_weight_updated_constant0" -> "13 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"13 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "72 linear_5" [style=solid, label="(128, 64)"]; +"14 mlp_down_proj_weight_updated_constant0" -> "15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"15 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"16 rope_cos" -> "39 index" [style=solid, label="(1, 1, 128, 16)"]; +"17 rope_sin" -> "40 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"18 x_embed" -> "20 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "21 to" [style=solid, label="(1, 3, 64)"]; +"18 x_embed" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"19 arange" -> "39 index" [style=solid, label="(3,)"]; +"19 arange" -> "40 index_1" [style=solid, label="(3,)"]; +"21 to" -> "22 pow_1" [style=solid, label="(1, 3, 64)"]; +"21 to" -> "26 mul" [style=solid, 
label="(1, 3, 64)"]; +"22 pow_1" -> "23 mean" [style=solid, label="(1, 3, 64)"]; +"23 mean" -> "24 add" [style=solid, label="(1, 3, 1)"]; +"24 add" -> "25 rsqrt" [style=solid, label="(1, 3, 1)"]; +"25 rsqrt" -> "26 mul" [style=solid, label="(1, 3, 1)"]; +"26 mul" -> "27 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"26 mul" -> "28 to_1" [style=solid, label="(1, 3, 64)"]; +"28 to_1" -> "29 mul_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "30 linear" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "33 linear_1" [style=solid, label="(1, 3, 64)"]; +"29 mul_1" -> "36 linear_2" [style=solid, label="(1, 3, 64)"]; +"30 linear" -> "31 view" [style=solid, label="(1, 3, 64)"]; +"31 view" -> "32 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"32 transpose" -> "41 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "42 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"32 transpose" -> "43 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"33 linear_1" -> "34 view_1" [style=solid, label="(1, 3, 64)"]; +"34 view_1" -> "35 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"35 transpose_1" -> "48 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "49 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"35 transpose_1" -> "50 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"36 linear_2" -> "37 view_2" [style=solid, label="(1, 3, 64)"]; +"37 view_2" -> "38 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"38 transpose_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"39 index" -> "41 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"39 index" -> "48 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "46 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"40 index_1" -> "53 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"41 mul_2" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"42 slice_1" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"43 slice_2" -> "44 neg" [style=solid, 
label="(1, 4, 3, 8)"]; +"44 neg" -> "45 cat" [style=solid, label="(1, 4, 3, 8)"]; +"45 cat" -> "46 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"46 mul_3" -> "47 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"47 add_1" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"48 mul_4" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"49 slice_3" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"50 slice_4" -> "51 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"51 neg_1" -> "52 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"52 cat_1" -> "53 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"53 mul_5" -> "54 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"54 add_2" -> "55 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"55 scaled_dot_product_attention" -> "56 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"56 transpose_3" -> "57 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"57 view_3" -> "58 linear_3" [style=solid, label="(1, 3, 64)"]; +"58 linear_3" -> "59 add_3" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "60 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"59 add_3" -> "61 to_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "62 pow_2" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "66 mul_6" [style=solid, label="(1, 3, 64)"]; +"61 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"62 pow_2" -> "63 mean_1" [style=solid, label="(1, 3, 64)"]; +"63 mean_1" -> "64 add_4" [style=solid, label="(1, 3, 1)"]; +"64 add_4" -> "65 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"65 rsqrt_1" -> "66 mul_6" [style=solid, label="(1, 3, 1)"]; +"66 mul_6" -> "67 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"66 mul_6" -> "68 to_3" [style=solid, label="(1, 3, 64)"]; +"68 to_3" -> "69 mul_7" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "70 linear_4" [style=solid, label="(1, 3, 64)"]; +"69 mul_7" -> "72 linear_5" [style=solid, label="(1, 3, 64)"]; +"70 linear_4" -> "71 
silu" [style=solid, label="(1, 3, 128)"]; +"71 silu" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"72 linear_5" -> "73 mul_8" [style=solid, label="(1, 3, 128)"]; +"73 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot new file mode 100644 index 00000000000..2841824b5a3 --- /dev/null +++ b/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_weight_updated_constant0" [id=0, type="get_attr"]; +"1 symmetric_weights_decompressor_linear_weight_0" [id=1, type="call_module"]; +"2 linear_bias" [id=2, type="get_attr"]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 lm_head_bias" [id=5, type="get_attr"]; +"6 input_ids" [id=6, type=input]; +"7 embedding" [id=7, type=embedding]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_weight_updated_constant0" -> "1 symmetric_weights_decompressor_linear_weight_0" [style=solid, label="(2048, 1)"]; +"1 symmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"2 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "7 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, 
label="(128, 64)"]; +"5 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"6 input_ids" -> "7 embedding" [style=solid, label="(5,)"]; +"7 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot new file mode 100644 index 00000000000..0382f7e5934 --- /dev/null +++ b/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_weight_updated_constant0" [id=0, type="get_attr"]; +"1 symmetric_weights_decompressor_linear_weight_0" [id=1, type="call_module"]; +"2 linear_bias" [id=2, type="get_attr"]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 lm_head_bias" [id=5, type="get_attr"]; +"6 input_ids" [id=6, type=input]; +"7 embedding" [id=7, type=embedding]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_weight_updated_constant0" -> "1 symmetric_weights_decompressor_linear_weight_0" [style=solid, label="(2048, 1)"]; +"1 symmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"2 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "7 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 lm_head_bias" 
-> "9 linear_1" [style=solid, label="(128,)"]; +"6 input_ids" -> "7 embedding" [style=solid, label="(5,)"]; +"7 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot new file mode 100644 index 00000000000..03fc9e9c6a0 --- /dev/null +++ b/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_weight_updated_constant0" [id=0, type="get_attr"]; +"1 asymmetric_weights_decompressor_linear_weight_0" [id=1, type="call_module"]; +"2 linear_bias" [id=2, type="get_attr"]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 lm_head_bias" [id=5, type="get_attr"]; +"6 input_ids" [id=6, type=input]; +"7 embedding" [id=7, type=embedding]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_weight_updated_constant0" -> "1 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"1 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"2 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "7 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 lm_head_bias" -> "9 linear_1" [style=solid, 
label="(128,)"]; +"6 input_ids" -> "7 embedding" [style=solid, label="(5,)"]; +"7 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot new file mode 100644 index 00000000000..076e46114eb --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" 
[id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 
asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 
16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" 
[style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(4096, 1)"]; +"65 symmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 
64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json new file mode 100644 index 00000000000..364f78db4aa --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json @@ -0,0 +1,3664 @@ +{ + "symmetric_weights_decompressor_q_proj_weight_0": [ + [ + [ + -0.0135650634765625 + ], + [ + -0.014251708984375 + ] + ], + [ + [ + 0.015411376953125 + ], + [ + -0.01557159423828125 + ] + ], + [ + [ + -0.01427459716796875 + ], + [ + -0.01519012451171875 + ] + ], + [ + [ + 0.01224517822265625 + ], + [ + 0.01528167724609375 + ] + ], + [ + [ + 0.01348876953125 + ], + [ + -0.01556396484375 + ] + ], + [ + [ + 0.01401519775390625 + ], + [ + 0.0153350830078125 + ] + ], + [ + [ + 0.01560211181640625 + ], + [ + 
0.01523590087890625 + ] + ], + [ + [ + 0.0160675048828125 + ], + [ + -0.0149993896484375 + ] + ], + [ + [ + -0.01445770263671875 + ], + [ + 0.01251220703125 + ] + ], + [ + [ + -0.0155487060546875 + ], + [ + 0.0126495361328125 + ] + ], + [ + [ + -0.01306915283203125 + ], + [ + -0.01422882080078125 + ] + ], + [ + [ + -0.0154571533203125 + ], + [ + -0.015594482421875 + ] + ], + [ + [ + -0.01308441162109375 + ], + [ + -0.01392364501953125 + ] + ], + [ + [ + -0.01512908935546875 + ], + [ + 0.01303863525390625 + ] + ], + [ + [ + -0.0134124755859375 + ], + [ + -0.0130462646484375 + ] + ], + [ + [ + 0.01467132568359375 + ], + [ + 0.01568603515625 + ] + ], + [ + [ + -0.01470184326171875 + ], + [ + 0.0147705078125 + ] + ], + [ + [ + -0.01477813720703125 + ], + [ + -0.01468658447265625 + ] + ], + [ + [ + -0.0128021240234375 + ], + [ + 0.0122833251953125 + ] + ], + [ + [ + -0.0158233642578125 + ], + [ + 0.0131378173828125 + ] + ], + [ + [ + -0.01537322998046875 + ], + [ + -0.01543426513671875 + ] + ], + [ + [ + -0.01548004150390625 + ], + [ + 0.01435089111328125 + ] + ], + [ + [ + -0.0130462646484375 + ], + [ + 0.01294708251953125 + ] + ], + [ + [ + 0.01348876953125 + ], + [ + 0.01446533203125 + ] + ], + [ + [ + -0.0157470703125 + ], + [ + -0.014892578125 + ] + ], + [ + [ + 0.01482391357421875 + ], + [ + -0.01473236083984375 + ] + ], + [ + [ + 0.0155029296875 + ], + [ + 0.0135040283203125 + ] + ], + [ + [ + 0.01456451416015625 + ], + [ + -0.016326904296875 + ] + ], + [ + [ + -0.01509857177734375 + ], + [ + -0.012847900390625 + ] + ], + [ + [ + -0.0152130126953125 + ], + [ + 0.01459503173828125 + ] + ], + [ + [ + -0.0153350830078125 + ], + [ + -0.01287078857421875 + ] + ], + [ + [ + 0.0136871337890625 + ], + [ + 0.014801025390625 + ] + ], + [ + [ + 0.01520538330078125 + ], + [ + -0.01514434814453125 + ] + ], + [ + [ + -0.01471710205078125 + ], + [ + 0.0155792236328125 + ] + ], + [ + [ + -0.01485443115234375 + ], + [ + 0.0147857666015625 + ] + ], + [ + [ + 0.01512908935546875 + 
], + [ + -0.01381683349609375 + ] + ], + [ + [ + -0.015838623046875 + ], + [ + -0.01444244384765625 + ] + ], + [ + [ + -0.0146636962890625 + ], + [ + -0.01299285888671875 + ] + ], + [ + [ + -0.01495361328125 + ], + [ + -0.014801025390625 + ] + ], + [ + [ + -0.01396942138671875 + ], + [ + 0.0134124755859375 + ] + ], + [ + [ + -0.01490020751953125 + ], + [ + 0.015045166015625 + ] + ], + [ + [ + -0.01543426513671875 + ], + [ + 0.01514434814453125 + ] + ], + [ + [ + 0.01428985595703125 + ], + [ + 0.0141754150390625 + ] + ], + [ + [ + 0.014923095703125 + ], + [ + 0.01470947265625 + ] + ], + [ + [ + -0.01654052734375 + ], + [ + 0.01470947265625 + ] + ], + [ + [ + 0.0150299072265625 + ], + [ + 0.0132293701171875 + ] + ], + [ + [ + -0.0144500732421875 + ], + [ + -0.014556884765625 + ] + ], + [ + [ + -0.01354217529296875 + ], + [ + -0.01436614990234375 + ] + ], + [ + [ + 0.01250457763671875 + ], + [ + 0.014495849609375 + ] + ], + [ + [ + -0.01361846923828125 + ], + [ + -0.01445770263671875 + ] + ], + [ + [ + -0.0148162841796875 + ], + [ + 0.01213836669921875 + ] + ], + [ + [ + -0.0125274658203125 + ], + [ + -0.0152587890625 + ] + ], + [ + [ + -0.01308441162109375 + ], + [ + 0.01410675048828125 + ] + ], + [ + [ + -0.0150146484375 + ], + [ + 0.01324462890625 + ] + ], + [ + [ + -0.016021728515625 + ], + [ + 0.015289306640625 + ] + ], + [ + [ + -0.0143280029296875 + ], + [ + -0.0139617919921875 + ] + ], + [ + [ + -0.0147247314453125 + ], + [ + 0.0161590576171875 + ] + ], + [ + [ + -0.0119476318359375 + ], + [ + 0.0154571533203125 + ] + ], + [ + [ + -0.01476287841796875 + ], + [ + -0.0137176513671875 + ] + ], + [ + [ + 0.01558685302734375 + ], + [ + 0.013427734375 + ] + ], + [ + [ + -0.0167694091796875 + ], + [ + 0.01517486572265625 + ] + ], + [ + [ + 0.01235198974609375 + ], + [ + -0.01605224609375 + ] + ], + [ + [ + 0.015960693359375 + ], + [ + -0.015167236328125 + ] + ], + [ + [ + 0.01517486572265625 + ], + [ + 0.0162200927734375 + ] + ] + ], + 
"symmetric_weights_decompressor_k_proj_weight_0": [ + [ + [ + 0.0150604248046875 + ], + [ + -0.0138702392578125 + ] + ], + [ + [ + -0.01486968994140625 + ], + [ + -0.01424407958984375 + ] + ], + [ + [ + 0.01526641845703125 + ], + [ + -0.0126800537109375 + ] + ], + [ + [ + -0.01436614990234375 + ], + [ + -0.0157012939453125 + ] + ], + [ + [ + -0.01470947265625 + ], + [ + 0.013916015625 + ] + ], + [ + [ + -0.01371002197265625 + ], + [ + -0.01558685302734375 + ] + ], + [ + [ + 0.01265716552734375 + ], + [ + 0.01399993896484375 + ] + ], + [ + [ + -0.01520538330078125 + ], + [ + -0.01537322998046875 + ] + ], + [ + [ + 0.01538848876953125 + ], + [ + 0.0160064697265625 + ] + ], + [ + [ + -0.01537322998046875 + ], + [ + -0.01198577880859375 + ] + ], + [ + [ + -0.01551055908203125 + ], + [ + -0.01419830322265625 + ] + ], + [ + [ + -0.01544189453125 + ], + [ + -0.0127410888671875 + ] + ], + [ + [ + 0.014373779296875 + ], + [ + -0.01462554931640625 + ] + ], + [ + [ + 0.01326751708984375 + ], + [ + -0.015716552734375 + ] + ], + [ + [ + -0.01415252685546875 + ], + [ + -0.01483917236328125 + ] + ], + [ + [ + -0.01505279541015625 + ], + [ + 0.0154571533203125 + ] + ], + [ + [ + 0.01538848876953125 + ], + [ + -0.016021728515625 + ] + ], + [ + [ + -0.013916015625 + ], + [ + -0.01514434814453125 + ] + ], + [ + [ + 0.01401519775390625 + ], + [ + -0.01239776611328125 + ] + ], + [ + [ + -0.01540374755859375 + ], + [ + -0.0133209228515625 + ] + ], + [ + [ + 0.014617919921875 + ], + [ + 0.01727294921875 + ] + ], + [ + [ + 0.0156707763671875 + ], + [ + -0.0155792236328125 + ] + ], + [ + [ + 0.01384735107421875 + ], + [ + 0.01262664794921875 + ] + ], + [ + [ + -0.0143890380859375 + ], + [ + 0.015106201171875 + ] + ], + [ + [ + 0.0154571533203125 + ], + [ + -0.01403045654296875 + ] + ], + [ + [ + 0.0149993896484375 + ], + [ + 0.012847900390625 + ] + ], + [ + [ + 0.01552581787109375 + ], + [ + -0.01554107666015625 + ] + ], + [ + [ + 0.01503753662109375 + ], + [ + 0.01519775390625 + ] + ], + 
[ + [ + 0.0144195556640625 + ], + [ + -0.01325225830078125 + ] + ], + [ + [ + -0.0159454345703125 + ], + [ + -0.01555633544921875 + ] + ], + [ + [ + -0.01416015625 + ], + [ + -0.01580810546875 + ] + ], + [ + [ + -0.01446533203125 + ], + [ + -0.01375579833984375 + ] + ], + [ + [ + 0.01214599609375 + ], + [ + -0.0137786865234375 + ] + ], + [ + [ + 0.01497650146484375 + ], + [ + 0.0144805908203125 + ] + ], + [ + [ + -0.01474761962890625 + ], + [ + -0.0155181884765625 + ] + ], + [ + [ + -0.01508331298828125 + ], + [ + -0.01496124267578125 + ] + ], + [ + [ + -0.01544189453125 + ], + [ + 0.014678955078125 + ] + ], + [ + [ + -0.01329803466796875 + ], + [ + -0.0157012939453125 + ] + ], + [ + [ + 0.01535797119140625 + ], + [ + -0.0161590576171875 + ] + ], + [ + [ + 0.01480865478515625 + ], + [ + -0.01407623291015625 + ] + ], + [ + [ + 0.01212310791015625 + ], + [ + 0.01406097412109375 + ] + ], + [ + [ + 0.012939453125 + ], + [ + 0.01445770263671875 + ] + ], + [ + [ + 0.01476287841796875 + ], + [ + -0.01544189453125 + ] + ], + [ + [ + 0.0135650634765625 + ], + [ + 0.01358795166015625 + ] + ], + [ + [ + -0.0150299072265625 + ], + [ + -0.014190673828125 + ] + ], + [ + [ + 0.01522064208984375 + ], + [ + 0.01520538330078125 + ] + ], + [ + [ + 0.0146942138671875 + ], + [ + -0.01531982421875 + ] + ], + [ + [ + 0.01305389404296875 + ], + [ + 0.0139312744140625 + ] + ], + [ + [ + 0.01507568359375 + ], + [ + -0.01461029052734375 + ] + ], + [ + [ + -0.015899658203125 + ], + [ + 0.01421356201171875 + ] + ], + [ + [ + 0.01385498046875 + ], + [ + 0.01284027099609375 + ] + ], + [ + [ + 0.01535797119140625 + ], + [ + 0.0152740478515625 + ] + ], + [ + [ + -0.0144805908203125 + ], + [ + 0.01386260986328125 + ] + ], + [ + [ + 0.0132598876953125 + ], + [ + -0.0147705078125 + ] + ], + [ + [ + -0.01397705078125 + ], + [ + 0.01549530029296875 + ] + ], + [ + [ + 0.0145111083984375 + ], + [ + -0.0167694091796875 + ] + ], + [ + [ + -0.0148773193359375 + ], + [ + 0.01532745361328125 + ] + ], + [ + [ 
+ -0.0145263671875 + ], + [ + -0.01387786865234375 + ] + ], + [ + [ + 0.01473236083984375 + ], + [ + 0.016326904296875 + ] + ], + [ + [ + -0.01299285888671875 + ], + [ + 0.0149993896484375 + ] + ], + [ + [ + 0.013214111328125 + ], + [ + -0.01541900634765625 + ] + ], + [ + [ + -0.01316070556640625 + ], + [ + 0.0142822265625 + ] + ], + [ + [ + 0.01425933837890625 + ], + [ + -0.01212310791015625 + ] + ], + [ + [ + 0.0168914794921875 + ], + [ + -0.01407623291015625 + ] + ] + ], + "symmetric_weights_decompressor_v_proj_weight_0": [ + [ + [ + -0.0145721435546875 + ], + [ + -0.01470184326171875 + ] + ], + [ + [ + -0.01517486572265625 + ], + [ + -0.01496124267578125 + ] + ], + [ + [ + 0.013580322265625 + ], + [ + -0.0135040283203125 + ] + ], + [ + [ + 0.0142669677734375 + ], + [ + 0.014251708984375 + ] + ], + [ + [ + 0.0146942138671875 + ], + [ + 0.0164337158203125 + ] + ], + [ + [ + -0.0142364501953125 + ], + [ + -0.0138397216796875 + ] + ], + [ + [ + -0.0160064697265625 + ], + [ + 0.01447296142578125 + ] + ], + [ + [ + -0.01551055908203125 + ], + [ + -0.013824462890625 + ] + ], + [ + [ + -0.0135650634765625 + ], + [ + 0.0128326416015625 + ] + ], + [ + [ + -0.01386260986328125 + ], + [ + -0.0139312744140625 + ] + ], + [ + [ + -0.0142059326171875 + ], + [ + 0.01422119140625 + ] + ], + [ + [ + -0.01546478271484375 + ], + [ + -0.0157318115234375 + ] + ], + [ + [ + -0.01416015625 + ], + [ + -0.01371002197265625 + ] + ], + [ + [ + -0.0151519775390625 + ], + [ + 0.0147857666015625 + ] + ], + [ + [ + -0.0164031982421875 + ], + [ + -0.01531982421875 + ] + ], + [ + [ + -0.01323699951171875 + ], + [ + -0.01331329345703125 + ] + ], + [ + [ + 0.0156097412109375 + ], + [ + 0.01561737060546875 + ] + ], + [ + [ + 0.0145721435546875 + ], + [ + 0.0152587890625 + ] + ], + [ + [ + 0.01342010498046875 + ], + [ + 0.013824462890625 + ] + ], + [ + [ + 0.01375579833984375 + ], + [ + -0.012847900390625 + ] + ], + [ + [ + 0.015960693359375 + ], + [ + 0.0157623291015625 + ] + ], + [ + [ + 
0.01479339599609375 + ], + [ + 0.012969970703125 + ] + ], + [ + [ + 0.0158233642578125 + ], + [ + -0.0147552490234375 + ] + ], + [ + [ + 0.0137481689453125 + ], + [ + 0.01409912109375 + ] + ], + [ + [ + -0.01373291015625 + ], + [ + -0.01508331298828125 + ] + ], + [ + [ + -0.01456451416015625 + ], + [ + 0.0151824951171875 + ] + ], + [ + [ + -0.01549530029296875 + ], + [ + 0.0151519775390625 + ] + ], + [ + [ + 0.012725830078125 + ], + [ + -0.01461029052734375 + ] + ], + [ + [ + -0.01531982421875 + ], + [ + 0.0142974853515625 + ] + ], + [ + [ + 0.01558685302734375 + ], + [ + 0.01357269287109375 + ] + ], + [ + [ + -0.01500701904296875 + ], + [ + -0.0123291015625 + ] + ], + [ + [ + -0.01526641845703125 + ], + [ + 0.0153961181640625 + ] + ], + [ + [ + 0.01474761962890625 + ], + [ + 0.0154876708984375 + ] + ], + [ + [ + -0.01513671875 + ], + [ + 0.015350341796875 + ] + ], + [ + [ + 0.0153961181640625 + ], + [ + 0.01528167724609375 + ] + ], + [ + [ + 0.0152435302734375 + ], + [ + 0.0153656005859375 + ] + ], + [ + [ + 0.0149993896484375 + ], + [ + -0.01336669921875 + ] + ], + [ + [ + 0.01336669921875 + ], + [ + 0.0147857666015625 + ] + ], + [ + [ + 0.01328277587890625 + ], + [ + -0.0137176513671875 + ] + ], + [ + [ + -0.01544952392578125 + ], + [ + 0.01535797119140625 + ] + ], + [ + [ + 0.0138702392578125 + ], + [ + -0.01288604736328125 + ] + ], + [ + [ + 0.01401519775390625 + ], + [ + -0.0158843994140625 + ] + ], + [ + [ + 0.01477813720703125 + ], + [ + 0.01238250732421875 + ] + ], + [ + [ + 0.01261138916015625 + ], + [ + -0.01371002197265625 + ] + ], + [ + [ + 0.01448822021484375 + ], + [ + -0.0145416259765625 + ] + ], + [ + [ + 0.01453399658203125 + ], + [ + 0.0154571533203125 + ] + ], + [ + [ + 0.014251708984375 + ], + [ + -0.0150604248046875 + ] + ], + [ + [ + -0.0154266357421875 + ], + [ + -0.0140228271484375 + ] + ], + [ + [ + 0.0145721435546875 + ], + [ + 0.015472412109375 + ] + ], + [ + [ + 0.01425933837890625 + ], + [ + -0.01351165771484375 + ] + ], + [ + [ + 
-0.01450347900390625 + ], + [ + -0.0159759521484375 + ] + ], + [ + [ + -0.01361083984375 + ], + [ + 0.01483917236328125 + ] + ], + [ + [ + -0.01447296142578125 + ], + [ + 0.01418304443359375 + ] + ], + [ + [ + -0.015106201171875 + ], + [ + 0.0139923095703125 + ] + ], + [ + [ + -0.014068603515625 + ], + [ + 0.01320648193359375 + ] + ], + [ + [ + -0.0155181884765625 + ], + [ + 0.01560211181640625 + ] + ], + [ + [ + -0.0155792236328125 + ], + [ + -0.0147247314453125 + ] + ], + [ + [ + 0.0147247314453125 + ], + [ + 0.0133209228515625 + ] + ], + [ + [ + 0.01415252685546875 + ], + [ + 0.0130615234375 + ] + ], + [ + [ + -0.01419830322265625 + ], + [ + -0.014251708984375 + ] + ], + [ + [ + -0.0134124755859375 + ], + [ + 0.01519775390625 + ] + ], + [ + [ + 0.01476287841796875 + ], + [ + 0.0138092041015625 + ] + ], + [ + [ + -0.0151824951171875 + ], + [ + 0.01494598388671875 + ] + ], + [ + [ + 0.015106201171875 + ], + [ + 0.01279449462890625 + ] + ] + ], + "symmetric_weights_decompressor_o_proj_weight_0": [ + [ + [ + 0.015625 + ], + [ + 0.014495849609375 + ] + ], + [ + [ + -0.01404571533203125 + ], + [ + -0.0152130126953125 + ] + ], + [ + [ + -0.01512908935546875 + ], + [ + 0.0160369873046875 + ] + ], + [ + [ + 0.01451873779296875 + ], + [ + -0.0155181884765625 + ] + ], + [ + [ + -0.01464080810546875 + ], + [ + -0.0139007568359375 + ] + ], + [ + [ + -0.0123138427734375 + ], + [ + 0.01412200927734375 + ] + ], + [ + [ + -0.01317596435546875 + ], + [ + 0.0151824951171875 + ] + ], + [ + [ + -0.01235198974609375 + ], + [ + -0.0142059326171875 + ] + ], + [ + [ + -0.0145263671875 + ], + [ + -0.0148162841796875 + ] + ], + [ + [ + 0.01427459716796875 + ], + [ + -0.01490020751953125 + ] + ], + [ + [ + 0.01490020751953125 + ], + [ + 0.01303863525390625 + ] + ], + [ + [ + 0.0155029296875 + ], + [ + -0.013946533203125 + ] + ], + [ + [ + 0.01409149169921875 + ], + [ + -0.01322174072265625 + ] + ], + [ + [ + 0.013427734375 + ], + [ + 0.0127716064453125 + ] + ], + [ + [ + 0.0142669677734375 
+ ], + [ + 0.01432037353515625 + ] + ], + [ + [ + -0.01528167724609375 + ], + [ + 0.01529693603515625 + ] + ], + [ + [ + 0.01393890380859375 + ], + [ + -0.01446533203125 + ] + ], + [ + [ + -0.01214599609375 + ], + [ + -0.01450347900390625 + ] + ], + [ + [ + 0.013275146484375 + ], + [ + -0.01328277587890625 + ] + ], + [ + [ + -0.01528167724609375 + ], + [ + -0.01406097412109375 + ] + ], + [ + [ + -0.01247406005859375 + ], + [ + -0.0160064697265625 + ] + ], + [ + [ + -0.01490020751953125 + ], + [ + -0.01470184326171875 + ] + ], + [ + [ + -0.01491546630859375 + ], + [ + -0.013702392578125 + ] + ], + [ + [ + -0.0145721435546875 + ], + [ + 0.01506805419921875 + ] + ], + [ + [ + -0.0150146484375 + ], + [ + 0.015380859375 + ] + ], + [ + [ + -0.0146484375 + ], + [ + 0.013946533203125 + ] + ], + [ + [ + 0.0121917724609375 + ], + [ + 0.01367950439453125 + ] + ], + [ + [ + -0.01552581787109375 + ], + [ + -0.015228271484375 + ] + ], + [ + [ + 0.0135650634765625 + ], + [ + -0.01288604736328125 + ] + ], + [ + [ + -0.015869140625 + ], + [ + 0.01409912109375 + ] + ], + [ + [ + -0.013946533203125 + ], + [ + -0.0148162841796875 + ] + ], + [ + [ + 0.01346588134765625 + ], + [ + -0.015533447265625 + ] + ], + [ + [ + 0.01334381103515625 + ], + [ + -0.0154571533203125 + ] + ], + [ + [ + -0.01387786865234375 + ], + [ + -0.0156707763671875 + ] + ], + [ + [ + 0.0160675048828125 + ], + [ + -0.0134429931640625 + ] + ], + [ + [ + 0.0123748779296875 + ], + [ + -0.01427459716796875 + ] + ], + [ + [ + -0.0137939453125 + ], + [ + 0.01299285888671875 + ] + ], + [ + [ + -0.015289306640625 + ], + [ + -0.01548004150390625 + ] + ], + [ + [ + 0.0142059326171875 + ], + [ + 0.0158233642578125 + ] + ], + [ + [ + -0.01528167724609375 + ], + [ + -0.013824462890625 + ] + ], + [ + [ + -0.01453399658203125 + ], + [ + -0.0151519775390625 + ] + ], + [ + [ + -0.01526641845703125 + ], + [ + 0.0164337158203125 + ] + ], + [ + [ + 0.01546478271484375 + ], + [ + -0.01494598388671875 + ] + ], + [ + [ + 
-0.01458740234375 + ], + [ + -0.01313018798828125 + ] + ], + [ + [ + -0.0141448974609375 + ], + [ + -0.0145721435546875 + ] + ], + [ + [ + -0.0144500732421875 + ], + [ + -0.012664794921875 + ] + ], + [ + [ + 0.0151824951171875 + ], + [ + 0.0142822265625 + ] + ], + [ + [ + 0.01434326171875 + ], + [ + -0.0160675048828125 + ] + ], + [ + [ + 0.01505279541015625 + ], + [ + -0.0137939453125 + ] + ], + [ + [ + 0.01270294189453125 + ], + [ + -0.0133056640625 + ] + ], + [ + [ + -0.01343536376953125 + ], + [ + -0.01441192626953125 + ] + ], + [ + [ + 0.0150146484375 + ], + [ + 0.01453399658203125 + ] + ], + [ + [ + -0.016143798828125 + ], + [ + -0.01445770263671875 + ] + ], + [ + [ + -0.0134735107421875 + ], + [ + 0.01480865478515625 + ] + ], + [ + [ + -0.0162506103515625 + ], + [ + 0.0152130126953125 + ] + ], + [ + [ + -0.01522064208984375 + ], + [ + -0.01541900634765625 + ] + ], + [ + [ + -0.01448822021484375 + ], + [ + 0.01557159423828125 + ] + ], + [ + [ + -0.01395416259765625 + ], + [ + 0.01319122314453125 + ] + ], + [ + [ + -0.0153350830078125 + ], + [ + -0.01532745361328125 + ] + ], + [ + [ + 0.016265869140625 + ], + [ + -0.0161285400390625 + ] + ], + [ + [ + -0.0131988525390625 + ], + [ + 0.015350341796875 + ] + ], + [ + [ + 0.0146331787109375 + ], + [ + -0.01483917236328125 + ] + ], + [ + [ + -0.01554107666015625 + ], + [ + -0.01318359375 + ] + ], + [ + [ + 0.0138092041015625 + ], + [ + 0.01560211181640625 + ] + ] + ], + "symmetric_weights_decompressor_mlp_gate_proj_weight_0": [ + [ + [ + -0.0156097412109375 + ], + [ + 0.0138702392578125 + ] + ], + [ + [ + 0.01531219482421875 + ], + [ + -0.01438140869140625 + ] + ], + [ + [ + 0.01373291015625 + ], + [ + 0.0133514404296875 + ] + ], + [ + [ + 0.01351165771484375 + ], + [ + -0.01241302490234375 + ] + ], + [ + [ + 0.01239776611328125 + ], + [ + -0.01500701904296875 + ] + ], + [ + [ + -0.0160064697265625 + ], + [ + -0.01306915283203125 + ] + ], + [ + [ + -0.0152587890625 + ], + [ + -0.01387786865234375 + ] + ], + [ + [ + 
-0.0160369873046875 + ], + [ + -0.01507568359375 + ] + ], + [ + [ + -0.0150604248046875 + ], + [ + -0.0146942138671875 + ] + ], + [ + [ + -0.0153350830078125 + ], + [ + 0.0147247314453125 + ] + ], + [ + [ + 0.01427459716796875 + ], + [ + -0.01500701904296875 + ] + ], + [ + [ + -0.0140380859375 + ], + [ + 0.01541900634765625 + ] + ], + [ + [ + 0.01519775390625 + ], + [ + 0.01490020751953125 + ] + ], + [ + [ + 0.01526641845703125 + ], + [ + 0.01348114013671875 + ] + ], + [ + [ + 0.01519012451171875 + ], + [ + -0.0141448974609375 + ] + ], + [ + [ + 0.0132904052734375 + ], + [ + 0.013275146484375 + ] + ], + [ + [ + -0.0136566162109375 + ], + [ + -0.016143798828125 + ] + ], + [ + [ + 0.0150604248046875 + ], + [ + 0.01561737060546875 + ] + ], + [ + [ + -0.01538848876953125 + ], + [ + 0.01464080810546875 + ] + ], + [ + [ + 0.016021728515625 + ], + [ + 0.01496124267578125 + ] + ], + [ + [ + 0.01239776611328125 + ], + [ + -0.01406097412109375 + ] + ], + [ + [ + -0.01380157470703125 + ], + [ + 0.015533447265625 + ] + ], + [ + [ + -0.015472412109375 + ], + [ + -0.01557159423828125 + ] + ], + [ + [ + -0.014190673828125 + ], + [ + 0.01348114013671875 + ] + ], + [ + [ + -0.01543426513671875 + ], + [ + -0.0142669677734375 + ] + ], + [ + [ + 0.014923095703125 + ], + [ + 0.01528167724609375 + ] + ], + [ + [ + 0.01294708251953125 + ], + [ + -0.014862060546875 + ] + ], + [ + [ + 0.01442718505859375 + ], + [ + -0.01514434814453125 + ] + ], + [ + [ + 0.01561737060546875 + ], + [ + -0.0137481689453125 + ] + ], + [ + [ + 0.0157928466796875 + ], + [ + 0.015838623046875 + ] + ], + [ + [ + -0.01508331298828125 + ], + [ + 0.0143585205078125 + ] + ], + [ + [ + 0.01557159423828125 + ], + [ + 0.0131988525390625 + ] + ], + [ + [ + 0.01296234130859375 + ], + [ + -0.01441192626953125 + ] + ], + [ + [ + 0.014129638671875 + ], + [ + 0.0147552490234375 + ] + ], + [ + [ + -0.014892578125 + ], + [ + -0.01434326171875 + ] + ], + [ + [ + -0.0155487060546875 + ], + [ + 0.0153961181640625 + ] + ], + [ + [ 
+ 0.01314544677734375 + ], + [ + 0.01385498046875 + ] + ], + [ + [ + -0.013671875 + ], + [ + 0.015106201171875 + ] + ], + [ + [ + 0.012725830078125 + ], + [ + 0.01401519775390625 + ] + ], + [ + [ + 0.0154876708984375 + ], + [ + -0.01436614990234375 + ] + ], + [ + [ + -0.0135955810546875 + ], + [ + 0.0159149169921875 + ] + ], + [ + [ + 0.01509857177734375 + ], + [ + 0.015533447265625 + ] + ], + [ + [ + 0.01290130615234375 + ], + [ + -0.012908935546875 + ] + ], + [ + [ + 0.01514434814453125 + ], + [ + 0.0147247314453125 + ] + ], + [ + [ + 0.0133056640625 + ], + [ + -0.0161590576171875 + ] + ], + [ + [ + 0.01409912109375 + ], + [ + -0.01456451416015625 + ] + ], + [ + [ + 0.0138092041015625 + ], + [ + -0.0165863037109375 + ] + ], + [ + [ + 0.01416015625 + ], + [ + 0.01491546630859375 + ] + ], + [ + [ + -0.01523590087890625 + ], + [ + 0.0150909423828125 + ] + ], + [ + [ + -0.0140533447265625 + ], + [ + -0.01312255859375 + ] + ], + [ + [ + -0.01364898681640625 + ], + [ + 0.01268768310546875 + ] + ], + [ + [ + -0.01406097412109375 + ], + [ + 0.01497650146484375 + ] + ], + [ + [ + 0.0128326416015625 + ], + [ + 0.01483917236328125 + ] + ], + [ + [ + -0.0146026611328125 + ], + [ + -0.01520538330078125 + ] + ], + [ + [ + 0.0151214599609375 + ], + [ + 0.0113372802734375 + ] + ], + [ + [ + -0.0147857666015625 + ], + [ + -0.015716552734375 + ] + ], + [ + [ + 0.01318359375 + ], + [ + -0.01543426513671875 + ] + ], + [ + [ + 0.01508331298828125 + ], + [ + -0.01529693603515625 + ] + ], + [ + [ + 0.01462554931640625 + ], + [ + -0.01311492919921875 + ] + ], + [ + [ + -0.0139007568359375 + ], + [ + 0.01496124267578125 + ] + ], + [ + [ + 0.0155792236328125 + ], + [ + 0.015899658203125 + ] + ], + [ + [ + 0.01395416259765625 + ], + [ + 0.0123291015625 + ] + ], + [ + [ + -0.01465606689453125 + ], + [ + -0.0148162841796875 + ] + ], + [ + [ + 0.01617431640625 + ], + [ + -0.0152130126953125 + ] + ], + [ + [ + -0.01348876953125 + ], + [ + -0.0154571533203125 + ] + ], + [ + [ + 
-0.01517486572265625 + ], + [ + -0.0145263671875 + ] + ], + [ + [ + 0.01546478271484375 + ], + [ + -0.01513671875 + ] + ], + [ + [ + 0.015594482421875 + ], + [ + -0.01428985595703125 + ] + ], + [ + [ + 0.0152435302734375 + ], + [ + -0.0138092041015625 + ] + ], + [ + [ + 0.0145263671875 + ], + [ + 0.01174163818359375 + ] + ], + [ + [ + -0.0145263671875 + ], + [ + 0.01326751708984375 + ] + ], + [ + [ + -0.01523590087890625 + ], + [ + 0.0143585205078125 + ] + ], + [ + [ + -0.01380157470703125 + ], + [ + -0.01544189453125 + ] + ], + [ + [ + 0.012054443359375 + ], + [ + -0.01401519775390625 + ] + ], + [ + [ + 0.01190185546875 + ], + [ + -0.016571044921875 + ] + ], + [ + [ + -0.01470184326171875 + ], + [ + -0.0139007568359375 + ] + ], + [ + [ + 0.013427734375 + ], + [ + -0.0148773193359375 + ] + ], + [ + [ + -0.01534271240234375 + ], + [ + 0.01479339599609375 + ] + ], + [ + [ + 0.01433563232421875 + ], + [ + 0.01558685302734375 + ] + ], + [ + [ + 0.0167999267578125 + ], + [ + -0.01342010498046875 + ] + ], + [ + [ + -0.0141754150390625 + ], + [ + -0.01506805419921875 + ] + ], + [ + [ + 0.01541900634765625 + ], + [ + -0.01486968994140625 + ] + ], + [ + [ + -0.01505279541015625 + ], + [ + 0.015533447265625 + ] + ], + [ + [ + 0.013519287109375 + ], + [ + 0.014434814453125 + ] + ], + [ + [ + 0.0151824951171875 + ], + [ + -0.01277923583984375 + ] + ], + [ + [ + 0.01611328125 + ], + [ + -0.0157470703125 + ] + ], + [ + [ + 0.01448822021484375 + ], + [ + 0.01453399658203125 + ] + ], + [ + [ + 0.0153350830078125 + ], + [ + 0.0142059326171875 + ] + ], + [ + [ + -0.014190673828125 + ], + [ + -0.013946533203125 + ] + ], + [ + [ + 0.014923095703125 + ], + [ + -0.01447296142578125 + ] + ], + [ + [ + 0.014495849609375 + ], + [ + 0.014404296875 + ] + ], + [ + [ + 0.016204833984375 + ], + [ + 0.015594482421875 + ] + ], + [ + [ + 0.01555633544921875 + ], + [ + -0.01470947265625 + ] + ], + [ + [ + -0.01280975341796875 + ], + [ + 0.0138092041015625 + ] + ], + [ + [ + -0.0149383544921875 + ], 
+ [ + 0.0152587890625 + ] + ], + [ + [ + -0.0153961181640625 + ], + [ + -0.01477813720703125 + ] + ], + [ + [ + -0.01474761962890625 + ], + [ + -0.0145111083984375 + ] + ], + [ + [ + -0.01343536376953125 + ], + [ + 0.013824462890625 + ] + ], + [ + [ + 0.0166778564453125 + ], + [ + 0.014190673828125 + ] + ], + [ + [ + 0.01358795166015625 + ], + [ + 0.015838623046875 + ] + ], + [ + [ + -0.01520538330078125 + ], + [ + 0.01334381103515625 + ] + ], + [ + [ + -0.01416015625 + ], + [ + 0.013824462890625 + ] + ], + [ + [ + -0.01309967041015625 + ], + [ + 0.0156402587890625 + ] + ], + [ + [ + 0.0135955810546875 + ], + [ + 0.0158538818359375 + ] + ], + [ + [ + -0.01552581787109375 + ], + [ + 0.0140228271484375 + ] + ], + [ + [ + 0.01302337646484375 + ], + [ + -0.01416015625 + ] + ], + [ + [ + 0.013336181640625 + ], + [ + 0.01395416259765625 + ] + ], + [ + [ + 0.0152435302734375 + ], + [ + 0.01525115966796875 + ] + ], + [ + [ + 0.014373779296875 + ], + [ + 0.0148162841796875 + ] + ], + [ + [ + -0.01324462890625 + ], + [ + -0.01549530029296875 + ] + ], + [ + [ + 0.0152740478515625 + ], + [ + -0.01324462890625 + ] + ], + [ + [ + 0.015655517578125 + ], + [ + 0.01544952392578125 + ] + ], + [ + [ + 0.0155792236328125 + ], + [ + -0.01348114013671875 + ] + ], + [ + [ + -0.01526641845703125 + ], + [ + -0.0137176513671875 + ] + ], + [ + [ + -0.0145721435546875 + ], + [ + 0.01506805419921875 + ] + ], + [ + [ + -0.01479339599609375 + ], + [ + 0.0142059326171875 + ] + ], + [ + [ + 0.0159912109375 + ], + [ + 0.015106201171875 + ] + ], + [ + [ + 0.0155029296875 + ], + [ + -0.01354217529296875 + ] + ], + [ + [ + -0.01551055908203125 + ], + [ + 0.0157012939453125 + ] + ], + [ + [ + -0.0138397216796875 + ], + [ + 0.01361083984375 + ] + ], + [ + [ + 0.0142669677734375 + ], + [ + -0.01470947265625 + ] + ], + [ + [ + -0.0139312744140625 + ], + [ + -0.01308441162109375 + ] + ], + [ + [ + 0.01525115966796875 + ], + [ + -0.015869140625 + ] + ], + [ + [ + 0.01397705078125 + ], + [ + 
0.01459503173828125 + ] + ], + [ + [ + -0.015838623046875 + ], + [ + -0.01488494873046875 + ] + ], + [ + [ + 0.01422882080078125 + ], + [ + -0.01251220703125 + ] + ], + [ + [ + -0.0144805908203125 + ], + [ + -0.013336181640625 + ] + ], + [ + [ + 0.01526641845703125 + ], + [ + -0.0143585205078125 + ] + ] + ], + "asymmetric_weights_decompressor_mlp_up_proj_weight_0": [ + [ + 0.0009670257568359375 + ], + [ + 0.0009627342224121094 + ], + [ + 0.0009636878967285156 + ], + [ + 0.0009675025939941406 + ], + [ + 0.0009679794311523438 + ], + [ + 0.00096893310546875 + ], + [ + 0.0009675025939941406 + ], + [ + 0.0009493827819824219 + ], + [ + 0.00096893310546875 + ], + [ + 0.0009484291076660156 + ], + [ + 0.000949859619140625 + ], + [ + 0.0009398460388183594 + ], + [ + 0.0009174346923828125 + ], + [ + 0.0009679794311523438 + ], + [ + 0.0009403228759765625 + ], + [ + 0.0009741783142089844 + ], + [ + 0.0008835792541503906 + ], + [ + 0.0009450912475585938 + ], + [ + 0.0009484291076660156 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009579658508300781 + ], + [ + 0.00091552734375 + ], + [ + 0.0009326934814453125 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009627342224121094 + ], + [ + 0.0009765625 + ], + [ + 0.0009531974792480469 + ], + [ + 0.0009503364562988281 + ], + [ + 0.0009765625 + ], + [ + 0.0009579658508300781 + ], + [ + 0.0009169578552246094 + ], + [ + 0.0009493827819824219 + ], + [ + 0.0009450912475585938 + ], + [ + 0.0009741783142089844 + ], + [ + 0.0009379386901855469 + ], + [ + 0.0009403228759765625 + ], + [ + 0.00092315673828125 + ], + [ + 0.0009717941284179688 + ], + [ + 0.000911712646484375 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009360313415527344 + ], + [ + 0.0009765625 + ], + [ + 0.000972747802734375 + ], + [ + 0.0009288787841796875 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009121894836425781 + ], + [ + 0.0009512901306152344 + ], + [ + 0.0009007453918457031 + ], + [ + 0.0009570121765136719 + ], + [ + 0.0009136199951171875 + ], + [ + 
0.0009436607360839844 + ], + [ + 0.0009584426879882812 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009465217590332031 + ], + [ + 0.0009765625 + ], + [ + 0.0009484291076660156 + ], + [ + 0.0009670257568359375 + ], + [ + 0.0009775161743164062 + ], + [ + 0.0009627342224121094 + ], + [ + 0.0009360313415527344 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009737014770507812 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009551048278808594 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009765625 + ], + [ + 0.0008749961853027344 + ], + [ + 0.0009751319885253906 + ], + [ + 0.0009322166442871094 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009331703186035156 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009756088256835938 + ], + [ + 0.0009636878967285156 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009760856628417969 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009207725524902344 + ], + [ + 0.0009741783142089844 + ], + [ + 0.0009436607360839844 + ], + [ + 0.0009217262268066406 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009579658508300781 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009641647338867188 + ], + [ + 0.0009579658508300781 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009646415710449219 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009202957153320312 + ], + [ + 0.0009560585021972656 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009784698486328125 + ], + [ + 0.00089263916015625 + ], + [ + 0.0009698867797851562 + ], + [ + 0.0009479522705078125 + ], + [ + 0.0009436607360839844 + ], + [ + 0.0009250640869140625 + ], + [ + 0.000946044921875 + ], + [ + 0.0009484291076660156 + ], + [ + 0.0009794235229492188 + ], + [ + 0.0009698867797851562 + ], + [ + 0.0009255409240722656 + ], + [ + 0.0009746551513671875 + ], + [ + 0.0009427070617675781 + ], + [ + 0.0009365081787109375 + ], + [ + 0.0009627342224121094 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009694099426269531 + ], + [ + 0.0009379386901855469 + ], + [ + 
0.0009407997131347656 + ], + [ + 0.0009579658508300781 + ], + [ + 0.0009579658508300781 + ], + [ + 0.0009360313415527344 + ], + [ + 0.0009374618530273438 + ], + [ + 0.000934600830078125 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009431838989257812 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009570121765136719 + ], + [ + 0.0009522438049316406 + ], + [ + 0.0009083747863769531 + ], + [ + 0.0009484291076660156 + ], + [ + 0.0009508132934570312 + ], + [ + 0.0009775161743164062 + ], + [ + 0.0009584426879882812 + ] + ], + "asymmetric_weights_decompressor_mlp_down_proj_weight_0": [ + [ + 0.0006890296936035156 + ], + [ + 0.0006866455078125 + ], + [ + 0.0006794929504394531 + ], + [ + 0.0006856918334960938 + ], + [ + 0.0006885528564453125 + ], + [ + 0.0006775856018066406 + ], + [ + 0.00067901611328125 + ], + [ + 0.0006818771362304688 + ], + [ + 0.0006885528564453125 + ], + [ + 0.0006928443908691406 + ], + [ + 0.0006814002990722656 + ], + [ + 0.000690460205078125 + ], + [ + 0.0006756782531738281 + ], + [ + 0.0006895065307617188 + ], + [ + 0.0006847381591796875 + ], + [ + 0.0006761550903320312 + ], + [ + 0.0006814002990722656 + ], + [ + 0.0006885528564453125 + ], + [ + 0.000682830810546875 + ], + [ + 0.0006794929504394531 + ], + [ + 0.0006899833679199219 + ], + [ + 0.0006809234619140625 + ], + [ + 0.0006785392761230469 + ], + [ + 0.0006670951843261719 + ], + [ + 0.0006914138793945312 + ], + [ + 0.0006780624389648438 + ], + [ + 0.0006856918334960938 + ], + [ + 0.0006742477416992188 + ], + [ + 0.000690460205078125 + ], + [ + 0.0006909370422363281 + ], + [ + 0.0006885528564453125 + ], + [ + 0.0006866455078125 + ], + [ + 0.0006842613220214844 + ], + [ + 0.0006880760192871094 + ], + [ + 0.0006861686706542969 + ], + [ + 0.0006861686706542969 + ], + [ + 0.0006804466247558594 + ], + [ + 0.0006866455078125 + ], + [ + 0.0006761550903320312 + ], + [ + 0.0006871223449707031 + ], + [ + 0.0006875991821289062 + ], + [ + 0.0006780624389648438 + ], + [ + 
0.0006880760192871094 + ], + [ + 0.0006909370422363281 + ], + [ + 0.0006718635559082031 + ], + [ + 0.0006723403930664062 + ], + [ + 0.0006895065307617188 + ], + [ + 0.0006694793701171875 + ], + [ + 0.0006737709045410156 + ], + [ + 0.0006885528564453125 + ], + [ + 0.0006785392761230469 + ], + [ + 0.0006885528564453125 + ], + [ + 0.0006804466247558594 + ], + [ + 0.0006866455078125 + ], + [ + 0.0006666183471679688 + ], + [ + 0.0006909370422363281 + ], + [ + 0.0006833076477050781 + ], + [ + 0.0006875991821289062 + ], + [ + 0.0006818771362304688 + ], + [ + 0.0006794929504394531 + ], + [ + 0.0006918907165527344 + ], + [ + 0.0006780624389648438 + ], + [ + 0.0006914138793945312 + ], + [ + 0.0006756782531738281 + ] + ] +} \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json new file mode 100644 index 00000000000..7cfdf2719df --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json @@ -0,0 +1,128 @@ +[ + { + "weight_name": "q_proj_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "k_proj_weight", + "node_with_weight": "linear_1", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "v_proj_weight", + "node_with_weight": "linear_2", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + 
"reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "o_proj_weight", + "node_with_weight": "linear_3", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_gate_proj_weight", + "node_with_weight": "linear_4", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_up_proj_weight", + "node_with_weight": "linear_5", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_down_proj_weight", + "node_with_weight": "linear_6", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 128 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot new file mode 100644 index 00000000000..31fb9463c88 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; 
+"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 
symmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" 
[style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 symmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(2048, 1)"]; +"17 symmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 symmetric_weights_decompressor_k_proj_weight_0" [style=solid, label="(2048, 1)"]; +"22 symmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 symmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(2048, 1)"]; +"27 symmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 
view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 symmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(2048, 1)"]; +"51 symmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, 
label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 symmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(4096, 1)"]; +"73 symmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json new file mode 100644 index 00000000000..74b808d1245 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json @@ -0,0 +1,3728 @@ +{ + "symmetric_weights_decompressor_q_proj_weight_0": [ + [ + [ + -0.0135650634765625 + ], + [ + -0.014251708984375 + ] + ], + [ + [ + 0.015411376953125 + ], + [ + -0.01557159423828125 + ] + ], + [ + [ + -0.01427459716796875 + ], + [ + -0.01519012451171875 + ] + ], + [ + [ + 0.01224517822265625 + ], + [ + 0.01528167724609375 + ] + ], + [ + [ + 0.01348876953125 + ], + [ + -0.01556396484375 + ] + ], + [ + [ + 0.01401519775390625 + ], + [ + 0.0153350830078125 + ] + ], + [ + [ + 0.01560211181640625 + ], + [ + 0.01523590087890625 + ] + ], + [ + [ + 0.0160675048828125 + ], + [ + -0.0149993896484375 + ] + ], + [ + [ + -0.01445770263671875 + ], + [ + 0.01251220703125 + ] + ], + [ + [ + -0.0155487060546875 + ], + [ + 0.0126495361328125 + ] + ], + [ + [ + -0.01306915283203125 + ], + [ + -0.01422882080078125 + ] + ], + [ + [ + -0.0154571533203125 + ], + [ + -0.015594482421875 + ] + ], + [ + [ + -0.01308441162109375 + ], + [ + -0.01392364501953125 + ] + ], + [ + [ + -0.01512908935546875 + ], + [ + 0.01303863525390625 + ] + ], + [ + [ + -0.0134124755859375 + ], + [ + -0.0130462646484375 + ] + ], + [ + [ + 0.01467132568359375 + ], + [ + 0.01568603515625 + ] + ], + [ + [ + -0.01470184326171875 + ], + [ + 0.0147705078125 + ] + ], + [ + [ + -0.01477813720703125 + ], + [ + -0.01468658447265625 + ] + ], + [ + [ + -0.0128021240234375 + ], + [ + 0.0122833251953125 + ] + ], + [ + [ + -0.0158233642578125 + ], + [ + 
0.0131378173828125 + ] + ], + [ + [ + -0.01537322998046875 + ], + [ + -0.01543426513671875 + ] + ], + [ + [ + -0.01548004150390625 + ], + [ + 0.01435089111328125 + ] + ], + [ + [ + -0.0130462646484375 + ], + [ + 0.01294708251953125 + ] + ], + [ + [ + 0.01348876953125 + ], + [ + 0.01446533203125 + ] + ], + [ + [ + -0.0157470703125 + ], + [ + -0.014892578125 + ] + ], + [ + [ + 0.01482391357421875 + ], + [ + -0.01473236083984375 + ] + ], + [ + [ + 0.0155029296875 + ], + [ + 0.0135040283203125 + ] + ], + [ + [ + 0.01456451416015625 + ], + [ + -0.016326904296875 + ] + ], + [ + [ + -0.01509857177734375 + ], + [ + -0.012847900390625 + ] + ], + [ + [ + -0.0152130126953125 + ], + [ + 0.01459503173828125 + ] + ], + [ + [ + -0.0153350830078125 + ], + [ + -0.01287078857421875 + ] + ], + [ + [ + 0.0136871337890625 + ], + [ + 0.014801025390625 + ] + ], + [ + [ + 0.01520538330078125 + ], + [ + -0.01514434814453125 + ] + ], + [ + [ + -0.01471710205078125 + ], + [ + 0.0155792236328125 + ] + ], + [ + [ + -0.01485443115234375 + ], + [ + 0.0147857666015625 + ] + ], + [ + [ + 0.01512908935546875 + ], + [ + -0.01381683349609375 + ] + ], + [ + [ + -0.015838623046875 + ], + [ + -0.01444244384765625 + ] + ], + [ + [ + -0.0146636962890625 + ], + [ + -0.01299285888671875 + ] + ], + [ + [ + -0.01495361328125 + ], + [ + -0.014801025390625 + ] + ], + [ + [ + -0.01396942138671875 + ], + [ + 0.0134124755859375 + ] + ], + [ + [ + -0.01490020751953125 + ], + [ + 0.015045166015625 + ] + ], + [ + [ + -0.01543426513671875 + ], + [ + 0.01514434814453125 + ] + ], + [ + [ + 0.01428985595703125 + ], + [ + 0.0141754150390625 + ] + ], + [ + [ + 0.014923095703125 + ], + [ + 0.01470947265625 + ] + ], + [ + [ + -0.01654052734375 + ], + [ + 0.01470947265625 + ] + ], + [ + [ + 0.0150299072265625 + ], + [ + 0.0132293701171875 + ] + ], + [ + [ + -0.0144500732421875 + ], + [ + -0.014556884765625 + ] + ], + [ + [ + -0.01354217529296875 + ], + [ + -0.01436614990234375 + ] + ], + [ + [ + 0.01250457763671875 + ], + [ + 
0.014495849609375 + ] + ], + [ + [ + -0.01361846923828125 + ], + [ + -0.01445770263671875 + ] + ], + [ + [ + -0.0148162841796875 + ], + [ + 0.01213836669921875 + ] + ], + [ + [ + -0.0125274658203125 + ], + [ + -0.0152587890625 + ] + ], + [ + [ + -0.01308441162109375 + ], + [ + 0.01410675048828125 + ] + ], + [ + [ + -0.0150146484375 + ], + [ + 0.01324462890625 + ] + ], + [ + [ + -0.016021728515625 + ], + [ + 0.015289306640625 + ] + ], + [ + [ + -0.0143280029296875 + ], + [ + -0.0139617919921875 + ] + ], + [ + [ + -0.0147247314453125 + ], + [ + 0.0161590576171875 + ] + ], + [ + [ + -0.0119476318359375 + ], + [ + 0.0154571533203125 + ] + ], + [ + [ + -0.01476287841796875 + ], + [ + -0.0137176513671875 + ] + ], + [ + [ + 0.01558685302734375 + ], + [ + 0.013427734375 + ] + ], + [ + [ + -0.0167694091796875 + ], + [ + 0.01517486572265625 + ] + ], + [ + [ + 0.01235198974609375 + ], + [ + -0.01605224609375 + ] + ], + [ + [ + 0.015960693359375 + ], + [ + -0.015167236328125 + ] + ], + [ + [ + 0.01517486572265625 + ], + [ + 0.0162200927734375 + ] + ] + ], + "symmetric_weights_decompressor_k_proj_weight_0": [ + [ + [ + 0.0150604248046875 + ], + [ + -0.0138702392578125 + ] + ], + [ + [ + -0.01486968994140625 + ], + [ + -0.01424407958984375 + ] + ], + [ + [ + 0.01526641845703125 + ], + [ + -0.0126800537109375 + ] + ], + [ + [ + -0.01436614990234375 + ], + [ + -0.0157012939453125 + ] + ], + [ + [ + -0.01470947265625 + ], + [ + 0.013916015625 + ] + ], + [ + [ + -0.01371002197265625 + ], + [ + -0.01558685302734375 + ] + ], + [ + [ + 0.01265716552734375 + ], + [ + 0.01399993896484375 + ] + ], + [ + [ + -0.01520538330078125 + ], + [ + -0.01537322998046875 + ] + ], + [ + [ + 0.01538848876953125 + ], + [ + 0.0160064697265625 + ] + ], + [ + [ + -0.01537322998046875 + ], + [ + -0.01198577880859375 + ] + ], + [ + [ + -0.01551055908203125 + ], + [ + -0.01419830322265625 + ] + ], + [ + [ + -0.01544189453125 + ], + [ + -0.0127410888671875 + ] + ], + [ + [ + 0.014373779296875 + ], + [ + 
-0.01462554931640625 + ] + ], + [ + [ + 0.01326751708984375 + ], + [ + -0.015716552734375 + ] + ], + [ + [ + -0.01415252685546875 + ], + [ + -0.01483917236328125 + ] + ], + [ + [ + -0.01505279541015625 + ], + [ + 0.0154571533203125 + ] + ], + [ + [ + 0.01538848876953125 + ], + [ + -0.016021728515625 + ] + ], + [ + [ + -0.013916015625 + ], + [ + -0.01514434814453125 + ] + ], + [ + [ + 0.01401519775390625 + ], + [ + -0.01239776611328125 + ] + ], + [ + [ + -0.01540374755859375 + ], + [ + -0.0133209228515625 + ] + ], + [ + [ + 0.014617919921875 + ], + [ + 0.01727294921875 + ] + ], + [ + [ + 0.0156707763671875 + ], + [ + -0.0155792236328125 + ] + ], + [ + [ + 0.01384735107421875 + ], + [ + 0.01262664794921875 + ] + ], + [ + [ + -0.0143890380859375 + ], + [ + 0.015106201171875 + ] + ], + [ + [ + 0.0154571533203125 + ], + [ + -0.01403045654296875 + ] + ], + [ + [ + 0.0149993896484375 + ], + [ + 0.012847900390625 + ] + ], + [ + [ + 0.01552581787109375 + ], + [ + -0.01554107666015625 + ] + ], + [ + [ + 0.01503753662109375 + ], + [ + 0.01519775390625 + ] + ], + [ + [ + 0.0144195556640625 + ], + [ + -0.01325225830078125 + ] + ], + [ + [ + -0.0159454345703125 + ], + [ + -0.01555633544921875 + ] + ], + [ + [ + -0.01416015625 + ], + [ + -0.01580810546875 + ] + ], + [ + [ + -0.01446533203125 + ], + [ + -0.01375579833984375 + ] + ], + [ + [ + 0.01214599609375 + ], + [ + -0.0137786865234375 + ] + ], + [ + [ + 0.01497650146484375 + ], + [ + 0.0144805908203125 + ] + ], + [ + [ + -0.01474761962890625 + ], + [ + -0.0155181884765625 + ] + ], + [ + [ + -0.01508331298828125 + ], + [ + -0.01496124267578125 + ] + ], + [ + [ + -0.01544189453125 + ], + [ + 0.014678955078125 + ] + ], + [ + [ + -0.01329803466796875 + ], + [ + -0.0157012939453125 + ] + ], + [ + [ + 0.01535797119140625 + ], + [ + -0.0161590576171875 + ] + ], + [ + [ + 0.01480865478515625 + ], + [ + -0.01407623291015625 + ] + ], + [ + [ + 0.01212310791015625 + ], + [ + 0.01406097412109375 + ] + ], + [ + [ + 0.012939453125 + ], + [ 
+ 0.01445770263671875 + ] + ], + [ + [ + 0.01476287841796875 + ], + [ + -0.01544189453125 + ] + ], + [ + [ + 0.0135650634765625 + ], + [ + 0.01358795166015625 + ] + ], + [ + [ + -0.0150299072265625 + ], + [ + -0.014190673828125 + ] + ], + [ + [ + 0.01522064208984375 + ], + [ + 0.01520538330078125 + ] + ], + [ + [ + 0.0146942138671875 + ], + [ + -0.01531982421875 + ] + ], + [ + [ + 0.01305389404296875 + ], + [ + 0.0139312744140625 + ] + ], + [ + [ + 0.01507568359375 + ], + [ + -0.01461029052734375 + ] + ], + [ + [ + -0.015899658203125 + ], + [ + 0.01421356201171875 + ] + ], + [ + [ + 0.01385498046875 + ], + [ + 0.01284027099609375 + ] + ], + [ + [ + 0.01535797119140625 + ], + [ + 0.0152740478515625 + ] + ], + [ + [ + -0.0144805908203125 + ], + [ + 0.01386260986328125 + ] + ], + [ + [ + 0.0132598876953125 + ], + [ + -0.0147705078125 + ] + ], + [ + [ + -0.01397705078125 + ], + [ + 0.01549530029296875 + ] + ], + [ + [ + 0.0145111083984375 + ], + [ + -0.0167694091796875 + ] + ], + [ + [ + -0.0148773193359375 + ], + [ + 0.01532745361328125 + ] + ], + [ + [ + -0.0145263671875 + ], + [ + -0.01387786865234375 + ] + ], + [ + [ + 0.01473236083984375 + ], + [ + 0.016326904296875 + ] + ], + [ + [ + -0.01299285888671875 + ], + [ + 0.0149993896484375 + ] + ], + [ + [ + 0.013214111328125 + ], + [ + -0.01541900634765625 + ] + ], + [ + [ + -0.01316070556640625 + ], + [ + 0.0142822265625 + ] + ], + [ + [ + 0.01425933837890625 + ], + [ + -0.01212310791015625 + ] + ], + [ + [ + 0.0168914794921875 + ], + [ + -0.01407623291015625 + ] + ] + ], + "symmetric_weights_decompressor_v_proj_weight_0": [ + [ + [ + -0.0145721435546875 + ], + [ + -0.01470184326171875 + ] + ], + [ + [ + -0.01517486572265625 + ], + [ + -0.01496124267578125 + ] + ], + [ + [ + 0.013580322265625 + ], + [ + -0.0135040283203125 + ] + ], + [ + [ + 0.0142669677734375 + ], + [ + 0.014251708984375 + ] + ], + [ + [ + 0.0146942138671875 + ], + [ + 0.0164337158203125 + ] + ], + [ + [ + -0.0142364501953125 + ], + [ + 
-0.0138397216796875 + ] + ], + [ + [ + -0.0160064697265625 + ], + [ + 0.01447296142578125 + ] + ], + [ + [ + -0.01551055908203125 + ], + [ + -0.013824462890625 + ] + ], + [ + [ + -0.0135650634765625 + ], + [ + 0.0128326416015625 + ] + ], + [ + [ + -0.01386260986328125 + ], + [ + -0.0139312744140625 + ] + ], + [ + [ + -0.0142059326171875 + ], + [ + 0.01422119140625 + ] + ], + [ + [ + -0.01546478271484375 + ], + [ + -0.0157318115234375 + ] + ], + [ + [ + -0.01416015625 + ], + [ + -0.01371002197265625 + ] + ], + [ + [ + -0.0151519775390625 + ], + [ + 0.0147857666015625 + ] + ], + [ + [ + -0.0164031982421875 + ], + [ + -0.01531982421875 + ] + ], + [ + [ + -0.01323699951171875 + ], + [ + -0.01331329345703125 + ] + ], + [ + [ + 0.0156097412109375 + ], + [ + 0.01561737060546875 + ] + ], + [ + [ + 0.0145721435546875 + ], + [ + 0.0152587890625 + ] + ], + [ + [ + 0.01342010498046875 + ], + [ + 0.013824462890625 + ] + ], + [ + [ + 0.01375579833984375 + ], + [ + -0.012847900390625 + ] + ], + [ + [ + 0.015960693359375 + ], + [ + 0.0157623291015625 + ] + ], + [ + [ + 0.01479339599609375 + ], + [ + 0.012969970703125 + ] + ], + [ + [ + 0.0158233642578125 + ], + [ + -0.0147552490234375 + ] + ], + [ + [ + 0.0137481689453125 + ], + [ + 0.01409912109375 + ] + ], + [ + [ + -0.01373291015625 + ], + [ + -0.01508331298828125 + ] + ], + [ + [ + -0.01456451416015625 + ], + [ + 0.0151824951171875 + ] + ], + [ + [ + -0.01549530029296875 + ], + [ + 0.0151519775390625 + ] + ], + [ + [ + 0.012725830078125 + ], + [ + -0.01461029052734375 + ] + ], + [ + [ + -0.01531982421875 + ], + [ + 0.0142974853515625 + ] + ], + [ + [ + 0.01558685302734375 + ], + [ + 0.01357269287109375 + ] + ], + [ + [ + -0.01500701904296875 + ], + [ + -0.0123291015625 + ] + ], + [ + [ + -0.01526641845703125 + ], + [ + 0.0153961181640625 + ] + ], + [ + [ + 0.01474761962890625 + ], + [ + 0.0154876708984375 + ] + ], + [ + [ + -0.01513671875 + ], + [ + 0.015350341796875 + ] + ], + [ + [ + 0.0153961181640625 + ], + [ + 
0.01528167724609375 + ] + ], + [ + [ + 0.0152435302734375 + ], + [ + 0.0153656005859375 + ] + ], + [ + [ + 0.0149993896484375 + ], + [ + -0.01336669921875 + ] + ], + [ + [ + 0.01336669921875 + ], + [ + 0.0147857666015625 + ] + ], + [ + [ + 0.01328277587890625 + ], + [ + -0.0137176513671875 + ] + ], + [ + [ + -0.01544952392578125 + ], + [ + 0.01535797119140625 + ] + ], + [ + [ + 0.0138702392578125 + ], + [ + -0.01288604736328125 + ] + ], + [ + [ + 0.01401519775390625 + ], + [ + -0.0158843994140625 + ] + ], + [ + [ + 0.01477813720703125 + ], + [ + 0.01238250732421875 + ] + ], + [ + [ + 0.01261138916015625 + ], + [ + -0.01371002197265625 + ] + ], + [ + [ + 0.01448822021484375 + ], + [ + -0.0145416259765625 + ] + ], + [ + [ + 0.01453399658203125 + ], + [ + 0.0154571533203125 + ] + ], + [ + [ + 0.014251708984375 + ], + [ + -0.0150604248046875 + ] + ], + [ + [ + -0.0154266357421875 + ], + [ + -0.0140228271484375 + ] + ], + [ + [ + 0.0145721435546875 + ], + [ + 0.015472412109375 + ] + ], + [ + [ + 0.01425933837890625 + ], + [ + -0.01351165771484375 + ] + ], + [ + [ + -0.01450347900390625 + ], + [ + -0.0159759521484375 + ] + ], + [ + [ + -0.01361083984375 + ], + [ + 0.01483917236328125 + ] + ], + [ + [ + -0.01447296142578125 + ], + [ + 0.01418304443359375 + ] + ], + [ + [ + -0.015106201171875 + ], + [ + 0.0139923095703125 + ] + ], + [ + [ + -0.014068603515625 + ], + [ + 0.01320648193359375 + ] + ], + [ + [ + -0.0155181884765625 + ], + [ + 0.01560211181640625 + ] + ], + [ + [ + -0.0155792236328125 + ], + [ + -0.0147247314453125 + ] + ], + [ + [ + 0.0147247314453125 + ], + [ + 0.0133209228515625 + ] + ], + [ + [ + 0.01415252685546875 + ], + [ + 0.0130615234375 + ] + ], + [ + [ + -0.01419830322265625 + ], + [ + -0.014251708984375 + ] + ], + [ + [ + -0.0134124755859375 + ], + [ + 0.01519775390625 + ] + ], + [ + [ + 0.01476287841796875 + ], + [ + 0.0138092041015625 + ] + ], + [ + [ + -0.0151824951171875 + ], + [ + 0.01494598388671875 + ] + ], + [ + [ + 0.015106201171875 + ], + 
[ + 0.01279449462890625 + ] + ] + ], + "symmetric_weights_decompressor_o_proj_weight_0": [ + [ + [ + 0.015625 + ], + [ + 0.014495849609375 + ] + ], + [ + [ + -0.01404571533203125 + ], + [ + -0.0152130126953125 + ] + ], + [ + [ + -0.01512908935546875 + ], + [ + 0.0160369873046875 + ] + ], + [ + [ + 0.01451873779296875 + ], + [ + -0.0155181884765625 + ] + ], + [ + [ + -0.01464080810546875 + ], + [ + -0.0139007568359375 + ] + ], + [ + [ + -0.0123138427734375 + ], + [ + 0.01412200927734375 + ] + ], + [ + [ + -0.01317596435546875 + ], + [ + 0.0151824951171875 + ] + ], + [ + [ + -0.01235198974609375 + ], + [ + -0.0142059326171875 + ] + ], + [ + [ + -0.0145263671875 + ], + [ + -0.0148162841796875 + ] + ], + [ + [ + 0.01427459716796875 + ], + [ + -0.01490020751953125 + ] + ], + [ + [ + 0.01490020751953125 + ], + [ + 0.01303863525390625 + ] + ], + [ + [ + 0.0155029296875 + ], + [ + -0.013946533203125 + ] + ], + [ + [ + 0.01409149169921875 + ], + [ + -0.01322174072265625 + ] + ], + [ + [ + 0.013427734375 + ], + [ + 0.0127716064453125 + ] + ], + [ + [ + 0.0142669677734375 + ], + [ + 0.01432037353515625 + ] + ], + [ + [ + -0.01528167724609375 + ], + [ + 0.01529693603515625 + ] + ], + [ + [ + 0.01393890380859375 + ], + [ + -0.01446533203125 + ] + ], + [ + [ + -0.01214599609375 + ], + [ + -0.01450347900390625 + ] + ], + [ + [ + 0.013275146484375 + ], + [ + -0.01328277587890625 + ] + ], + [ + [ + -0.01528167724609375 + ], + [ + -0.01406097412109375 + ] + ], + [ + [ + -0.01247406005859375 + ], + [ + -0.0160064697265625 + ] + ], + [ + [ + -0.01490020751953125 + ], + [ + -0.01470184326171875 + ] + ], + [ + [ + -0.01491546630859375 + ], + [ + -0.013702392578125 + ] + ], + [ + [ + -0.0145721435546875 + ], + [ + 0.01506805419921875 + ] + ], + [ + [ + -0.0150146484375 + ], + [ + 0.015380859375 + ] + ], + [ + [ + -0.0146484375 + ], + [ + 0.013946533203125 + ] + ], + [ + [ + 0.0121917724609375 + ], + [ + 0.01367950439453125 + ] + ], + [ + [ + -0.01552581787109375 + ], + [ + 
-0.015228271484375 + ] + ], + [ + [ + 0.0135650634765625 + ], + [ + -0.01288604736328125 + ] + ], + [ + [ + -0.015869140625 + ], + [ + 0.01409912109375 + ] + ], + [ + [ + -0.013946533203125 + ], + [ + -0.0148162841796875 + ] + ], + [ + [ + 0.01346588134765625 + ], + [ + -0.015533447265625 + ] + ], + [ + [ + 0.01334381103515625 + ], + [ + -0.0154571533203125 + ] + ], + [ + [ + -0.01387786865234375 + ], + [ + -0.0156707763671875 + ] + ], + [ + [ + 0.0160675048828125 + ], + [ + -0.0134429931640625 + ] + ], + [ + [ + 0.0123748779296875 + ], + [ + -0.01427459716796875 + ] + ], + [ + [ + -0.0137939453125 + ], + [ + 0.01299285888671875 + ] + ], + [ + [ + -0.015289306640625 + ], + [ + -0.01548004150390625 + ] + ], + [ + [ + 0.0142059326171875 + ], + [ + 0.0158233642578125 + ] + ], + [ + [ + -0.01528167724609375 + ], + [ + -0.013824462890625 + ] + ], + [ + [ + -0.01453399658203125 + ], + [ + -0.0151519775390625 + ] + ], + [ + [ + -0.01526641845703125 + ], + [ + 0.0164337158203125 + ] + ], + [ + [ + 0.01546478271484375 + ], + [ + -0.01494598388671875 + ] + ], + [ + [ + -0.01458740234375 + ], + [ + -0.01313018798828125 + ] + ], + [ + [ + -0.0141448974609375 + ], + [ + -0.0145721435546875 + ] + ], + [ + [ + -0.0144500732421875 + ], + [ + -0.012664794921875 + ] + ], + [ + [ + 0.0151824951171875 + ], + [ + 0.0142822265625 + ] + ], + [ + [ + 0.01434326171875 + ], + [ + -0.0160675048828125 + ] + ], + [ + [ + 0.01505279541015625 + ], + [ + -0.0137939453125 + ] + ], + [ + [ + 0.01270294189453125 + ], + [ + -0.0133056640625 + ] + ], + [ + [ + -0.01343536376953125 + ], + [ + -0.01441192626953125 + ] + ], + [ + [ + 0.0150146484375 + ], + [ + 0.01453399658203125 + ] + ], + [ + [ + -0.016143798828125 + ], + [ + -0.01445770263671875 + ] + ], + [ + [ + -0.0134735107421875 + ], + [ + 0.01480865478515625 + ] + ], + [ + [ + -0.0162506103515625 + ], + [ + 0.0152130126953125 + ] + ], + [ + [ + -0.01522064208984375 + ], + [ + -0.01541900634765625 + ] + ], + [ + [ + -0.01448822021484375 + ], + [ 
+ 0.01557159423828125 + ] + ], + [ + [ + -0.01395416259765625 + ], + [ + 0.01319122314453125 + ] + ], + [ + [ + -0.0153350830078125 + ], + [ + -0.01532745361328125 + ] + ], + [ + [ + 0.016265869140625 + ], + [ + -0.0161285400390625 + ] + ], + [ + [ + -0.0131988525390625 + ], + [ + 0.015350341796875 + ] + ], + [ + [ + 0.0146331787109375 + ], + [ + -0.01483917236328125 + ] + ], + [ + [ + -0.01554107666015625 + ], + [ + -0.01318359375 + ] + ], + [ + [ + 0.0138092041015625 + ], + [ + 0.01560211181640625 + ] + ] + ], + "asymmetric_weights_decompressor_mlp_gate_proj_weight_0": [ + [ + 0.0009555816650390625 + ], + [ + 0.0009636878967285156 + ], + [ + 0.00091552734375 + ], + [ + 0.0009512901306152344 + ], + [ + 0.000949859619140625 + ], + [ + 0.0009474754333496094 + ], + [ + 0.0009431838989257812 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009593963623046875 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009398460388183594 + ], + [ + 0.0009694099426269531 + ], + [ + 0.0009374618530273438 + ], + [ + 0.0009660720825195312 + ], + [ + 0.0009570121765136719 + ], + [ + 0.0009589195251464844 + ], + [ + 0.0009474754333496094 + ], + [ + 0.0009517669677734375 + ], + [ + 0.000942230224609375 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009679794311523438 + ], + [ + 0.0009641647338867188 + ], + [ + 0.0009756088256835938 + ], + [ + 0.0009708404541015625 + ], + [ + 0.0009584426879882812 + ], + [ + 0.0009531974792480469 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009632110595703125 + ], + [ + 0.0009751319885253906 + ], + [ + 0.0009646415710449219 + ], + [ + 0.0009503364562988281 + ], + [ + 0.0009660720825195312 + ], + [ + 0.0009450912475585938 + ], + [ + 0.0009551048278808594 + ], + [ + 0.0009632110595703125 + ], + [ + 0.00096893310546875 + ], + [ + 0.00087738037109375 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009393692016601562 + ], + [ + 0.0009708404541015625 + ], + [ + 0.0009555816650390625 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009641647338867188 + ], + [ + 
0.0009632110595703125 + ], + [ + 0.000972747802734375 + ], + [ + 0.0009322166442871094 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009284019470214844 + ], + [ + 0.0009598731994628906 + ], + [ + 0.0009293556213378906 + ], + [ + 0.00092315673828125 + ], + [ + 0.0008797645568847656 + ], + [ + 0.0009746551513671875 + ], + [ + 0.0009598731994628906 + ], + [ + 0.0009226799011230469 + ], + [ + 0.0009679794311523438 + ], + [ + 0.0009741783142089844 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009245872497558594 + ], + [ + 0.0009694099426269531 + ], + [ + 0.0009393692016601562 + ], + [ + 0.0009298324584960938 + ], + [ + 0.0009636878967285156 + ], + [ + 0.0009665489196777344 + ], + [ + 0.0009307861328125 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009760856628417969 + ], + [ + 0.0009551048278808594 + ], + [ + 0.0008802413940429688 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009632110595703125 + ], + [ + 0.0009469985961914062 + ], + [ + 0.00095367431640625 + ], + [ + 0.0009508132934570312 + ], + [ + 0.0009350776672363281 + ], + [ + 0.0009427070617675781 + ], + [ + 0.0009551048278808594 + ], + [ + 0.0009765625 + ], + [ + 0.0009560585021972656 + ], + [ + 0.0009598731994628906 + ], + [ + 0.0009636878967285156 + ], + [ + 0.0009670257568359375 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009646415710449219 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009274482727050781 + ], + [ + 0.0009093284606933594 + ], + [ + 0.0009398460388183594 + ], + [ + 0.0009493827819824219 + ], + [ + 0.0009708404541015625 + ], + [ + 0.0009756088256835938 + ], + [ + 0.0009412765502929688 + ], + [ + 0.0009632110595703125 + ], + [ + 0.0009260177612304688 + ], + [ + 0.0009589195251464844 + ], + [ + 0.0009484291076660156 + ], + [ + 0.000957489013671875 + ], + [ + 0.0009765625 + ], + [ + 0.0009641647338867188 + ], + [ + 0.000965118408203125 + ], + [ + 0.00095367431640625 + ], + [ + 0.0009660720825195312 + ], + [ + 
0.0009703636169433594 + ], + [ + 0.000949859619140625 + ], + [ + 0.0009622573852539062 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009379386901855469 + ], + [ + 0.0009436607360839844 + ], + [ + 0.0009646415710449219 + ], + [ + 0.0009708404541015625 + ], + [ + 0.0009355545043945312 + ], + [ + 0.0009694099426269531 + ], + [ + 0.000942230224609375 + ], + [ + 0.0009441375732421875 + ], + [ + 0.0009751319885253906 + ], + [ + 0.000957489013671875 + ], + [ + 0.0009441375732421875 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009450912475585938 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009417533874511719 + ], + [ + 0.0009288787841796875 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009632110595703125 + ] + ], + "asymmetric_weights_decompressor_mlp_up_proj_weight_updated_constant0_0": [ + [ + 0.0009670257568359375 + ], + [ + 0.0009627342224121094 + ], + [ + 0.0009636878967285156 + ], + [ + 0.0009675025939941406 + ], + [ + 0.0009679794311523438 + ], + [ + 0.00096893310546875 + ], + [ + 0.0009675025939941406 + ], + [ + 0.0009493827819824219 + ], + [ + 0.00096893310546875 + ], + [ + 0.0009484291076660156 + ], + [ + 0.000949859619140625 + ], + [ + 0.0009398460388183594 + ], + [ + 0.0009174346923828125 + ], + [ + 0.0009679794311523438 + ], + [ + 0.0009403228759765625 + ], + [ + 0.0009741783142089844 + ], + [ + 0.0008835792541503906 + ], + [ + 0.0009450912475585938 + ], + [ + 0.0009484291076660156 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009579658508300781 + ], + [ + 0.00091552734375 + ], + [ + 0.0009326934814453125 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009627342224121094 + ], + [ + 0.0009765625 + ], + [ + 0.0009531974792480469 + ], + [ + 0.0009503364562988281 + ], + [ + 0.0009765625 + ], + [ + 0.0009579658508300781 + ], + [ + 0.0009169578552246094 + ], + [ + 0.0009493827819824219 + ], + [ + 0.0009450912475585938 + ], + [ + 0.0009741783142089844 + ], + [ + 0.0009379386901855469 + ], + [ + 
0.0009403228759765625 + ], + [ + 0.00092315673828125 + ], + [ + 0.0009717941284179688 + ], + [ + 0.000911712646484375 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009360313415527344 + ], + [ + 0.0009765625 + ], + [ + 0.000972747802734375 + ], + [ + 0.0009288787841796875 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009121894836425781 + ], + [ + 0.0009512901306152344 + ], + [ + 0.0009007453918457031 + ], + [ + 0.0009570121765136719 + ], + [ + 0.0009136199951171875 + ], + [ + 0.0009436607360839844 + ], + [ + 0.0009584426879882812 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009465217590332031 + ], + [ + 0.0009765625 + ], + [ + 0.0009484291076660156 + ], + [ + 0.0009670257568359375 + ], + [ + 0.0009775161743164062 + ], + [ + 0.0009627342224121094 + ], + [ + 0.0009360313415527344 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009737014770507812 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009551048278808594 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009765625 + ], + [ + 0.0008749961853027344 + ], + [ + 0.0009751319885253906 + ], + [ + 0.0009322166442871094 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009331703186035156 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009756088256835938 + ], + [ + 0.0009636878967285156 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009760856628417969 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009207725524902344 + ], + [ + 0.0009741783142089844 + ], + [ + 0.0009436607360839844 + ], + [ + 0.0009217262268066406 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009579658508300781 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009641647338867188 + ], + [ + 0.0009579658508300781 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009646415710449219 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009202957153320312 + ], + [ + 0.0009560585021972656 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009784698486328125 + ], + [ + 0.00089263916015625 + ], + [ + 0.0009698867797851562 + ], + [ + 0.0013704299926757812 
+ ], + [ + 0.00206756591796875 + ], + [ + 0.0013513565063476562 + ], + [ + 0.00225830078125 + ], + [ + 0.0021381378173828125 + ], + [ + 0.0024662017822265625 + ], + [ + 0.0025844573974609375 + ], + [ + 0.0015583038330078125 + ], + [ + 0.0023174285888671875 + ], + [ + 0.0025119781494140625 + ], + [ + 0.002399444580078125 + ], + [ + 0.0020198822021484375 + ], + [ + 0.00145721435546875 + ], + [ + 0.0021514892578125 + ], + [ + 0.0019207000732421875 + ], + [ + 0.0019245147705078125 + ], + [ + 0.0016422271728515625 + ], + [ + 0.00133514404296875 + ], + [ + 0.0024929046630859375 + ], + [ + 0.0015106201171875 + ], + [ + 0.0017309188842773438 + ], + [ + 0.0017538070678710938 + ], + [ + 0.00246429443359375 + ], + [ + 0.0012035369873046875 + ], + [ + 0.002346038818359375 + ], + [ + 0.0008511543273925781 + ], + [ + 0.001300811767578125 + ], + [ + 0.0024204254150390625 + ], + [ + 0.002277374267578125 + ], + [ + 0.00124359130859375 + ], + [ + 0.0018281936645507812 + ], + [ + 0.0013427734375 + ] + ], + "symmetric_weights_decompressor_mlp_down_proj_weight_updated_constant0_0": [ + [ + [ + -0.01129150390625 + ], + [ + 0.01030731201171875 + ], + [ + -0.0095977783203125 + ], + [ + -0.00717926025390625 + ] + ], + [ + [ + -0.0103607177734375 + ], + [ + 0.010406494140625 + ], + [ + 0.010711669921875 + ], + [ + -0.00701904296875 + ] + ], + [ + [ + -0.0098419189453125 + ], + [ + 0.01096343994140625 + ], + [ + -0.01012420654296875 + ], + [ + 0.00954437255859375 + ] + ], + [ + [ + 0.0105133056640625 + ], + [ + 0.01090240478515625 + ], + [ + -0.010833740234375 + ], + [ + -0.00818634033203125 + ] + ], + [ + [ + 0.01097869873046875 + ], + [ + 0.0105438232421875 + ], + [ + -0.01099395751953125 + ], + [ + -0.00856781005859375 + ] + ], + [ + [ + 0.0101318359375 + ], + [ + 0.0116119384765625 + ], + [ + 0.00989532470703125 + ], + [ + 0.01172637939453125 + ] + ], + [ + [ + -0.00850677490234375 + ], + [ + 0.0114288330078125 + ], + [ + 0.01036834716796875 + ], + [ + 0.0076446533203125 + ] + ], + [ + [ 
+ -0.0112457275390625 + ], + [ + -0.0092315673828125 + ], + [ + 0.00942230224609375 + ], + [ + 0.007320404052734375 + ] + ], + [ + [ + 0.0104522705078125 + ], + [ + -0.00957489013671875 + ], + [ + -0.01071929931640625 + ], + [ + -0.00634002685546875 + ] + ], + [ + [ + -0.0102996826171875 + ], + [ + 0.01103973388671875 + ], + [ + -0.009124755859375 + ], + [ + -0.00803375244140625 + ] + ], + [ + [ + -0.0095367431640625 + ], + [ + 0.00888824462890625 + ], + [ + 0.01154327392578125 + ], + [ + 0.00800323486328125 + ] + ], + [ + [ + 0.009674072265625 + ], + [ + -0.0116119384765625 + ], + [ + 0.0104522705078125 + ], + [ + 0.00786590576171875 + ] + ], + [ + [ + 0.0091705322265625 + ], + [ + 0.00913238525390625 + ], + [ + -0.01096343994140625 + ], + [ + -0.007678985595703125 + ] + ], + [ + [ + 0.01239013671875 + ], + [ + 0.009857177734375 + ], + [ + 0.01012420654296875 + ], + [ + -0.007171630859375 + ] + ], + [ + [ + 0.01021575927734375 + ], + [ + 0.00972747802734375 + ], + [ + -0.01096343994140625 + ], + [ + 0.00801849365234375 + ] + ], + [ + [ + -0.01032257080078125 + ], + [ + -0.01013946533203125 + ], + [ + -0.01071929931640625 + ], + [ + -0.0102691650390625 + ] + ], + [ + [ + -0.0106964111328125 + ], + [ + 0.00943756103515625 + ], + [ + 0.01076507568359375 + ], + [ + 0.00707244873046875 + ] + ], + [ + [ + -0.0108795166015625 + ], + [ + -0.010406494140625 + ], + [ + 0.0109710693359375 + ], + [ + 0.00952911376953125 + ] + ], + [ + [ + -0.009552001953125 + ], + [ + 0.01085662841796875 + ], + [ + -0.00939178466796875 + ], + [ + -0.01177215576171875 + ] + ], + [ + [ + -0.0090179443359375 + ], + [ + 0.00785064697265625 + ], + [ + 0.00989532470703125 + ], + [ + 0.0099334716796875 + ] + ], + [ + [ + 0.0109100341796875 + ], + [ + -0.01056671142578125 + ], + [ + 0.0117950439453125 + ], + [ + 0.0103607177734375 + ] + ], + [ + [ + 0.01050567626953125 + ], + [ + -0.0103912353515625 + ], + [ + 0.01074981689453125 + ], + [ + 0.007213592529296875 + ] + ], + [ + [ + -0.009979248046875 + 
], + [ + -0.01123046875 + ], + [ + -0.0108489990234375 + ], + [ + -0.00695037841796875 + ] + ], + [ + [ + -0.01064300537109375 + ], + [ + -0.01023101806640625 + ], + [ + 0.00847625732421875 + ], + [ + 0.00609588623046875 + ] + ], + [ + [ + -0.0100250244140625 + ], + [ + 0.0110015869140625 + ], + [ + -0.009124755859375 + ], + [ + -0.007610321044921875 + ] + ], + [ + [ + 0.01087188720703125 + ], + [ + 0.01104736328125 + ], + [ + 0.01092529296875 + ], + [ + 0.008697509765625 + ] + ], + [ + [ + -0.0101470947265625 + ], + [ + 0.0101318359375 + ], + [ + -0.01070404052734375 + ], + [ + -0.007740020751953125 + ] + ], + [ + [ + 0.010467529296875 + ], + [ + -0.01071929931640625 + ], + [ + -0.01088714599609375 + ], + [ + 0.00823974609375 + ] + ], + [ + [ + 0.0109710693359375 + ], + [ + 0.01070404052734375 + ], + [ + -0.0088653564453125 + ], + [ + -0.0120391845703125 + ] + ], + [ + [ + -0.0101470947265625 + ], + [ + 0.01103973388671875 + ], + [ + -0.01092529296875 + ], + [ + 0.00841522216796875 + ] + ], + [ + [ + -0.00984954833984375 + ], + [ + 0.00902557373046875 + ], + [ + 0.01081085205078125 + ], + [ + -0.0115203857421875 + ] + ], + [ + [ + 0.01021575927734375 + ], + [ + -0.0107421875 + ], + [ + 0.01123809814453125 + ], + [ + 0.00835418701171875 + ] + ], + [ + [ + 0.01099395751953125 + ], + [ + -0.0100250244140625 + ], + [ + 0.01085662841796875 + ], + [ + 0.006694793701171875 + ] + ], + [ + [ + 0.00936126708984375 + ], + [ + 0.01097869873046875 + ], + [ + 0.01055908203125 + ], + [ + 0.00826263427734375 + ] + ], + [ + [ + 0.01218414306640625 + ], + [ + -0.01041412353515625 + ], + [ + -0.01038360595703125 + ], + [ + 0.00843048095703125 + ] + ], + [ + [ + -0.01076507568359375 + ], + [ + -0.0114593505859375 + ], + [ + 0.00991058349609375 + ], + [ + -0.0055389404296875 + ] + ], + [ + [ + -0.0111236572265625 + ], + [ + -0.0110015869140625 + ], + [ + 0.0101776123046875 + ], + [ + 0.00720977783203125 + ] + ], + [ + [ + -0.00986480712890625 + ], + [ + 0.01038360595703125 + ], + [ + 
-0.01102447509765625 + ], + [ + 0.00872802734375 + ] + ], + [ + [ + -0.01039886474609375 + ], + [ + -0.00897216796875 + ], + [ + 0.01068115234375 + ], + [ + -0.006473541259765625 + ] + ], + [ + [ + -0.01056671142578125 + ], + [ + 0.0096588134765625 + ], + [ + 0.0109100341796875 + ], + [ + 0.00579071044921875 + ] + ], + [ + [ + -0.009613037109375 + ], + [ + -0.0108489990234375 + ], + [ + 0.0097198486328125 + ], + [ + -0.006763458251953125 + ] + ], + [ + [ + 0.0100555419921875 + ], + [ + -0.00954437255859375 + ], + [ + -0.009185791015625 + ], + [ + 0.006927490234375 + ] + ], + [ + [ + -0.01076507568359375 + ], + [ + -0.010528564453125 + ], + [ + 0.0106048583984375 + ], + [ + -0.007671356201171875 + ] + ], + [ + [ + -0.01036834716796875 + ], + [ + -0.01068115234375 + ], + [ + -0.01056671142578125 + ], + [ + -0.009033203125 + ] + ], + [ + [ + -0.01070404052734375 + ], + [ + 0.01039886474609375 + ], + [ + -0.00970458984375 + ], + [ + -0.005916595458984375 + ] + ], + [ + [ + 0.00968170166015625 + ], + [ + -0.010589599609375 + ], + [ + 0.00940704345703125 + ], + [ + -0.00543212890625 + ] + ], + [ + [ + -0.01090240478515625 + ], + [ + -0.010345458984375 + ], + [ + 0.01006317138671875 + ], + [ + 0.00695037841796875 + ] + ], + [ + [ + 0.00974273681640625 + ], + [ + -0.0087432861328125 + ], + [ + -0.009857177734375 + ], + [ + -0.006603240966796875 + ] + ], + [ + [ + 0.00936126708984375 + ], + [ + 0.010711669921875 + ], + [ + 0.0103912353515625 + ], + [ + 0.006847381591796875 + ] + ], + [ + [ + 0.00975799560546875 + ], + [ + -0.01107025146484375 + ], + [ + -0.01073455810546875 + ], + [ + -0.0077362060546875 + ] + ], + [ + [ + 0.009185791015625 + ], + [ + -0.01050567626953125 + ], + [ + 0.0096588134765625 + ], + [ + 0.00775909423828125 + ] + ], + [ + [ + 0.00922393798828125 + ], + [ + 0.00940704345703125 + ], + [ + 0.00949859619140625 + ], + [ + 0.005893707275390625 + ] + ], + [ + [ + 0.01010894775390625 + ], + [ + -0.0094757080078125 + ], + [ + 0.00902557373046875 + ], + [ + 
-0.00682830810546875 + ] + ], + [ + [ + -0.00994873046875 + ], + [ + 0.010467529296875 + ], + [ + -0.00936126708984375 + ], + [ + -0.006427764892578125 + ] + ], + [ + [ + -0.01031494140625 + ], + [ + 0.01061248779296875 + ], + [ + -0.0096893310546875 + ], + [ + -0.0110626220703125 + ] + ], + [ + [ + -0.00983428955078125 + ], + [ + -0.01062774658203125 + ], + [ + 0.0115203857421875 + ], + [ + 0.006656646728515625 + ] + ], + [ + [ + -0.0110015869140625 + ], + [ + -0.00907135009765625 + ], + [ + -0.0113067626953125 + ], + [ + 0.0066680908203125 + ] + ], + [ + [ + 0.00891876220703125 + ], + [ + 0.01165008544921875 + ], + [ + 0.00977325439453125 + ], + [ + 0.00894927978515625 + ] + ], + [ + [ + -0.01136016845703125 + ], + [ + 0.01145172119140625 + ], + [ + -0.010955810546875 + ], + [ + 0.010772705078125 + ] + ], + [ + [ + -0.010711669921875 + ], + [ + 0.010772705078125 + ], + [ + 0.01076507568359375 + ], + [ + -0.006195068359375 + ] + ], + [ + [ + 0.00940704345703125 + ], + [ + -0.01081085205078125 + ], + [ + 0.00977325439453125 + ], + [ + -0.00846099853515625 + ] + ], + [ + [ + -0.01116943359375 + ], + [ + 0.00909423828125 + ], + [ + -0.01003265380859375 + ], + [ + -0.0112762451171875 + ] + ], + [ + [ + -0.010650634765625 + ], + [ + -0.0108489990234375 + ], + [ + 0.0111541748046875 + ], + [ + 0.01007843017578125 + ] + ], + [ + [ + -0.00910186767578125 + ], + [ + -0.010528564453125 + ], + [ + 0.0102691650390625 + ], + [ + -0.005126953125 + ] + ] + ] +} \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json new file mode 100644 index 00000000000..e1baa81d0dc --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json @@ -0,0 +1,128 @@ +[ + { 
+ "weight_name": "q_proj_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "k_proj_weight", + "node_with_weight": "linear_1", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "v_proj_weight", + "node_with_weight": "linear_2", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "o_proj_weight", + "node_with_weight": "linear_3", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_gate_proj_weight", + "node_with_weight": "linear_4", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_up_proj_weight", + "node_with_weight": "linear_5", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "mlp_down_proj_weight", + "node_with_weight": "linear_6", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 128 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + 
"mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot new file mode 100644 index 00000000000..29de7b02841 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot @@ -0,0 +1,169 @@ +strict digraph { +"0 attn_norm_weight" [id=0, type="get_attr"]; +"1 mlp_norm_weight" [id=1, type="get_attr"]; +"2 rope_cos" [id=2, type="get_attr"]; +"3 rope_sin" [id=3, type="get_attr"]; +"4 x_embed" [id=4, type=input]; +"5 arange" [id=5, type=arange]; +"6 _assert_tensor_metadata_default" [id=6, type="_assert_tensor_metadata"]; +"7 to" [id=7, type=to]; +"8 pow_1" [id=8, type=pow]; +"9 mean" [id=9, type=mean]; +"10 add" [id=10, type=add]; +"11 rsqrt" [id=11, type=rsqrt]; +"12 mul" [id=12, type=mul]; +"13 _assert_tensor_metadata_default_1" [id=13, type="_assert_tensor_metadata"]; +"14 to_1" [id=14, type=to]; +"15 mul_1" [id=15, type=mul]; +"16 q_proj_weight_updated_constant0" [id=16, type="get_attr"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" [id=17, type="call_module"]; +"18 linear" [id=18, type=linear]; +"19 view" [id=19, type=view]; +"20 transpose" [id=20, type=transpose]; +"21 k_proj_weight_updated_constant0" [id=21, type="get_attr"]; +"22 asymmetric_weights_decompressor_k_proj_weight_0" [id=22, type="call_module"]; +"23 linear_1" [id=23, type=linear]; +"24 view_1" [id=24, type=view]; +"25 transpose_1" [id=25, type=transpose]; +"26 v_proj_weight_updated_constant0" [id=26, type="get_attr"]; +"27 asymmetric_weights_decompressor_v_proj_weight_0" [id=27, type="call_module"]; +"28 linear_2" [id=28, type=linear]; +"29 view_2" [id=29, type=view]; +"30 transpose_2" [id=30, type=transpose]; +"31 index" [id=31, 
type=index]; +"32 index_1" [id=32, type=index]; +"33 mul_2" [id=33, type=mul]; +"34 slice_1" [id=34, type=slice]; +"35 slice_2" [id=35, type=slice]; +"36 neg" [id=36, type=neg]; +"37 cat" [id=37, type=cat]; +"38 mul_3" [id=38, type=mul]; +"39 add_1" [id=39, type=add]; +"40 mul_4" [id=40, type=mul]; +"41 slice_3" [id=41, type=slice]; +"42 slice_4" [id=42, type=slice]; +"43 neg_1" [id=43, type=neg]; +"44 cat_1" [id=44, type=cat]; +"45 mul_5" [id=45, type=mul]; +"46 add_2" [id=46, type=add]; +"47 scaled_dot_product_attention" [id=47, type="scaled_dot_product_attention"]; +"48 transpose_3" [id=48, type=transpose]; +"49 view_3" [id=49, type=view]; +"50 o_proj_weight_updated_constant0" [id=50, type="get_attr"]; +"51 asymmetric_weights_decompressor_o_proj_weight_0" [id=51, type="call_module"]; +"52 linear_3" [id=52, type=linear]; +"53 add_3" [id=53, type=add]; +"54 _assert_tensor_metadata_default_2" [id=54, type="_assert_tensor_metadata"]; +"55 to_2" [id=55, type=to]; +"56 pow_2" [id=56, type=pow]; +"57 mean_1" [id=57, type=mean]; +"58 add_4" [id=58, type=add]; +"59 rsqrt_1" [id=59, type=rsqrt]; +"60 mul_6" [id=60, type=mul]; +"61 _assert_tensor_metadata_default_3" [id=61, type="_assert_tensor_metadata"]; +"62 to_3" [id=62, type=to]; +"63 mul_7" [id=63, type=mul]; +"64 mlp_gate_proj_weight_updated_constant0" [id=64, type="get_attr"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [id=65, type="call_module"]; +"66 linear_4" [id=66, type=linear]; +"67 silu" [id=67, type=silu]; +"68 mlp_up_proj_weight_updated_constant0" [id=68, type="get_attr"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" [id=69, type="call_module"]; +"70 linear_5" [id=70, type=linear]; +"71 mul_8" [id=71, type=mul]; +"72 mlp_down_proj_weight_updated_constant0" [id=72, type="get_attr"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [id=73, type="call_module"]; +"74 linear_6" [id=74, type=linear]; +"75 add_5" [id=75, type=add]; +"76 output" [id=76, type=output]; +"0 
attn_norm_weight" -> "15 mul_1" [style=solid, label="(64,)"]; +"1 mlp_norm_weight" -> "63 mul_7" [style=solid, label="(64,)"]; +"2 rope_cos" -> "31 index" [style=solid, label="(1, 1, 128, 16)"]; +"3 rope_sin" -> "32 index_1" [style=solid, label="(1, 1, 128, 16)"]; +"4 x_embed" -> "6 _assert_tensor_metadata_default" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "7 to" [style=solid, label="(1, 3, 64)"]; +"4 x_embed" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"5 arange" -> "31 index" [style=solid, label="(3,)"]; +"5 arange" -> "32 index_1" [style=solid, label="(3,)"]; +"7 to" -> "8 pow_1" [style=solid, label="(1, 3, 64)"]; +"7 to" -> "12 mul" [style=solid, label="(1, 3, 64)"]; +"8 pow_1" -> "9 mean" [style=solid, label="(1, 3, 64)"]; +"9 mean" -> "10 add" [style=solid, label="(1, 3, 1)"]; +"10 add" -> "11 rsqrt" [style=solid, label="(1, 3, 1)"]; +"11 rsqrt" -> "12 mul" [style=solid, label="(1, 3, 1)"]; +"12 mul" -> "13 _assert_tensor_metadata_default_1" [style=solid, label="(1, 3, 64)"]; +"12 mul" -> "14 to_1" [style=solid, label="(1, 3, 64)"]; +"14 to_1" -> "15 mul_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "18 linear" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "23 linear_1" [style=solid, label="(1, 3, 64)"]; +"15 mul_1" -> "28 linear_2" [style=solid, label="(1, 3, 64)"]; +"16 q_proj_weight_updated_constant0" -> "17 asymmetric_weights_decompressor_q_proj_weight_0" [style=solid, label="(64, 64)"]; +"17 asymmetric_weights_decompressor_q_proj_weight_0" -> "18 linear" [style=solid, label="(64, 64)"]; +"18 linear" -> "19 view" [style=solid, label="(1, 3, 64)"]; +"19 view" -> "20 transpose" [style=solid, label="(1, 3, 4, 16)"]; +"20 transpose" -> "33 mul_2" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "34 slice_1" [style=solid, label="(1, 4, 3, 16)"]; +"20 transpose" -> "35 slice_2" [style=solid, label="(1, 4, 3, 16)"]; +"21 k_proj_weight_updated_constant0" -> "22 asymmetric_weights_decompressor_k_proj_weight_0" [style=solid, 
label="(64, 64)"]; +"22 asymmetric_weights_decompressor_k_proj_weight_0" -> "23 linear_1" [style=solid, label="(64, 64)"]; +"23 linear_1" -> "24 view_1" [style=solid, label="(1, 3, 64)"]; +"24 view_1" -> "25 transpose_1" [style=solid, label="(1, 3, 4, 16)"]; +"25 transpose_1" -> "40 mul_4" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "41 slice_3" [style=solid, label="(1, 4, 3, 16)"]; +"25 transpose_1" -> "42 slice_4" [style=solid, label="(1, 4, 3, 16)"]; +"26 v_proj_weight_updated_constant0" -> "27 asymmetric_weights_decompressor_v_proj_weight_0" [style=solid, label="(64, 64)"]; +"27 asymmetric_weights_decompressor_v_proj_weight_0" -> "28 linear_2" [style=solid, label="(64, 64)"]; +"28 linear_2" -> "29 view_2" [style=solid, label="(1, 3, 64)"]; +"29 view_2" -> "30 transpose_2" [style=solid, label="(1, 3, 4, 16)"]; +"30 transpose_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"31 index" -> "33 mul_2" [style=solid, label="(1, 1, 3, 16)"]; +"31 index" -> "40 mul_4" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "38 mul_3" [style=solid, label="(1, 1, 3, 16)"]; +"32 index_1" -> "45 mul_5" [style=solid, label="(1, 1, 3, 16)"]; +"33 mul_2" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"34 slice_1" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"35 slice_2" -> "36 neg" [style=solid, label="(1, 4, 3, 8)"]; +"36 neg" -> "37 cat" [style=solid, label="(1, 4, 3, 8)"]; +"37 cat" -> "38 mul_3" [style=solid, label="(1, 4, 3, 16)"]; +"38 mul_3" -> "39 add_1" [style=solid, label="(1, 4, 3, 16)"]; +"39 add_1" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"40 mul_4" -> "46 add_2" [style=solid, label="(1, 4, 3, 16)"]; +"41 slice_3" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"42 slice_4" -> "43 neg_1" [style=solid, label="(1, 4, 3, 8)"]; +"43 neg_1" -> "44 cat_1" [style=solid, label="(1, 4, 3, 8)"]; +"44 cat_1" -> "45 mul_5" [style=solid, label="(1, 4, 3, 16)"]; +"45 mul_5" -> "46 
add_2" [style=solid, label="(1, 4, 3, 16)"]; +"46 add_2" -> "47 scaled_dot_product_attention" [style=solid, label="(1, 4, 3, 16)"]; +"47 scaled_dot_product_attention" -> "48 transpose_3" [style=solid, label="(1, 4, 3, 16)"]; +"48 transpose_3" -> "49 view_3" [style=solid, label="(1, 3, 4, 16)"]; +"49 view_3" -> "52 linear_3" [style=solid, label="(1, 3, 64)"]; +"50 o_proj_weight_updated_constant0" -> "51 asymmetric_weights_decompressor_o_proj_weight_0" [style=solid, label="(64, 64)"]; +"51 asymmetric_weights_decompressor_o_proj_weight_0" -> "52 linear_3" [style=solid, label="(64, 64)"]; +"52 linear_3" -> "53 add_3" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "54 _assert_tensor_metadata_default_2" [style=solid, label="(1, 3, 64)"]; +"53 add_3" -> "55 to_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "56 pow_2" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "60 mul_6" [style=solid, label="(1, 3, 64)"]; +"55 to_2" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"56 pow_2" -> "57 mean_1" [style=solid, label="(1, 3, 64)"]; +"57 mean_1" -> "58 add_4" [style=solid, label="(1, 3, 1)"]; +"58 add_4" -> "59 rsqrt_1" [style=solid, label="(1, 3, 1)"]; +"59 rsqrt_1" -> "60 mul_6" [style=solid, label="(1, 3, 1)"]; +"60 mul_6" -> "61 _assert_tensor_metadata_default_3" [style=solid, label="(1, 3, 64)"]; +"60 mul_6" -> "62 to_3" [style=solid, label="(1, 3, 64)"]; +"62 to_3" -> "63 mul_7" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "66 linear_4" [style=solid, label="(1, 3, 64)"]; +"63 mul_7" -> "70 linear_5" [style=solid, label="(1, 3, 64)"]; +"64 mlp_gate_proj_weight_updated_constant0" -> "65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" [style=solid, label="(128, 64)"]; +"65 asymmetric_weights_decompressor_mlp_gate_proj_weight_0" -> "66 linear_4" [style=solid, label="(128, 64)"]; +"66 linear_4" -> "67 silu" [style=solid, label="(1, 3, 128)"]; +"67 silu" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"68 mlp_up_proj_weight_updated_constant0" -> "69 
asymmetric_weights_decompressor_mlp_up_proj_weight_0" [style=solid, label="(128, 64)"]; +"69 asymmetric_weights_decompressor_mlp_up_proj_weight_0" -> "70 linear_5" [style=solid, label="(128, 64)"]; +"70 linear_5" -> "71 mul_8" [style=solid, label="(1, 3, 128)"]; +"71 mul_8" -> "74 linear_6" [style=solid, label="(1, 3, 128)"]; +"72 mlp_down_proj_weight_updated_constant0" -> "73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" [style=solid, label="(64, 128)"]; +"73 asymmetric_weights_decompressor_mlp_down_proj_weight_0" -> "74 linear_6" [style=solid, label="(64, 128)"]; +"74 linear_6" -> "75 add_5" [style=solid, label="(1, 3, 64)"]; +"75 add_5" -> "76 output" [style=solid, label="(1, 3, 64)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json new file mode 100644 index 00000000000..40b1cc6c44e --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json @@ -0,0 +1,1744 @@ +{ + "asymmetric_weights_decompressor_q_proj_weight_0": [ + [ + 0.0009570121765136719 + ], + [ + 0.0009775161743164062 + ], + [ + 0.00090789794921875 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009713172912597656 + ], + [ + 0.0009593963623046875 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009412765502929688 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009531974792480469 + ], + [ + 0.0009756088256835938 + ], + [ + 0.0009655952453613281 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009293556213378906 + ], + [ + 0.0009431838989257812 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009675025939941406 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009112358093261719 + ], + [ + 
0.0009660720825195312 + ], + [ + 0.0009403228759765625 + ], + [ + 0.0009670257568359375 + ], + [ + 0.0009279251098632812 + ], + [ + 0.0009522438049316406 + ], + [ + 0.0008702278137207031 + ], + [ + 0.0009508132934570312 + ], + [ + 0.0009407997131347656 + ], + [ + 0.0009713172912597656 + ], + [ + 0.0009570121765136719 + ], + [ + 0.0009112358093261719 + ], + [ + 0.0009579658508300781 + ], + [ + 0.000926971435546875 + ], + [ + 0.0009055137634277344 + ], + [ + 0.0009469985961914062 + ], + [ + 0.0009598731994628906 + ], + [ + 0.0009484291076660156 + ], + [ + 0.0009455680847167969 + ], + [ + 0.0009098052978515625 + ], + [ + 0.0009479522705078125 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009589195251464844 + ], + [ + 0.0009512901306152344 + ], + [ + 0.0009641647338867188 + ], + [ + 0.0009608268737792969 + ], + [ + 0.0009350776672363281 + ], + [ + 0.000911712646484375 + ], + [ + 0.0009655952453613281 + ], + [ + 0.000949859619140625 + ], + [ + 0.00092315673828125 + ], + [ + 0.000957489013671875 + ], + [ + 0.0009412765502929688 + ], + [ + 0.0009593963623046875 + ], + [ + 0.0009217262268066406 + ], + [ + 0.0009026527404785156 + ], + [ + 0.00095367431640625 + ], + [ + 0.0009531974792480469 + ], + [ + 0.0009198188781738281 + ], + [ + 0.0009183883666992188 + ], + [ + 0.000957489013671875 + ], + [ + 0.0009531974792480469 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009760856628417969 + ], + [ + 0.0009632110595703125 + ] + ], + "asymmetric_weights_decompressor_k_proj_weight_0": [ + [ + 0.0009379386901855469 + ], + [ + 0.0009522438049316406 + ], + [ + 0.0009541511535644531 + ], + [ + 0.0009598731994628906 + ], + [ + 0.0009517669677734375 + ], + [ + 0.000934600830078125 + ], + [ + 0.0009455680847167969 + ], + [ + 0.000926971435546875 + ], + [ + 0.000957489013671875 + ], + [ + 0.0009679794311523438 + ], + [ + 0.0009698867797851562 + ], + [ + 0.0009417533874511719 + ], + [ + 0.0009417533874511719 + ], + [ + 0.0009469985961914062 + ], + [ + 
0.000965118408203125 + ], + [ + 0.000946044921875 + ], + [ + 0.0009469985961914062 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009484291076660156 + ], + [ + 0.0009698867797851562 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009522438049316406 + ], + [ + 0.0009694099426269531 + ], + [ + 0.0009531974792480469 + ], + [ + 0.0009427070617675781 + ], + [ + 0.0009622573852539062 + ], + [ + 0.0009732246398925781 + ], + [ + 0.0009245872497558594 + ], + [ + 0.0009450912475585938 + ], + [ + 0.0009560585021972656 + ], + [ + 0.000949859619140625 + ], + [ + 0.0009469985961914062 + ], + [ + 0.0009622573852539062 + ], + [ + 0.00093841552734375 + ], + [ + 0.0009455680847167969 + ], + [ + 0.0009469985961914062 + ], + [ + 0.0009493827819824219 + ], + [ + 0.0009741783142089844 + ], + [ + 0.0009331703186035156 + ], + [ + 0.0009412765502929688 + ], + [ + 0.0009660720825195312 + ], + [ + 0.0009593963623046875 + ], + [ + 0.0009484291076660156 + ], + [ + 0.0009756088256835938 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009279251098632812 + ], + [ + 0.0009350776672363281 + ], + [ + 0.00093841552734375 + ], + [ + 0.0009512901306152344 + ], + [ + 0.0009531974792480469 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009493827819824219 + ], + [ + 0.0009551048278808594 + ], + [ + 0.0009503364562988281 + ], + [ + 0.0009622573852539062 + ], + [ + 0.0009250640869140625 + ], + [ + 0.0009508132934570312 + ], + [ + 0.0009503364562988281 + ], + [ + 0.0009627342224121094 + ], + [ + 0.0009059906005859375 + ], + [ + 0.0009679794311523438 + ], + [ + 0.0009765625 + ], + [ + 0.0009584426879882812 + ], + [ + 0.0009436607360839844 + ] + ], + "asymmetric_weights_decompressor_v_proj_weight_0": [ + [ + 0.0009713172912597656 + ], + [ + 0.0009407997131347656 + ], + [ + 0.000934600830078125 + ], + [ + 0.0009560585021972656 + ], + [ + 0.00090789794921875 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009274482727050781 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009703636169433594 + ], + [ + 
0.0009093284606933594 + ], + [ + 0.0009679794311523438 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009374618530273438 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009140968322753906 + ], + [ + 0.0009531974792480469 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009226799011230469 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009160041809082031 + ], + [ + 0.0009560585021972656 + ], + [ + 0.0009713172912597656 + ], + [ + 0.000942230224609375 + ], + [ + 0.0009527206420898438 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009450912475585938 + ], + [ + 0.0009388923645019531 + ], + [ + 0.0009670257568359375 + ], + [ + 0.00095367431640625 + ], + [ + 0.0009665489196777344 + ], + [ + 0.00096893310546875 + ], + [ + 0.0009512901306152344 + ], + [ + 0.0009665489196777344 + ], + [ + 0.000934600830078125 + ], + [ + 0.0009379386901855469 + ], + [ + 0.0009522438049316406 + ], + [ + 0.0009431838989257812 + ], + [ + 0.0009765625 + ], + [ + 0.0009579658508300781 + ], + [ + 0.0009608268737792969 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009694099426269531 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009665489196777344 + ], + [ + 0.0009260177612304688 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009188652038574219 + ], + [ + 0.0009365081787109375 + ], + [ + 0.00095367431640625 + ], + [ + 0.0009708404541015625 + ], + [ + 0.0009398460388183594 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009446144104003906 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009593963623046875 + ], + [ + 0.0009503364562988281 + ], + [ + 0.0009560585021972656 + ], + [ + 0.0009608268737792969 + ], + [ + 0.0009760856628417969 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0008978843688964844 + ] + ], + "asymmetric_weights_decompressor_o_proj_weight_0": [ + [ + 0.0009369850158691406 + ], + [ + 0.0009579658508300781 + ], + [ + 0.0009160041809082031 + ], + [ + 0.0009713172912597656 + ], + [ + 
0.0009646415710449219 + ], + [ + 0.0009450912475585938 + ], + [ + 0.0009622573852539062 + ], + [ + 0.0009102821350097656 + ], + [ + 0.000942230224609375 + ], + [ + 0.0009608268737792969 + ], + [ + 0.0009150505065917969 + ], + [ + 0.0009412765502929688 + ], + [ + 0.0009427070617675781 + ], + [ + 0.0009264945983886719 + ], + [ + 0.0009660720825195312 + ], + [ + 0.0009541511535644531 + ], + [ + 0.0009675025939941406 + ], + [ + 0.0009431838989257812 + ], + [ + 0.0009207725524902344 + ], + [ + 0.0009479522705078125 + ], + [ + 0.0009660720825195312 + ], + [ + 0.0009217262268066406 + ], + [ + 0.0009613037109375 + ], + [ + 0.0009675025939941406 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009465217590332031 + ], + [ + 0.0009403228759765625 + ], + [ + 0.00093841552734375 + ], + [ + 0.0009326934814453125 + ], + [ + 0.0009765625 + ], + [ + 0.0009708404541015625 + ], + [ + 0.0009565353393554688 + ], + [ + 0.0009765625 + ], + [ + 0.0009293556213378906 + ], + [ + 0.0009379386901855469 + ], + [ + 0.0009794235229492188 + ], + [ + 0.0009675025939941406 + ], + [ + 0.0009517669677734375 + ], + [ + 0.0009436607360839844 + ], + [ + 0.0009622573852539062 + ], + [ + 0.000942230224609375 + ], + [ + 0.0009531974792480469 + ], + [ + 0.0009503364562988281 + ], + [ + 0.0009775161743164062 + ], + [ + 0.000972747802734375 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009307861328125 + ], + [ + 0.0009579658508300781 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009584426879882812 + ], + [ + 0.0009207725524902344 + ], + [ + 0.0008878707885742188 + ], + [ + 0.0009522438049316406 + ], + [ + 0.0009560585021972656 + ], + [ + 0.00093841552734375 + ], + [ + 0.0009641647338867188 + ], + [ + 0.000957489013671875 + ], + [ + 0.0009393692016601562 + ], + [ + 0.0009016990661621094 + ], + [ + 0.0009589195251464844 + ], + [ + 0.0009589195251464844 + ], + [ + 0.0009746551513671875 + ], + [ + 0.0009665489196777344 + ], + [ + 0.0009617805480957031 + ] + ], + 
"asymmetric_weights_decompressor_mlp_gate_proj_weight_0": [ + [ + 0.0009775161743164062 + ], + [ + 0.0009708404541015625 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009775161743164062 + ], + [ + 0.0009636878967285156 + ], + [ + 0.0009717941284179688 + ], + [ + 0.000934600830078125 + ], + [ + 0.0009636878967285156 + ], + [ + 0.00091552734375 + ], + [ + 0.0009522438049316406 + ], + [ + 0.0009431838989257812 + ], + [ + 0.0009474754333496094 + ], + [ + 0.0009431838989257812 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009593963623046875 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009598731994628906 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009374618530273438 + ], + [ + 0.0009660720825195312 + ], + [ + 0.0009632110595703125 + ], + [ + 0.0009503364562988281 + ], + [ + 0.0009474754333496094 + ], + [ + 0.0009636878967285156 + ], + [ + 0.0009646415710449219 + ], + [ + 0.0009646415710449219 + ], + [ + 0.0009679794311523438 + ], + [ + 0.00096893310546875 + ], + [ + 0.0009756088256835938 + ], + [ + 0.0009708404541015625 + ], + [ + 0.0009517669677734375 + ], + [ + 0.0009531974792480469 + ], + [ + 0.0009489059448242188 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009031295776367188 + ], + [ + 0.0009646415710449219 + ], + [ + 0.00096893310546875 + ], + [ + 0.0009598731994628906 + ], + [ + 0.0009245872497558594 + ], + [ + 0.0009675025939941406 + ], + [ + 0.0009632110595703125 + ], + [ + 0.00096893310546875 + ], + [ + 0.0009126663208007812 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009427070617675781 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009555816650390625 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009641647338867188 + ], + [ + 0.0009713172912597656 + ], + [ + 0.0009698867797851562 + ], + [ + 0.0009388923645019531 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0008859634399414062 + ], + [ + 0.0009598731994628906 + ], + [ + 0.0009293556213378906 + ], + [ + 0.00092315673828125 + ], + [ + 0.0008797645568847656 + ], + [ + 
0.0009746551513671875 + ], + [ + 0.0009541511535644531 + ], + [ + 0.0009226799011230469 + ], + [ + 0.0009679794311523438 + ], + [ + 0.0009741783142089844 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009245872497558594 + ], + [ + 0.0009694099426269531 + ], + [ + 0.0009512901306152344 + ], + [ + 0.0009183883666992188 + ], + [ + 0.0009636878967285156 + ], + [ + 0.0009665489196777344 + ], + [ + 0.0009307861328125 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009760856628417969 + ], + [ + 0.0009551048278808594 + ], + [ + 0.0008802413940429688 + ], + [ + 0.0009717941284179688 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009469985961914062 + ], + [ + 0.00095367431640625 + ], + [ + 0.0009508132934570312 + ], + [ + 0.0009632110595703125 + ], + [ + 0.0009436607360839844 + ], + [ + 0.0009322166442871094 + ], + [ + 0.0009765625 + ], + [ + 0.0009469985961914062 + ], + [ + 0.0009636878967285156 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009670257568359375 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009646415710449219 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009298324584960938 + ], + [ + 0.0008978843688964844 + ], + [ + 0.000949859619140625 + ], + [ + 0.0009593963623046875 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009756088256835938 + ], + [ + 0.0009417533874511719 + ], + [ + 0.0009555816650390625 + ], + [ + 0.0009527206420898438 + ], + [ + 0.000926971435546875 + ], + [ + 0.0009565353393554688 + ], + [ + 0.000957489013671875 + ], + [ + 0.0009765625 + ], + [ + 0.0009641647338867188 + ], + [ + 0.0009622573852539062 + ], + [ + 0.00095367431640625 + ], + [ + 0.0009660720825195312 + ], + [ + 0.0009703636169433594 + ], + [ + 0.000949859619140625 + ], + [ + 0.0009436607360839844 + ], + [ + 0.0009570121765136719 + ], + [ + 0.0009655952453613281 + ], + [ + 0.00096893310546875 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009608268737792969 + ], + [ + 0.0009360313415527344 + ], + [ + 0.0008497238159179688 + ], + [ + 
0.0009436607360839844 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009412765502929688 + ], + [ + 0.0009751319885253906 + ], + [ + 0.000957489013671875 + ], + [ + 0.0009441375732421875 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009450912475585938 + ] + ], + "asymmetric_weights_decompressor_mlp_up_proj_weight_0": [ + [ + 0.0009703636169433594 + ], + [ + 0.0009417533874511719 + ], + [ + 0.0009379386901855469 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009632110595703125 + ], + [ + 0.0009670257568359375 + ], + [ + 0.0009627342224121094 + ], + [ + 0.0009636878967285156 + ], + [ + 0.0009698867797851562 + ], + [ + 0.0009479522705078125 + ], + [ + 0.0009713172912597656 + ], + [ + 0.0009675025939941406 + ], + [ + 0.0009493827819824219 + ], + [ + 0.00096893310546875 + ], + [ + 0.0009484291076660156 + ], + [ + 0.000949859619140625 + ], + [ + 0.0009565353393554688 + ], + [ + 0.0009121894836425781 + ], + [ + 0.0009679794311523438 + ], + [ + 0.000957489013671875 + ], + [ + 0.0009665489196777344 + ], + [ + 0.0009403228759765625 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009388923645019531 + ], + [ + 0.0009760856628417969 + ], + [ + 0.0009570121765136719 + ], + [ + 0.0009322166442871094 + ], + [ + 0.0009326934814453125 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009555816650390625 + ], + [ + 0.0009765625 + ], + [ + 0.0009188652038574219 + ], + [ + 0.0009503364562988281 + ], + [ + 0.0009765625 + ], + [ + 0.0009579658508300781 + ], + [ + 0.0009031295776367188 + ], + [ + 0.0009493827819824219 + ], + [ + 0.0009274482727050781 + ], + [ + 0.0009741783142089844 + ], + [ + 0.0009379386901855469 + ], + [ + 0.0009403228759765625 + ], + [ + 0.00092315673828125 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009055137634277344 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009589195251464844 + ], + [ + 0.000972747802734375 + ], + [ + 0.0009694099426269531 + ], + [ + 0.0009641647338867188 + ], + [ + 
0.0009164810180664062 + ], + [ + 0.0009512901306152344 + ], + [ + 0.0009007453918457031 + ], + [ + 0.0009570121765136719 + ], + [ + 0.0009188652038574219 + ], + [ + 0.0009560585021972656 + ], + [ + 0.0009551048278808594 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009465217590332031 + ], + [ + 0.0009765625 + ], + [ + 0.0009541511535644531 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009760856628417969 + ], + [ + 0.000957489013671875 + ], + [ + 0.0009493827819824219 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009737014770507812 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009608268737792969 + ], + [ + 0.0009217262268066406 + ], + [ + 0.0009765625 + ], + [ + 0.0008749961853027344 + ], + [ + 0.0009751319885253906 + ], + [ + 0.0009322166442871094 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009331703186035156 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009756088256835938 + ], + [ + 0.0009646415710449219 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009760856628417969 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009431838989257812 + ], + [ + 0.0009765625 + ], + [ + 0.0009403228759765625 + ], + [ + 0.000946044921875 + ], + [ + 0.0009775161743164062 + ], + [ + 0.0009350776672363281 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009641647338867188 + ], + [ + 0.0009522438049316406 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009646415710449219 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009484291076660156 + ], + [ + 0.0009412765502929688 + ], + [ + 0.0009450912475585938 + ], + [ + 0.0009784698486328125 + ], + [ + 0.0009140968322753906 + ], + [ + 0.0009698867797851562 + ], + [ + 0.0009546279907226562 + ], + [ + 0.0009436607360839844 + ], + [ + 0.0008835792541503906 + ], + [ + 0.0009217262268066406 + ], + [ + 0.0009484291076660156 + ], + [ + 0.0009794235229492188 + ], + [ + 0.0009698867797851562 + ], + [ + 0.0008611679077148438 + ], + [ + 0.0009746551513671875 + ], + [ + 0.0009427070617675781 + ], + [ + 0.0008997917175292969 
+ ], + [ + 0.0009713172912597656 + ], + [ + 0.0009679794311523438 + ], + [ + 0.0009393692016601562 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009407997131347656 + ], + [ + 0.0009593963623046875 + ], + [ + 0.0009493827819824219 + ], + [ + 0.0008873939514160156 + ], + [ + 0.0009212493896484375 + ], + [ + 0.0009560585021972656 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009517669677734375 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009570121765136719 + ] + ], + "asymmetric_weights_decompressor_mlp_down_proj_weight_0": [ + [ + 0.0006866455078125 + ], + [ + 0.0006728172302246094 + ], + [ + 0.0006909370422363281 + ], + [ + 0.000690460205078125 + ], + [ + 0.0006866455078125 + ], + [ + 0.0006794929504394531 + ], + [ + 0.0006856918334960938 + ], + [ + 0.0006885528564453125 + ], + [ + 0.0006775856018066406 + ], + [ + 0.00067901611328125 + ], + [ + 0.0006818771362304688 + ], + [ + 0.0006895065307617188 + ], + [ + 0.0006823539733886719 + ], + [ + 0.0006814002990722656 + ], + [ + 0.000690460205078125 + ], + [ + 0.0006890296936035156 + ], + [ + 0.0006895065307617188 + ], + [ + 0.0006847381591796875 + ], + [ + 0.0006761550903320312 + ], + [ + 0.0006785392761230469 + ], + [ + 0.0006885528564453125 + ], + [ + 0.000682830810546875 + ], + [ + 0.0006794929504394531 + ], + [ + 0.0006899833679199219 + ], + [ + 0.0006804466247558594 + ], + [ + 0.0006785392761230469 + ], + [ + 0.00066375732421875 + ], + [ + 0.0006914138793945312 + ], + [ + 0.0006780624389648438 + ], + [ + 0.0006856918334960938 + ], + [ + 0.0006890296936035156 + ], + [ + 0.0006837844848632812 + ], + [ + 0.0006890296936035156 + ], + [ + 0.0006885528564453125 + ], + [ + 0.0006866455078125 + ], + [ + 0.0006885528564453125 + ], + [ + 0.0006880760192871094 + ], + [ + 0.0006861686706542969 + ], + [ + 0.0006861686706542969 + ], + [ + 0.0006804466247558594 + ], + [ + 0.0006866455078125 + ], + [ + 0.0006761550903320312 + ], + [ + 0.0006871223449707031 + ], + [ + 0.0006875991821289062 + ], + 
[ + 0.0006780624389648438 + ], + [ + 0.0006880760192871094 + ], + [ + 0.0006909370422363281 + ], + [ + 0.0006718635559082031 + ], + [ + 0.0006723403930664062 + ], + [ + 0.0006895065307617188 + ], + [ + 0.0006694793701171875 + ], + [ + 0.0006737709045410156 + ], + [ + 0.0006885528564453125 + ], + [ + 0.0006785392761230469 + ], + [ + 0.0006885528564453125 + ], + [ + 0.0006804466247558594 + ], + [ + 0.0006866455078125 + ], + [ + 0.0006666183471679688 + ], + [ + 0.0006909370422363281 + ], + [ + 0.0006833076477050781 + ], + [ + 0.0006875991821289062 + ], + [ + 0.0006818771362304688 + ], + [ + 0.0006794929504394531 + ], + [ + 0.0006918907165527344 + ] + ] +} \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json new file mode 100644 index 00000000000..69d4cf0f6a8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json @@ -0,0 +1,128 @@ +[ + { + "weight_name": "q_proj_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "k_proj_weight", + "node_with_weight": "linear_1", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "v_proj_weight", + "node_with_weight": "linear_2", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": 
"int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "o_proj_weight", + "node_with_weight": "linear_3", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "mlp_gate_proj_weight", + "node_with_weight": "linear_4", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "mlp_up_proj_weight", + "node_with_weight": "linear_5", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "mlp_down_proj_weight", + "node_with_weight": "linear_6", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 128 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 
asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json new file mode 100644 index 00000000000..38bc9f7b51c --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json @@ -0,0 +1,582 @@ +{ + "asymmetric_weights_decompressor_wte_weight_1_0": [ + [ + 0.0181884765625 + ], + [ + 
0.025665283203125 + ], + [ + 0.01727294921875 + ], + [ + 0.015869140625 + ], + [ + 0.01837158203125 + ], + [ + 0.0209503173828125 + ], + [ + 0.0199127197265625 + ], + [ + 0.01641845703125 + ], + [ + 0.0213775634765625 + ], + [ + 0.01971435546875 + ], + [ + 0.020751953125 + ], + [ + 0.0206756591796875 + ], + [ + 0.018585205078125 + ], + [ + 0.017120361328125 + ], + [ + 0.016693115234375 + ], + [ + 0.01551055908203125 + ], + [ + 0.019378662109375 + ], + [ + 0.0218353271484375 + ], + [ + 0.018707275390625 + ], + [ + 0.018524169921875 + ], + [ + 0.0207672119140625 + ], + [ + 0.0210113525390625 + ], + [ + 0.017608642578125 + ], + [ + 0.016937255859375 + ], + [ + 0.0146331787109375 + ], + [ + 0.016754150390625 + ], + [ + 0.02288818359375 + ], + [ + 0.0201873779296875 + ], + [ + 0.0160675048828125 + ], + [ + 0.0161285400390625 + ], + [ + 0.0251617431640625 + ], + [ + 0.015899658203125 + ], + [ + 0.016143798828125 + ], + [ + 0.0206756591796875 + ], + [ + 0.0192718505859375 + ], + [ + 0.01537322998046875 + ], + [ + 0.017669677734375 + ], + [ + 0.0156402587890625 + ], + [ + 0.0193023681640625 + ], + [ + 0.021484375 + ], + [ + 0.018341064453125 + ], + [ + 0.017730712890625 + ], + [ + 0.0257110595703125 + ], + [ + 0.0167388916015625 + ], + [ + 0.017822265625 + ], + [ + 0.016204833984375 + ], + [ + 0.0133209228515625 + ], + [ + 0.0187835693359375 + ], + [ + 0.015716552734375 + ], + [ + 0.0193939208984375 + ], + [ + 0.018707275390625 + ], + [ + 0.0181427001953125 + ], + [ + 0.017822265625 + ], + [ + 0.018035888671875 + ], + [ + 0.01763916015625 + ], + [ + 0.0210418701171875 + ], + [ + 0.018341064453125 + ], + [ + 0.0186614990234375 + ], + [ + 0.0202789306640625 + ], + [ + 0.01519775390625 + ], + [ + 0.020172119140625 + ], + [ + 0.02069091796875 + ], + [ + 0.0180816650390625 + ], + [ + 0.0163726806640625 + ], + [ + 0.0164337158203125 + ], + [ + 0.017852783203125 + ], + [ + 0.018646240234375 + ], + [ + 0.0186614990234375 + ], + [ + 0.0171356201171875 + ], + [ + 0.0163116455078125 
+ ], + [ + 0.01611328125 + ], + [ + 0.0183868408203125 + ], + [ + 0.016571044921875 + ], + [ + 0.024322509765625 + ], + [ + 0.017547607421875 + ], + [ + 0.01885986328125 + ], + [ + 0.0171051025390625 + ], + [ + 0.0189971923828125 + ], + [ + 0.019134521484375 + ], + [ + 0.0159759521484375 + ], + [ + 0.020416259765625 + ], + [ + 0.0206756591796875 + ], + [ + 0.0185089111328125 + ], + [ + 0.0176544189453125 + ], + [ + 0.01861572265625 + ], + [ + 0.0157623291015625 + ], + [ + 0.0214691162109375 + ], + [ + 0.0176239013671875 + ], + [ + 0.0150299072265625 + ], + [ + 0.0193939208984375 + ], + [ + 0.02099609375 + ], + [ + 0.0237274169921875 + ], + [ + 0.0191802978515625 + ], + [ + 0.0176849365234375 + ], + [ + 0.01983642578125 + ], + [ + 0.0178070068359375 + ], + [ + 0.020050048828125 + ], + [ + 0.01355743408203125 + ], + [ + 0.01800537109375 + ], + [ + 0.019195556640625 + ], + [ + 0.0178375244140625 + ], + [ + 0.0227813720703125 + ], + [ + 0.01983642578125 + ], + [ + 0.019744873046875 + ], + [ + 0.0207977294921875 + ], + [ + 0.0200958251953125 + ], + [ + 0.0193939208984375 + ], + [ + 0.018280029296875 + ], + [ + 0.0204620361328125 + ], + [ + 0.0170745849609375 + ], + [ + 0.0171661376953125 + ], + [ + 0.0176849365234375 + ], + [ + 0.015625 + ], + [ + 0.01715087890625 + ], + [ + 0.01885986328125 + ], + [ + 0.015869140625 + ], + [ + 0.0142364501953125 + ], + [ + 0.01629638671875 + ], + [ + 0.017852783203125 + ], + [ + 0.01678466796875 + ], + [ + 0.0186920166015625 + ], + [ + 0.0174560546875 + ], + [ + 0.016754150390625 + ], + [ + 0.0172119140625 + ], + [ + 0.0206756591796875 + ], + [ + 0.02178955078125 + ], + [ + 0.02001953125 + ], + [ + 0.0166473388671875 + ] + ], + "asymmetric_weights_decompressor_linear_weight_0": [ + [ + 0.0009713172912597656 + ], + [ + 0.0009407997131347656 + ], + [ + 0.000934600830078125 + ], + [ + 0.0009560585021972656 + ], + [ + 0.00090789794921875 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009274482727050781 + ], + [ + 0.000965118408203125 + ], 
+ [ + 0.0009703636169433594 + ], + [ + 0.0009093284606933594 + ], + [ + 0.0009679794311523438 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009374618530273438 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009140968322753906 + ], + [ + 0.0009531974792480469 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009226799011230469 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009160041809082031 + ], + [ + 0.0009560585021972656 + ], + [ + 0.0009713172912597656 + ], + [ + 0.000942230224609375 + ], + [ + 0.0009527206420898438 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009450912475585938 + ], + [ + 0.0009388923645019531 + ], + [ + 0.0009670257568359375 + ], + [ + 0.00095367431640625 + ], + [ + 0.0009665489196777344 + ], + [ + 0.00096893310546875 + ], + [ + 0.0009512901306152344 + ], + [ + 0.0009665489196777344 + ], + [ + 0.000934600830078125 + ], + [ + 0.0009379386901855469 + ], + [ + 0.0009522438049316406 + ], + [ + 0.0009431838989257812 + ], + [ + 0.0009765625 + ], + [ + 0.0009579658508300781 + ], + [ + 0.0009608268737792969 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009694099426269531 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009665489196777344 + ], + [ + 0.0009260177612304688 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009188652038574219 + ], + [ + 0.0009365081787109375 + ], + [ + 0.00095367431640625 + ], + [ + 0.0009708404541015625 + ], + [ + 0.0009398460388183594 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009446144104003906 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009593963623046875 + ], + [ + 0.0009503364562988281 + ], + [ + 0.0009560585021972656 + ], + [ + 0.0009608268737792969 + ], + [ + 0.0009760856628417969 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0008978843688964844 + ] + ] +} \ No newline at end of file diff --git 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json new file mode 100644 index 00000000000..fd8fbda6f54 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json @@ -0,0 +1,38 @@ +[ + { + "weight_name": "wte_weight_1", + "node_with_weight": "embedding", + "weight_port_id": 0, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "linear_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot new file mode 100644 index 00000000000..0a7bb5fe8f8 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 
asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; +"3 wte_weight_1_updated_constant0" -> "4 symmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(4096, 1)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 symmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json new file mode 100644 index 00000000000..24d93dde47d --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json @@ -0,0 +1,1222 @@ +{ + "symmetric_weights_decompressor_wte_weight_1_0": [ + [ + [ + -0.290283203125 + ], + [ + -0.283447265625 + ] + ], + [ + [ + 0.313720703125 + ], + [ + -0.50439453125 + ] + ], + [ + [ + 0.28662109375 + ], + [ + -0.263916015625 + ] + ], + [ + [ + 0.255859375 + ], + [ + 
0.1998291015625 + ] + ], + [ + [ + -0.271240234375 + ], + [ + -0.319580078125 + ] + ], + [ + [ + -0.3564453125 + ], + [ + 0.31103515625 + ] + ], + [ + [ + 0.3388671875 + ], + [ + -0.2958984375 + ] + ], + [ + [ + -0.2027587890625 + ], + [ + 0.32080078125 + ] + ], + [ + [ + -0.2509765625 + ], + [ + -0.423095703125 + ] + ], + [ + [ + -0.290771484375 + ], + [ + 0.337646484375 + ] + ], + [ + [ + 0.34912109375 + ], + [ + -0.3125 + ] + ], + [ + [ + 0.286865234375 + ], + [ + 0.3876953125 + ] + ], + [ + [ + 0.373046875 + ], + [ + 0.2486572265625 + ] + ], + [ + [ + 0.295166015625 + ], + [ + -0.238525390625 + ] + ], + [ + [ + -0.28857421875 + ], + [ + 0.2283935546875 + ] + ], + [ + [ + 0.28564453125 + ], + [ + 0.2127685546875 + ] + ], + [ + [ + -0.35009765625 + ], + [ + -0.275634765625 + ] + ], + [ + [ + -0.3984375 + ], + [ + -0.3095703125 + ] + ], + [ + [ + 0.28564453125 + ], + [ + 0.31591796875 + ] + ], + [ + [ + -0.351318359375 + ], + [ + -0.304931640625 + ] + ], + [ + [ + -0.316650390625 + ], + [ + -0.35595703125 + ] + ], + [ + [ + -0.3603515625 + ], + [ + -0.2188720703125 + ] + ], + [ + [ + -0.233642578125 + ], + [ + 0.303466796875 + ] + ], + [ + [ + 0.27685546875 + ], + [ + -0.26318359375 + ] + ], + [ + [ + 0.271240234375 + ], + [ + 0.2080078125 + ] + ], + [ + [ + 0.30126953125 + ], + [ + 0.26171875 + ] + ], + [ + [ + 0.3359375 + ], + [ + -0.393798828125 + ] + ], + [ + [ + 0.326904296875 + ], + [ + -0.316650390625 + ] + ], + [ + [ + -0.2403564453125 + ], + [ + -0.2607421875 + ] + ], + [ + [ + 0.2646484375 + ], + [ + -0.249267578125 + ] + ], + [ + [ + 0.47900390625 + ], + [ + 0.36181640625 + ] + ], + [ + [ + 0.25048828125 + ], + [ + 0.3310546875 + ] + ], + [ + [ + -0.25830078125 + ], + [ + 0.25634765625 + ] + ], + [ + [ + -0.349365234375 + ], + [ + -0.290283203125 + ] + ], + [ + [ + 0.269287109375 + ], + [ + 0.38134765625 + ] + ], + [ + [ + -0.274169921875 + ], + [ + -0.253662109375 + ] + ], + [ + [ + -0.251953125 + ], + [ + 0.302490234375 + ] + ], + [ + [ + 0.2353515625 
+ ], + [ + -0.262939453125 + ] + ], + [ + [ + -0.268310546875 + ], + [ + 0.3466796875 + ] + ], + [ + [ + -0.39892578125 + ], + [ + -0.27734375 + ] + ], + [ + [ + 0.2763671875 + ], + [ + -0.308349609375 + ] + ], + [ + [ + -0.254638671875 + ], + [ + -0.31689453125 + ] + ], + [ + [ + -0.36572265625 + ], + [ + 0.453857421875 + ] + ], + [ + [ + -0.2156982421875 + ], + [ + 0.290283203125 + ] + ], + [ + [ + 0.34716796875 + ], + [ + 0.346923828125 + ] + ], + [ + [ + 0.235595703125 + ], + [ + -0.266357421875 + ] + ], + [ + [ + -0.2052001953125 + ], + [ + -0.253173828125 + ] + ], + [ + [ + -0.259765625 + ], + [ + 0.339111328125 + ] + ], + [ + [ + 0.259521484375 + ], + [ + 0.25537109375 + ] + ], + [ + [ + -0.3212890625 + ], + [ + -0.283935546875 + ] + ], + [ + [ + 0.2685546875 + ], + [ + -0.314453125 + ] + ], + [ + [ + 0.2138671875 + ], + [ + 0.378662109375 + ] + ], + [ + [ + 0.256591796875 + ], + [ + -0.311279296875 + ] + ], + [ + [ + -0.272216796875 + ], + [ + 0.302490234375 + ] + ], + [ + [ + -0.330078125 + ], + [ + 0.22216796875 + ] + ], + [ + [ + -0.241943359375 + ], + [ + -0.442626953125 + ] + ], + [ + [ + -0.33740234375 + ], + [ + 0.235107421875 + ] + ], + [ + [ + 0.320068359375 + ], + [ + -0.275146484375 + ] + ], + [ + [ + -0.338623046875 + ], + [ + 0.3076171875 + ] + ], + [ + [ + 0.256591796875 + ], + [ + 0.2252197265625 + ] + ], + [ + [ + 0.34765625 + ], + [ + -0.29541015625 + ] + ], + [ + [ + -0.306396484375 + ], + [ + 0.353271484375 + ] + ], + [ + [ + 0.309326171875 + ], + [ + -0.231201171875 + ] + ], + [ + [ + 0.290283203125 + ], + [ + -0.2315673828125 + ] + ], + [ + [ + 0.263671875 + ], + [ + -0.26025390625 + ] + ], + [ + [ + 0.320556640625 + ], + [ + 0.212890625 + ] + ], + [ + [ + 0.341796875 + ], + [ + 0.2548828125 + ] + ], + [ + [ + -0.302001953125 + ], + [ + -0.3212890625 + ] + ], + [ + [ + -0.1842041015625 + ], + [ + 0.28271484375 + ] + ], + [ + [ + -0.2568359375 + ], + [ + 0.26318359375 + ] + ], + [ + [ + -0.2091064453125 + ], + [ + 0.304443359375 + ] + ], 
+ [ + [ + -0.2381591796875 + ], + [ + 0.319580078125 + ] + ], + [ + [ + 0.264892578125 + ], + [ + 0.252197265625 + ] + ], + [ + [ + -0.271728515625 + ], + [ + 0.42333984375 + ] + ], + [ + [ + -0.2264404296875 + ], + [ + -0.36376953125 + ] + ], + [ + [ + -0.296875 + ], + [ + 0.30419921875 + ] + ], + [ + [ + 0.268798828125 + ], + [ + -0.276123046875 + ] + ], + [ + [ + 0.2763671875 + ], + [ + 0.330810546875 + ] + ], + [ + [ + 0.314453125 + ], + [ + -0.29541015625 + ] + ], + [ + [ + -0.19921875 + ], + [ + 0.2685546875 + ] + ], + [ + [ + 0.32763671875 + ], + [ + -0.323486328125 + ] + ], + [ + [ + -0.3369140625 + ], + [ + 0.322265625 + ] + ], + [ + [ + 0.32763671875 + ], + [ + 0.293212890625 + ] + ], + [ + [ + 0.2374267578125 + ], + [ + 0.3037109375 + ] + ], + [ + [ + -0.295166015625 + ], + [ + 0.2978515625 + ] + ], + [ + [ + -0.306396484375 + ], + [ + -0.25634765625 + ] + ], + [ + [ + 0.314697265625 + ], + [ + -0.36962890625 + ] + ], + [ + [ + 0.2705078125 + ], + [ + 0.290283203125 + ] + ], + [ + [ + -0.2493896484375 + ], + [ + -0.263427734375 + ] + ], + [ + [ + -0.359619140625 + ], + [ + 0.2587890625 + ] + ], + [ + [ + -0.23583984375 + ], + [ + -0.348388671875 + ] + ], + [ + [ + -0.2421875 + ], + [ + -0.436767578125 + ] + ], + [ + [ + -0.349365234375 + ], + [ + 0.26220703125 + ] + ], + [ + [ + 0.25732421875 + ], + [ + 0.314697265625 + ] + ], + [ + [ + 0.21728515625 + ], + [ + 0.338134765625 + ] + ], + [ + [ + 0.301025390625 + ], + [ + -0.2666015625 + ] + ], + [ + [ + 0.314697265625 + ], + [ + -0.32421875 + ] + ], + [ + [ + 0.260986328125 + ], + [ + 0.262939453125 + ] + ], + [ + [ + 0.2110595703125 + ], + [ + -0.28759765625 + ] + ], + [ + [ + 0.339599609375 + ], + [ + 0.359375 + ] + ], + [ + [ + -0.289306640625 + ], + [ + 0.279296875 + ] + ], + [ + [ + -0.330810546875 + ], + [ + 0.395263671875 + ] + ], + [ + [ + 0.271240234375 + ], + [ + -0.32373046875 + ] + ], + [ + [ + 0.32568359375 + ], + [ + -0.3037109375 + ] + ], + [ + [ + 0.387939453125 + ], + [ + 0.3095703125 + ] 
+ ], + [ + [ + 0.325927734375 + ], + [ + -0.314697265625 + ] + ], + [ + [ + -0.30615234375 + ], + [ + -0.346923828125 + ] + ], + [ + [ + -0.330322265625 + ], + [ + -0.28369140625 + ] + ], + [ + [ + 0.373046875 + ], + [ + 0.251708984375 + ] + ], + [ + [ + 0.259033203125 + ], + [ + -0.284912109375 + ] + ], + [ + [ + 0.2054443359375 + ], + [ + 0.29931640625 + ] + ], + [ + [ + -0.259033203125 + ], + [ + 0.304443359375 + ] + ], + [ + [ + -0.268310546875 + ], + [ + 0.2294921875 + ] + ], + [ + [ + 0.27392578125 + ], + [ + 0.312744140625 + ] + ], + [ + [ + 0.2205810546875 + ], + [ + -0.31884765625 + ] + ], + [ + [ + 0.286865234375 + ], + [ + -0.21923828125 + ] + ], + [ + [ + 0.2305908203125 + ], + [ + -0.2103271484375 + ] + ], + [ + [ + -0.292236328125 + ], + [ + -0.2314453125 + ] + ], + [ + [ + -0.300537109375 + ], + [ + -0.32080078125 + ] + ], + [ + [ + 0.292236328125 + ], + [ + 0.2412109375 + ] + ], + [ + [ + -0.30126953125 + ], + [ + 0.29443359375 + ] + ], + [ + [ + 0.277587890625 + ], + [ + -0.27880859375 + ] + ], + [ + [ + 0.26318359375 + ], + [ + 0.280029296875 + ] + ], + [ + [ + -0.32763671875 + ], + [ + -0.272216796875 + ] + ], + [ + [ + 0.272705078125 + ], + [ + -0.38623046875 + ] + ], + [ + [ + -0.249267578125 + ], + [ + 0.4208984375 + ] + ], + [ + [ + 0.290283203125 + ], + [ + 0.34765625 + ] + ], + [ + [ + -0.284912109375 + ], + [ + -0.34326171875 + ] + ] + ], + "asymmetric_weights_decompressor_linear_weight_0": [ + [ + 0.0009713172912597656 + ], + [ + 0.0009407997131347656 + ], + [ + 0.000934600830078125 + ], + [ + 0.0009560585021972656 + ], + [ + 0.00090789794921875 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009274482727050781 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009093284606933594 + ], + [ + 0.0009679794311523438 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009374618530273438 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009140968322753906 + ], + [ + 0.0009531974792480469 + ], 
+ [ + 0.000965118408203125 + ], + [ + 0.0009226799011230469 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009160041809082031 + ], + [ + 0.0009560585021972656 + ], + [ + 0.0009713172912597656 + ], + [ + 0.000942230224609375 + ], + [ + 0.0009527206420898438 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009450912475585938 + ], + [ + 0.0009388923645019531 + ], + [ + 0.0009670257568359375 + ], + [ + 0.00095367431640625 + ], + [ + 0.0009665489196777344 + ], + [ + 0.00096893310546875 + ], + [ + 0.0009512901306152344 + ], + [ + 0.0009665489196777344 + ], + [ + 0.000934600830078125 + ], + [ + 0.0009379386901855469 + ], + [ + 0.0009522438049316406 + ], + [ + 0.0009431838989257812 + ], + [ + 0.0009765625 + ], + [ + 0.0009579658508300781 + ], + [ + 0.0009608268737792969 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009694099426269531 + ], + [ + 0.0009603500366210938 + ], + [ + 0.0009665489196777344 + ], + [ + 0.0009260177612304688 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009188652038574219 + ], + [ + 0.0009365081787109375 + ], + [ + 0.00095367431640625 + ], + [ + 0.0009708404541015625 + ], + [ + 0.0009398460388183594 + ], + [ + 0.0009655952453613281 + ], + [ + 0.0009446144104003906 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009593963623046875 + ], + [ + 0.0009503364562988281 + ], + [ + 0.0009560585021972656 + ], + [ + 0.0009608268737792969 + ], + [ + 0.0009760856628417969 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0008978843688964844 + ] + ] +} \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json new file mode 100644 index 00000000000..81205ac2ca8 --- /dev/null +++ 
b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json @@ -0,0 +1,38 @@ +[ + { + "weight_name": "wte_weight_1", + "node_with_weight": "embedding", + "weight_port_id": 0, + "weight_dtype": "float32", + "weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + }, + { + "weight_name": "linear_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int4_sym", + "group_size": 32, + "codebook_values": null + } + } +] \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot new file mode 100644 index 00000000000..b249fdf7ce3 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot @@ -0,0 +1,24 @@ +strict digraph { +"0 linear_bias" [id=0, type="get_attr"]; +"1 lm_head_bias" [id=1, type="get_attr"]; +"2 input_ids" [id=2, type=input]; +"3 wte_weight_1_updated_constant0" [id=3, type="get_attr"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" [id=4, type="call_module"]; +"5 embedding" [id=5, type=embedding]; +"6 linear_weight_updated_constant0" [id=6, type="get_attr"]; +"7 asymmetric_weights_decompressor_linear_weight_0" [id=7, type="call_module"]; +"8 linear" [id=8, type=linear]; +"9 linear_1" [id=9, type=linear]; +"10 output" [id=10, type=output]; +"0 linear_bias" -> "8 linear" [style=solid, label="(64,)"]; +"1 lm_head_bias" -> "9 linear_1" [style=solid, label="(128,)"]; +"2 input_ids" -> "5 embedding" [style=solid, label="(5,)"]; 
+"3 wte_weight_1_updated_constant0" -> "4 asymmetric_weights_decompressor_wte_weight_1_0" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "5 embedding" [style=solid, label="(128, 64)"]; +"4 asymmetric_weights_decompressor_wte_weight_1_0" -> "9 linear_1" [style=solid, label="(128, 64)"]; +"5 embedding" -> "8 linear" [style=solid, label="(5, 64)"]; +"6 linear_weight_updated_constant0" -> "7 asymmetric_weights_decompressor_linear_weight_0" [style=solid, label="(64, 64)"]; +"7 asymmetric_weights_decompressor_linear_weight_0" -> "8 linear" [style=solid, label="(64, 64)"]; +"8 linear" -> "9 linear_1" [style=solid, label="(5, 64)"]; +"9 linear_1" -> "10 output" [style=solid, label="(5, 128)"]; +} diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json new file mode 100644 index 00000000000..edb48f38b53 --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json @@ -0,0 +1,582 @@ +{ + "asymmetric_weights_decompressor_wte_weight_1_0": [ + [ + 0.01552581787109375 + ], + [ + 0.02117919921875 + ], + [ + 0.020660400390625 + ], + [ + 0.0182037353515625 + ], + [ + 0.0228118896484375 + ], + [ + 0.0202178955078125 + ], + [ + 0.0205841064453125 + ], + [ + 0.0165252685546875 + ], + [ + 0.0195770263671875 + ], + [ + 0.0199127197265625 + ], + [ + 0.018218994140625 + ], + [ + 0.0209197998046875 + ], + [ + 0.0209503173828125 + ], + [ + 0.0175628662109375 + ], + [ + 0.0171966552734375 + ], + [ + 0.0175628662109375 + ], + [ + 0.01415252685546875 + ], + [ + 0.0167388916015625 + ], + [ + 0.0225982666015625 + ], + [ + 
0.018218994140625 + ], + [ + 0.0182342529296875 + ], + [ + 0.02288818359375 + ], + [ + 0.0174713134765625 + ], + [ + 0.01513671875 + ], + [ + 0.0233917236328125 + ], + [ + 0.0154266357421875 + ], + [ + 0.016845703125 + ], + [ + 0.0186767578125 + ], + [ + 0.018646240234375 + ], + [ + 0.0182952880859375 + ], + [ + 0.0182647705078125 + ], + [ + 0.017181396484375 + ], + [ + 0.0171966552734375 + ], + [ + 0.0185089111328125 + ], + [ + 0.0191497802734375 + ], + [ + 0.0159454345703125 + ], + [ + 0.02313232421875 + ], + [ + 0.0196075439453125 + ], + [ + 0.0168304443359375 + ], + [ + 0.015594482421875 + ], + [ + 0.01898193359375 + ], + [ + 0.021270751953125 + ], + [ + 0.015869140625 + ], + [ + 0.0191192626953125 + ], + [ + 0.0183563232421875 + ], + [ + 0.01557159423828125 + ], + [ + 0.02337646484375 + ], + [ + 0.01558685302734375 + ], + [ + 0.0152740478515625 + ], + [ + 0.0184783935546875 + ], + [ + 0.016021728515625 + ], + [ + 0.0166473388671875 + ], + [ + 0.0171051025390625 + ], + [ + 0.0184326171875 + ], + [ + 0.0150909423828125 + ], + [ + 0.023773193359375 + ], + [ + 0.0170745849609375 + ], + [ + 0.0181121826171875 + ], + [ + 0.01715087890625 + ], + [ + 0.020843505859375 + ], + [ + 0.018280029296875 + ], + [ + 0.0178375244140625 + ], + [ + 0.01375579833984375 + ], + [ + 0.0179290771484375 + ], + [ + 0.0196075439453125 + ], + [ + 0.01708984375 + ], + [ + 0.0186920166015625 + ], + [ + 0.0255584716796875 + ], + [ + 0.02203369140625 + ], + [ + 0.0218505859375 + ], + [ + 0.0159759521484375 + ], + [ + 0.017852783203125 + ], + [ + 0.01922607421875 + ], + [ + 0.0218658447265625 + ], + [ + 0.0211029052734375 + ], + [ + 0.017547607421875 + ], + [ + 0.016937255859375 + ], + [ + 0.020782470703125 + ], + [ + 0.0189056396484375 + ], + [ + 0.01519775390625 + ], + [ + 0.01806640625 + ], + [ + 0.021728515625 + ], + [ + 0.0183868408203125 + ], + [ + 0.019927978515625 + ], + [ + 0.018463134765625 + ], + [ + 0.0167999267578125 + ], + [ + 0.017059326171875 + ], + [ + 0.01708984375 + ], + [ + 
0.016143798828125 + ], + [ + 0.0185699462890625 + ], + [ + 0.018341064453125 + ], + [ + 0.01262664794921875 + ], + [ + 0.01849365234375 + ], + [ + 0.0159759521484375 + ], + [ + 0.019012451171875 + ], + [ + 0.01947021484375 + ], + [ + 0.0208282470703125 + ], + [ + 0.0182342529296875 + ], + [ + 0.0167999267578125 + ], + [ + 0.01523590087890625 + ], + [ + 0.021331787109375 + ], + [ + 0.0187225341796875 + ], + [ + 0.0179443359375 + ], + [ + 0.017608642578125 + ], + [ + 0.01416778564453125 + ], + [ + 0.0186614990234375 + ], + [ + 0.01302337646484375 + ], + [ + 0.018463134765625 + ], + [ + 0.0204010009765625 + ], + [ + 0.018463134765625 + ], + [ + 0.0205078125 + ], + [ + 0.0153350830078125 + ], + [ + 0.01751708984375 + ], + [ + 0.01922607421875 + ], + [ + 0.0174560546875 + ], + [ + 0.0154571533203125 + ], + [ + 0.01812744140625 + ], + [ + 0.019073486328125 + ], + [ + 0.017852783203125 + ], + [ + 0.0158538818359375 + ], + [ + 0.0195465087890625 + ], + [ + 0.0213470458984375 + ], + [ + 0.01995849609375 + ], + [ + 0.0166168212890625 + ], + [ + 0.019561767578125 + ], + [ + 0.0184478759765625 + ], + [ + 0.0162353515625 + ], + [ + 0.021270751953125 + ] + ], + "asymmetric_weights_decompressor_linear_weight_0": [ + [ + 0.0009274482727050781 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009093284606933594 + ], + [ + 0.0009679794311523438 + ], + [ + 0.0009703636169433594 + ], + [ + 0.0009570121765136719 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009617805480957031 + ], + [ + 0.0009212493896484375 + ], + [ + 0.0009541511535644531 + ], + [ + 0.000965118408203125 + ], + [ + 0.0009226799011230469 + ], + [ + 0.0009541511535644531 + ], + [ + 0.0009613037109375 + ], + [ + 0.0009560585021972656 + ], + [ + 0.0009713172912597656 + ], + [ + 0.000942230224609375 + ], + [ + 0.0009274482727050781 + ], + [ + 0.0009565353393554688 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009312629699707031 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0009579658508300781 
+ ], + [ + 0.0009436607360839844 + ], + [ + 0.0009632110595703125 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009670257568359375 + ], + [ + 0.0009665489196777344 + ], + [ + 0.000949859619140625 + ], + [ + 0.0009379386901855469 + ], + [ + 0.0009522438049316406 + ], + [ + 0.0009431838989257812 + ], + [ + 0.0009694099426269531 + ], + [ + 0.0009660720825195312 + ], + [ + 0.0009608268737792969 + ], + [ + 0.0009489059448242188 + ], + [ + 0.0009322166442871094 + ], + [ + 0.0009675025939941406 + ], + [ + 0.0009665489196777344 + ], + [ + 0.0009264945983886719 + ], + [ + 0.0009684562683105469 + ], + [ + 0.0009188652038574219 + ], + [ + 0.0009365081787109375 + ], + [ + 0.00095367431640625 + ], + [ + 0.0009708404541015625 + ], + [ + 0.0009508132934570312 + ], + [ + 0.0009217262268066406 + ], + [ + 0.0009446144104003906 + ], + [ + 0.0009517669677734375 + ], + [ + 0.0009717941284179688 + ], + [ + 0.0009593963623046875 + ], + [ + 0.0009174346923828125 + ], + [ + 0.0009484291076660156 + ], + [ + 0.0009756088256835938 + ], + [ + 0.0009760856628417969 + ], + [ + 0.0009722709655761719 + ], + [ + 0.0008978843688964844 + ], + [ + 0.0009188652038574219 + ], + [ + 0.0009579658508300781 + ], + [ + 0.000946044921875 + ], + [ + 0.0009713172912597656 + ], + [ + 0.0009474754333496094 + ], + [ + 0.0009555816650390625 + ] + ] +} \ No newline at end of file diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json new file mode 100644 index 00000000000..49d45c1fffb --- /dev/null +++ b/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json @@ -0,0 +1,38 @@ +[ + { + "weight_name": "wte_weight_1", + "node_with_weight": "embedding", + "weight_port_id": 0, + "weight_dtype": "float32", + 
"weight_shape": [ + 128, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + }, + { + "weight_name": "linear_weight", + "node_with_weight": "linear", + "weight_port_id": 1, + "weight_dtype": "float32", + "weight_shape": [ + 64, + 64 + ], + "reduction_axes": [ + 1 + ], + "compression_config": { + "mode": "int8_asym", + "group_size": -1, + "codebook_values": null + } + } +] \ No newline at end of file From 09dabf6ee7820356c02a4f72f1b0381d24629de6 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 16:33:48 +0400 Subject: [PATCH 62/91] minor --- tests/executorch/test_quantizer_compression.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/executorch/test_quantizer_compression.py b/tests/executorch/test_quantizer_compression.py index 180ba2510f7..aa275251fbe 100644 --- a/tests/executorch/test_quantizer_compression.py +++ b/tests/executorch/test_quantizer_compression.py @@ -222,7 +222,7 @@ def test_compress_pt2e_scales( model_case: ModelCase, quantizer_params, pt2e_params, - regen_ref_data=True, + regen_ref_data, ): fx_model, example_input = build_torch_fx_model(model_case) with torch.no_grad(): @@ -320,7 +320,7 @@ def test_openvino_wc_params( model_case: ModelCase, quantizer_params, pt2e_params, - regen_ref_data=True, + regen_ref_data, ): fx_model, _ = build_torch_fx_model(model_case) nncf_graph: NNCFGraph = GraphConverter.create_nncf_graph(fx_model) From 68316a55bfe1cc31612b79411fed014c7b015917 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 16:42:49 +0400 Subject: [PATCH 63/91] add workflow for executorch test --- .github/workflows/call_precommit.yml | 38 ++++++++++++++++++++++++++++ tests/executorch/requirements.txt | 4 +++ 2 files changed, 42 insertions(+) create mode 100644 tests/executorch/requirements.txt diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index 6c2f58ae9ce..553598b0e72 
100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -136,6 +136,44 @@ jobs: env: NUM_WORKERS: 4 + executorch: + timeout-minutes: 40 + runs-on: ubuntu-latest-8-cores + defaults: + run: + shell: bash + env: + DEBIAN_FRONTEND: noninteractive + steps: + - name: Install dependencies + run : | + sudo apt-get update + sudo apt-get --assume-yes install gcc g++ build-essential ninja-build libgl1-mesa-dev libglib2.0-0 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + lfs: true + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + with: + python-version: ${{ inputs.python_version }} + - name: Runner info + continue-on-error: true + run: | + cat /etc/*release + cat /proc/cpuinfo + - name: Override constraints + if: ${{ inputs.override_requirements != '' }} + run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }}" + shell: bash + - name: Install NNCF and test requirements + run: pip install . 
-r tests/executorch/requirements.txt + - name: Print installed modules + run: pip list + - name: Run PyTorch precommit test scope + run: | + make test-torch-cpu + env: + NUM_WORKERS: 4 + pytorch-cuda: timeout-minutes: 40 runs-on: aks-linux-4-cores-28gb-gpu-tesla-t4 diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt new file mode 100644 index 00000000000..94876d7dc71 --- /dev/null +++ b/tests/executorch/requirements.txt @@ -0,0 +1,4 @@ +-c ../../constraints.txt +torch==2.9.0 +torchao +git+https://github.com/anzr299/executorch.git@an/quantizer_nncf_pt2e_support \ No newline at end of file From 58b8992820d6cd05d2c6b55e52f7692b9c7d936d Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 16:47:08 +0400 Subject: [PATCH 64/91] update workflow and makefile --- .github/workflows/call_precommit.yml | 2 +- Makefile | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index 553598b0e72..032ac15f20c 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -170,7 +170,7 @@ jobs: run: pip list - name: Run PyTorch precommit test scope run: | - make test-torch-cpu + make test-executorch env: NUM_WORKERS: 4 diff --git a/Makefile b/Makefile index d420c8ce4f0..0cf574a1848 100644 --- a/Makefile +++ b/Makefile @@ -141,6 +141,9 @@ test-torch-cpu: test-torch-cuda: pytest ${COVERAGE_ARGS} tests/torch -ra -m "cuda and not weekly and not nightly and not models_hub and not legacy" --junitxml ${JUNITXML_PATH} +test-executorch: + pytest ${COVERAGE_ARGS} tests/executorch --junitxml ${JUNITXML_PATH} + test-torch-nightly: pytest ${COVERAGE_ARGS} tests/torch -m "nightly or legacy" --junitxml ${JUNITXML_PATH} $(DATA_ARG) From e7bae1fc6dd3f52fd0350503a75fdf7e44f17230 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 16:51:58 +0400 Subject: [PATCH 65/91] update execiutorch test requirements. 
--- tests/executorch/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index 94876d7dc71..b90f9ea61fe 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -1,4 +1,5 @@ -c ../../constraints.txt torch==2.9.0 -torchao +torchvision==0.24.0 +torchao==0.13.0 git+https://github.com/anzr299/executorch.git@an/quantizer_nncf_pt2e_support \ No newline at end of file From d4da34f0513a72cd949c77a48435ca5dd24bdfe8 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 16:56:32 +0400 Subject: [PATCH 66/91] fix precommit --- tests/executorch/test_quantizer_compression.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/executorch/test_quantizer_compression.py b/tests/executorch/test_quantizer_compression.py index aa275251fbe..384bd7d42d1 100644 --- a/tests/executorch/test_quantizer_compression.py +++ b/tests/executorch/test_quantizer_compression.py @@ -291,18 +291,17 @@ def test_openvino_quantizer( def to_json_serializable(obj: Any) -> dict[Any, Any]: if dataclasses.is_dataclass(obj): return {k: to_json_serializable(v) for k, v in dataclasses.asdict(obj).items()} - elif isinstance(obj, Enum): + if isinstance(obj, Enum): return obj.value - elif isinstance(obj, (list, tuple)): + if isinstance(obj, (list, tuple)): return [to_json_serializable(x) for x in obj] - elif isinstance(obj, torch.Tensor): + if isinstance(obj, torch.Tensor): return obj.detach().cpu().tolist() - elif isinstance(obj, dict): + if isinstance(obj, dict): return {k: to_json_serializable(v) for k, v in obj.items()} - elif isinstance(obj, NNCFNode): + if isinstance(obj, NNCFNode): return obj.node_name - else: - return obj + return obj @pytest.mark.parametrize( From 2b916585f021ac891b04570adb44d9cad7cd9a5a Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 17:33:35 +0400 Subject: [PATCH 67/91] override constraint in 
executorch workflow --- .github/workflows/call_precommit.yml | 9 +++++++-- .github/workflows/precommit.yml | 1 + tests/executorch/requirements.txt | 3 --- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index 032ac15f20c..360bbeadbbe 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -18,6 +18,11 @@ on: default: '' type: string required: false + executorch_override_requirements: + description: 'Executorch specific Override requirements' + default: '' + type: string + required: false jobs: common: @@ -161,8 +166,8 @@ jobs: cat /etc/*release cat /proc/cpuinfo - name: Override constraints - if: ${{ inputs.override_requirements != '' }} - run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }}" + if: ${{ inputs.override_requirements != '' || inputs.executorch_override_requirements != '' }} + run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }} ${{ inputs.executorch_override_requirements }}" shell: bash - name: Install NNCF and test requirements run: pip install . 
-r tests/executorch/requirements.txt diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index 3a4a1dc9399..d20a850ea3b 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -22,3 +22,4 @@ jobs: with: python_version: "3.10.14" gpu_enabled: true + executorch_override_requirements: "torch==2.9.0 torchvision==0.24.0 torchao==0.13.0" diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index b90f9ea61fe..6c27fb559f1 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -1,5 +1,2 @@ -c ../../constraints.txt -torch==2.9.0 -torchvision==0.24.0 -torchao==0.13.0 git+https://github.com/anzr299/executorch.git@an/quantizer_nncf_pt2e_support \ No newline at end of file From 93c3f19ca7349497f02b272cb3071bd1c6d10c9b Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 17:38:12 +0400 Subject: [PATCH 68/91] minor fix --- .github/workflows/precommit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index d20a850ea3b..fa62e69fbda 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -22,4 +22,4 @@ jobs: with: python_version: "3.10.14" gpu_enabled: true - executorch_override_requirements: "torch==2.9.0 torchvision==0.24.0 torchao==0.13.0" + executorch_override_requirements: "torch==2.9.0,torchvision==0.24.0,torchao==0.13.0" From 932b2969156055ddfdaf45081e340033d692d9bb Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 18:35:05 +0400 Subject: [PATCH 69/91] update workflow for fix --- .github/workflows/call_precommit.yml | 4 ++++ tests/executorch/requirements.txt | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index 360bbeadbbe..ec96828e927 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml 
@@ -165,6 +165,10 @@ jobs: run: | cat /etc/*release cat /proc/cpuinfo + - name: Preinstall ExecuTorch from branch + run: | + git clone --branch an/quantizer_nncf_pt2e_support https://github.com/anzr299/executorch.git executorch + pip install ./executorch - name: Override constraints if: ${{ inputs.override_requirements != '' || inputs.executorch_override_requirements != '' }} run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }} ${{ inputs.executorch_override_requirements }}" diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index 6c27fb559f1..c583847a060 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -1,2 +1 @@ --c ../../constraints.txt -git+https://github.com/anzr299/executorch.git@an/quantizer_nncf_pt2e_support \ No newline at end of file +-c ../../constraints.txt \ No newline at end of file From 67ab13518e076901f7b2c78acf045ebf6ffd10af Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 18:47:23 +0400 Subject: [PATCH 70/91] update workflow file --- .github/workflows/call_precommit.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index ec96828e927..21984fd1bd0 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -168,6 +168,7 @@ jobs: - name: Preinstall ExecuTorch from branch run: | git clone --branch an/quantizer_nncf_pt2e_support https://github.com/anzr299/executorch.git executorch + git -C executorch submodule update --init --recursive pip install ./executorch - name: Override constraints if: ${{ inputs.override_requirements != '' || inputs.executorch_override_requirements != '' }} From a23acafc7cfdd8f2a3dfd1a4f02221708513e3cb Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 19:05:45 +0400 Subject: [PATCH 71/91] install executorch after pytorch --- .github/workflows/call_precommit.yml | 12 ++++++------ 1 
file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index 21984fd1bd0..e21b949eaa6 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -165,17 +165,17 @@ jobs: run: | cat /etc/*release cat /proc/cpuinfo - - name: Preinstall ExecuTorch from branch - run: | - git clone --branch an/quantizer_nncf_pt2e_support https://github.com/anzr299/executorch.git executorch - git -C executorch submodule update --init --recursive - pip install ./executorch - - name: Override constraints if: ${{ inputs.override_requirements != '' || inputs.executorch_override_requirements != '' }} run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }} ${{ inputs.executorch_override_requirements }}" shell: bash - name: Install NNCF and test requirements run: pip install . -r tests/executorch/requirements.txt + - name: Install ExecuTorch from branch + run: | + git clone --branch an/quantizer_nncf_pt2e_support https://github.com/anzr299/executorch.git executorch + git -C executorch submodule update --init --recursive + pip install ./executorch + - name: Override constraints - name: Print installed modules run: pip list - name: Run PyTorch precommit test scope From 646228459f37127768bec830597b4100899dfa37 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 19:33:20 +0400 Subject: [PATCH 72/91] install torch nightly --- .github/workflows/call_precommit.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index e21b949eaa6..648116ea29e 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -170,6 +170,9 @@ jobs: shell: bash - name: Install NNCF and test requirements run: pip install . 
-r tests/executorch/requirements.txt + - name: Install Pytorch Nightly + run: | + pip install --pre torch torchvision torchao --index-url https://download.pytorch.org/whl/nightly/cpu - name: Install ExecuTorch from branch run: | git clone --branch an/quantizer_nncf_pt2e_support https://github.com/anzr299/executorch.git executorch From cf7e8d3a9e74ca8b60a46e92bfc33a2560e25d73 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 19:54:22 +0400 Subject: [PATCH 73/91] update requirements and revert workflow changes --- .github/workflows/call_precommit.yml | 10 +++------- .github/workflows/precommit.yml | 1 - tests/executorch/requirements.txt | 17 ++++++++++++++++- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index 648116ea29e..78f3c993eb4 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -18,11 +18,6 @@ on: default: '' type: string required: false - executorch_override_requirements: - description: 'Executorch specific Override requirements' - default: '' - type: string - required: false jobs: common: @@ -165,8 +160,9 @@ jobs: run: | cat /etc/*release cat /proc/cpuinfo - if: ${{ inputs.override_requirements != '' || inputs.executorch_override_requirements != '' }} - run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }} ${{ inputs.executorch_override_requirements }}" + - name: Override constraints + if: ${{ inputs.override_requirements != '' }} + run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }}" shell: bash - name: Install NNCF and test requirements run: pip install . 
-r tests/executorch/requirements.txt diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index fa62e69fbda..3a4a1dc9399 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -22,4 +22,3 @@ jobs: with: python_version: "3.10.14" gpu_enabled: true - executorch_override_requirements: "torch==2.9.0,torchvision==0.24.0,torchao==0.13.0" diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index c583847a060..1f65536177d 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -1 +1,16 @@ --c ../../constraints.txt \ No newline at end of file +--pre +--extra-index-url https://download.pytorch.org/whl/nightly/cpu + +torch +torchvision +torchao + +# Tests and examples +pytest==8.0.2 +pytest-cov==4.1.0 +pytest-mock==3.12.0 +pytest-dependency==0.6.0 +pytest-ordering==0.6 +pytest-xdist==3.5.0 +pytest-forked==1.6.0 +pytest-split==0.9.0 From a07dc079ecac79bb4e439376eafd16ea0c85e3f0 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 20:02:08 +0400 Subject: [PATCH 74/91] fix minor workflow file issue --- .github/workflows/call_precommit.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index 78f3c993eb4..3b982ab7d29 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -174,7 +174,6 @@ jobs: git clone --branch an/quantizer_nncf_pt2e_support https://github.com/anzr299/executorch.git executorch git -C executorch submodule update --init --recursive pip install ./executorch - - name: Override constraints - name: Print installed modules run: pip list - name: Run PyTorch precommit test scope From 0506bcaa4a8499d1f6ab6be6a5c21b926d8b0d2f Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 20:34:24 +0400 Subject: [PATCH 75/91] install with no build isolation --- .github/workflows/call_precommit.yml | 5 +---- 1 file changed, 1 
insertion(+), 4 deletions(-) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index 3b982ab7d29..ea99b7a7efe 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -166,14 +166,11 @@ jobs: shell: bash - name: Install NNCF and test requirements run: pip install . -r tests/executorch/requirements.txt - - name: Install Pytorch Nightly - run: | - pip install --pre torch torchvision torchao --index-url https://download.pytorch.org/whl/nightly/cpu - name: Install ExecuTorch from branch run: | git clone --branch an/quantizer_nncf_pt2e_support https://github.com/anzr299/executorch.git executorch git -C executorch submodule update --init --recursive - pip install ./executorch + pip install --no-build-isolation ./executorch - name: Print installed modules run: pip list - name: Run PyTorch precommit test scope From f8675ada553481b9cd1a78053497320428d83b93 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 20:48:05 +0400 Subject: [PATCH 76/91] include executorch requirements --- tests/executorch/requirements.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index 1f65536177d..ef83f3d8975 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -5,6 +5,16 @@ torch torchvision torchao +# Copied from https://github.com/anzr299/executorch/blob/an/quantizer_nncf_pt2e_support/requirements-dev.txt +cmake>=3.29, <4.0.0 # For building binary targets in the wheel. +packaging>=24.2 # Lower bound required by setuptools +pip>=23 # For building the pip package. +pyyaml # Imported by the kernel codegen tools. +setuptools>=77.0.3 # For building the pip package contents. +wheel # For building the pip package archive. +zstd # Imported by resolve_buck.py. +certifi # Imported by resolve_buck.py. 
+ # Tests and examples pytest==8.0.2 pytest-cov==4.1.0 From 52a7d5a953454d1f77d1457b8a5603b31c62d1ad Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 21:08:59 +0400 Subject: [PATCH 77/91] include openvino in requirements --- tests/executorch/requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index ef83f3d8975..675bd0eb3b4 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -5,6 +5,9 @@ torch torchvision torchao +# Openvino +openvino==2025.3.0 + # Copied from https://github.com/anzr299/executorch/blob/an/quantizer_nncf_pt2e_support/requirements-dev.txt cmake>=3.29, <4.0.0 # For building binary targets in the wheel. packaging>=24.2 # Lower bound required by setuptools From 9e02948c789693d6108c547430c3c597b0d2589e Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 21:22:14 +0400 Subject: [PATCH 78/91] fix --- .github/workflows/call_precommit.yml | 2 +- tests/executorch/requirements.txt | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index ea99b7a7efe..1a39cb0faf0 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -170,7 +170,7 @@ jobs: run: | git clone --branch an/quantizer_nncf_pt2e_support https://github.com/anzr299/executorch.git executorch git -C executorch submodule update --init --recursive - pip install --no-build-isolation ./executorch + pip install ./executorch - name: Print installed modules run: pip list - name: Run PyTorch precommit test scope diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index 675bd0eb3b4..2042778780b 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -8,6 +8,11 @@ torchao # Openvino openvino==2025.3.0 +# ONNX +onnx==1.17.0; python_version < '3.13' +onnx==1.18.0; 
python_version >= '3.13' +onnxruntime==1.21.1 + # Copied from https://github.com/anzr299/executorch/blob/an/quantizer_nncf_pt2e_support/requirements-dev.txt cmake>=3.29, <4.0.0 # For building binary targets in the wheel. packaging>=24.2 # Lower bound required by setuptools From a578fce2c70e63f5c71593c94469d133d5c7de8c Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 21:30:20 +0400 Subject: [PATCH 79/91] fix --- .github/workflows/call_precommit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index 1a39cb0faf0..ea99b7a7efe 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -170,7 +170,7 @@ jobs: run: | git clone --branch an/quantizer_nncf_pt2e_support https://github.com/anzr299/executorch.git executorch git -C executorch submodule update --init --recursive - pip install ./executorch + pip install --no-build-isolation ./executorch - name: Print installed modules run: pip list - name: Run PyTorch precommit test scope From 8ae6a802aebdbd23099cde5f676ccf38fbf76476 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 22:30:02 +0400 Subject: [PATCH 80/91] update requirements --- tests/executorch/requirements.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index 2042778780b..925e46ed10b 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -23,6 +23,18 @@ wheel # For building the pip package archive. zstd # Imported by resolve_buck.py. certifi # Imported by resolve_buck.py. 
+# Copied from tests/torch2/requirements.txt +addict>=2.4.0 +efficientnet_pytorch==0.7.1 +transformers==4.52.1 + +sentence-transformers==4.1.0 +optimum-intel==1.24.0 +optimum==1.26.0 +accelerate==1.9.0 +fastdownload==0.0.7 + + # Tests and examples pytest==8.0.2 pytest-cov==4.1.0 From c7210b882c5e0ac741a9d1aa3e62693d2cdd6230 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 23:10:20 +0400 Subject: [PATCH 81/91] add conftest and __init__ --- tests/executorch/__init__.py | 10 ++++++++++ tests/executorch/conftest.py | 11 +++++++++++ 2 files changed, 21 insertions(+) create mode 100644 tests/executorch/__init__.py create mode 100644 tests/executorch/conftest.py diff --git a/tests/executorch/__init__.py b/tests/executorch/__init__.py new file mode 100644 index 00000000000..e5a42efc0ef --- /dev/null +++ b/tests/executorch/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/executorch/conftest.py b/tests/executorch/conftest.py new file mode 100644 index 00000000000..f42e60bbc25 --- /dev/null +++ b/tests/executorch/conftest.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +pytest_plugins = ["tests.torch2.conftest"] \ No newline at end of file From 2f8b296b9c2853df78fc166165a7cd006e0be1db Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 15 Oct 2025 23:25:12 +0400 Subject: [PATCH 82/91] use older pytorch commit --- tests/executorch/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index 925e46ed10b..6855bd17f23 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -1,7 +1,7 @@ --pre --extra-index-url https://download.pytorch.org/whl/nightly/cpu -torch +torch @ git+https://github.com/pytorch/pytorch.git@8e6b0c7 torchvision torchao From 75ccdcbbb48ed57fcb976185589e2947d2442538 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 16 Oct 2025 10:10:52 +0400 Subject: [PATCH 83/91] change torch versions to 2.10.0.dev20250922+cpu --- tests/executorch/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index 6855bd17f23..43711a276c5 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -1,9 +1,9 @@ --pre --extra-index-url https://download.pytorch.org/whl/nightly/cpu -torch @ git+https://github.com/pytorch/pytorch.git@8e6b0c7 -torchvision -torchao +torch==2.10.0.dev20250922+cpu +torchvision==0.25.0.dev20250922+cpu +torchao==0.14.0.dev20250922+cpu # Openvino openvino==2025.3.0 From 75cc2556fa2d6eb96e266ce8b0e324c2f90f9cb1 Mon Sep 17 00:00:00 2001 From: anzr299 Date: 
Thu, 16 Oct 2025 11:47:15 +0400 Subject: [PATCH 84/91] install executorch directly from requirements txt --- .github/workflows/call_precommit.yml | 5 ----- tests/executorch/requirements.txt | 6 ++++++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index ea99b7a7efe..032ac15f20c 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -166,11 +166,6 @@ jobs: shell: bash - name: Install NNCF and test requirements run: pip install . -r tests/executorch/requirements.txt - - name: Install ExecuTorch from branch - run: | - git clone --branch an/quantizer_nncf_pt2e_support https://github.com/anzr299/executorch.git executorch - git -C executorch submodule update --init --recursive - pip install --no-build-isolation ./executorch - name: Print installed modules run: pip list - name: Run PyTorch precommit test scope diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index 43711a276c5..8e966566c23 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -1,6 +1,12 @@ --pre --extra-index-url https://download.pytorch.org/whl/nightly/cpu +--no-build-isolation +# Executorch +# Due to https://github.com/pytorch/executorch/issues/6475 +-e git+https://github.com/anzr299/executorch.git@an/quantizer_nncf_pt2e_support#egg=executorch + +# Pytorch torch==2.10.0.dev20250922+cpu torchvision==0.25.0.dev20250922+cpu torchao==0.14.0.dev20250922+cpu From 3cdfe74a461fd305b80fa3158cd4dbfd092edf01 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 16 Oct 2025 11:55:10 +0400 Subject: [PATCH 85/91] comments --- .github/workflows/call_precommit.yml | 2 +- tests/executorch/requirements.txt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index 032ac15f20c..4472650a5c4 100644 --- 
a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -165,7 +165,7 @@ jobs: run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }}" shell: bash - name: Install NNCF and test requirements - run: pip install . -r tests/executorch/requirements.txt + run: pip install . -r tests/executorch/requirements.txt --no-build-isolation - name: Print installed modules run: pip list - name: Run PyTorch precommit test scope diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index 8e966566c23..10831aef70e 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -1,9 +1,8 @@ --pre --extra-index-url https://download.pytorch.org/whl/nightly/cpu ---no-build-isolation # Executorch -# Due to https://github.com/pytorch/executorch/issues/6475 +# Editable install due to https://github.com/pytorch/executorch/issues/6475 -e git+https://github.com/anzr299/executorch.git@an/quantizer_nncf_pt2e_support#egg=executorch # Pytorch From e4f9286d1e4e9f8c7b6cbfc229d105f4a668d746 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 16 Oct 2025 12:08:48 +0400 Subject: [PATCH 86/91] seperate executorch installation --- .github/workflows/call_precommit.yml | 6 +++++- tests/executorch/requirements.txt | 4 ---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index 4472650a5c4..ddabf24213a 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -165,7 +165,11 @@ jobs: run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }}" shell: bash - name: Install NNCF and test requirements - run: pip install . -r tests/executorch/requirements.txt --no-build-isolation + run: | + pip install . 
-r tests/executorch/requirements.txt + # Executorch + # Editable install due to https://github.com/pytorch/executorch/issues/6475 + pip install --no-build-isolation -e git+https://github.com/anzr299/executorch.git@an/quantizer_nncf_pt2e_support#egg=executorch - name: Print installed modules run: pip list - name: Run PyTorch precommit test scope diff --git a/tests/executorch/requirements.txt b/tests/executorch/requirements.txt index 10831aef70e..49faa6dac6a 100644 --- a/tests/executorch/requirements.txt +++ b/tests/executorch/requirements.txt @@ -1,10 +1,6 @@ --pre --extra-index-url https://download.pytorch.org/whl/nightly/cpu -# Executorch -# Editable install due to https://github.com/pytorch/executorch/issues/6475 --e git+https://github.com/anzr299/executorch.git@an/quantizer_nncf_pt2e_support#egg=executorch - # Pytorch torch==2.10.0.dev20250922+cpu torchvision==0.25.0.dev20250922+cpu From 009c5872ebc4846f8bea23ce4d2383b06471c9d4 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 16 Oct 2025 12:42:15 +0400 Subject: [PATCH 87/91] precommit fix --- .../torch/fx/quantization/quantizer/torch_ao_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py index 5ae585981c5..e3a6c1c8f42 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/torch_ao_adapter.py @@ -211,4 +211,4 @@ def _unwrap_shared_qspec_safe(qspec: QuantizationSpec, edge_or_node_to_qspec: di if i == MAX_DEPTH: msg = f"Shared qspecs referenced to each other more than the limit: {MAX_DEPTH}" raise RuntimeError(msg) - return qspec \ No newline at end of file + return qspec From 6e379c859f325a1393d2f8c8753f724bcae01453 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 16 Oct 2025 12:43:38 +0400 Subject: [PATCH 88/91] conftest precommit --- 
tests/executorch/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/executorch/conftest.py b/tests/executorch/conftest.py index f42e60bbc25..ce9b7b42661 100644 --- a/tests/executorch/conftest.py +++ b/tests/executorch/conftest.py @@ -8,4 +8,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -pytest_plugins = ["tests.torch2.conftest"] \ No newline at end of file +pytest_plugins = ["tests.torch2.conftest"] From e45f796b44b6087b3223c620e2f4afda7cedb731 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 16 Oct 2025 12:48:44 +0400 Subject: [PATCH 89/91] update ref location for executorch --- .../int4wo_sym_gs32_ratio0.8_all_layers_False.dot | 0 .../int4wo_sym_gs32_ratio0.8_all_layers_True.dot | 0 .../int8wo_asym_gs-1_ratio1_all_layers_False.dot | 0 .../int4wo_sym_gs32_ratio0.8_all_layers_False.dot | 0 .../int4wo_sym_gs32_ratio0.8_all_layers_True.dot | 0 .../int8wo_asym_gs-1_ratio1_all_layers_False.dot | 0 .../int4wo_sym_gs32_ratio0.8_all_layers_False.dot | 0 ..._False_awq_True_scale_estimation_True_ref_wc_scales.json | 0 ...4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json | 0 .../int4wo_sym_gs32_ratio0.8_all_layers_True.dot | 0 ...s_True_awq_True_scale_estimation_True_ref_wc_scales.json | 0 ...t4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json | 0 .../int8wo_asym_gs-1_ratio1_all_layers_False.dot | 0 ...alse_awq_False_scale_estimation_False_ref_wc_scales.json | 0 ...t8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json | 0 .../int4wo_sym_gs32_ratio0.8_all_layers_False.dot | 0 ..._False_awq_True_scale_estimation_True_ref_wc_scales.json | 0 ...4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json | 0 .../int4wo_sym_gs32_ratio0.8_all_layers_True.dot | 0 ...s_True_awq_True_scale_estimation_True_ref_wc_scales.json | 0 ...t4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json | 0 
.../int8wo_asym_gs-1_ratio1_all_layers_False.dot | 0 ...alse_awq_False_scale_estimation_False_ref_wc_scales.json | 0 ...t8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json | 0 tests/executorch/test_quantizer_compression.py | 6 +++--- 25 files changed, 3 insertions(+), 3 deletions(-) rename tests/{torch2/data/fx/ao_compression/OpenVINOQuantizer => executorch/data/fx/ao_export_compression_OpenVINOQuantizer}/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot (100%) rename tests/{torch2/data/fx/ao_compression/OpenVINOQuantizer => executorch/data/fx/ao_export_compression_OpenVINOQuantizer}/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot (100%) rename tests/{torch2/data/fx/ao_compression/OpenVINOQuantizer => executorch/data/fx/ao_export_compression_OpenVINOQuantizer}/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot (100%) rename tests/{torch2/data/fx/ao_compression/OpenVINOQuantizer => executorch/data/fx/ao_export_compression_OpenVINOQuantizer}/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot (100%) rename tests/{torch2/data/fx/ao_compression/OpenVINOQuantizer => executorch/data/fx/ao_export_compression_OpenVINOQuantizer}/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot (100%) rename tests/{torch2/data/fx/ao_compression/OpenVINOQuantizer => executorch/data/fx/ao_export_compression_OpenVINOQuantizer}/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json (100%) rename tests/{torch2 
=> executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json (100%) rename tests/{torch2 => 
executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json (100%) rename tests/{torch2 => executorch}/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json (100%) diff --git a/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot similarity index 100% rename from tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot rename to tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot diff --git a/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot similarity index 100% rename from tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot rename to tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot diff --git a/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot 
b/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot similarity index 100% rename from tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot rename to tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot diff --git a/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot similarity index 100% rename from tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot rename to tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot diff --git a/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot similarity index 100% rename from tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot rename to tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot diff --git a/tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot similarity index 100% rename from 
tests/torch2/data/fx/ao_compression/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot rename to tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json rename to 
tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json diff --git 
a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot 
b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot 
b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot 
b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json diff --git a/tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json similarity index 100% rename from tests/torch2/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json diff --git a/tests/executorch/test_quantizer_compression.py b/tests/executorch/test_quantizer_compression.py index 384bd7d42d1..77885f095ad 100644 --- a/tests/executorch/test_quantizer_compression.py +++ 
b/tests/executorch/test_quantizer_compression.py @@ -41,8 +41,8 @@ from tests.torch.test_models.synthetic import ShortTransformer from tests.torch2.fx.helpers import get_torch_fx_model -FX_PT2E_DIR = TEST_ROOT / "torch2" / "data" / "fx" / "compress_pt2e" -FX_AO_DIR = TEST_ROOT / "torch2" / "data" / "fx" / "ao_compression" +FX_PT2E_DIR = TEST_ROOT / "executorch" / "data" / "fx" / "compress_pt2e" +FX_AO_DIR = TEST_ROOT / "executorch" / "data" / "fx" / "ao_export_compression_OpenVINOQuantizer" @dataclass @@ -283,7 +283,7 @@ def test_openvino_quantizer( param_string = _string_from_quantizer_params(quantizer_params) path_to_dot = ( - FX_AO_DIR / quantizer.__class__.__name__ / model_case.model_id / get_dot_filename(param_string) + FX_AO_DIR / model_case.model_id / get_dot_filename(param_string) ).as_posix() compare_nx_graph_with_reference(nx_graph, path_to_dot) From f2ece8cc22a4723cb21a4fa15dec2f4e69d335d8 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Thu, 16 Oct 2025 13:26:13 +0400 Subject: [PATCH 90/91] define ratio in compress_pt2e API and not Quantizer itself; Update test --- .../torch/fx/quantization/quantize_pt2e.py | 5 +++- ...t => int4wo_sym_gs32_all_layers_False.dot} | 0 ...ot => int4wo_sym_gs32_all_layers_True.dot} | 0 ... => int8wo_asym_gs-1_all_layers_False.dot} | 0 ...t => int4wo_sym_gs32_all_layers_False.dot} | 0 ...ot => int4wo_sym_gs32_all_layers_True.dot} | 0 ... => int8wo_asym_gs-1_all_layers_False.dot} | 0 ...t => int4wo_sym_gs32_all_layers_False.dot} | 0 ..._scale_estimation_True_ref_wc_scales.json} | 0 ...m_gs32_all_layers_False_ref_wc_param.json} | 0 ...ot => int4wo_sym_gs32_all_layers_True.dot} | 0 ..._scale_estimation_True_ref_wc_scales.json} | 0 ...ym_gs32_all_layers_True_ref_wc_param.json} | 0 ... 
=> int8wo_asym_gs-1_all_layers_False.dot} | 0 ...scale_estimation_False_ref_wc_scales.json} | 0 ...m_gs-1_all_layers_False_ref_wc_param.json} | 0 ...t => int4wo_sym_gs32_all_layers_False.dot} | 0 ..._scale_estimation_True_ref_wc_scales.json} | 0 ...m_gs32_all_layers_False_ref_wc_param.json} | 0 ...ot => int4wo_sym_gs32_all_layers_True.dot} | 0 ..._scale_estimation_True_ref_wc_scales.json} | 0 ...ym_gs32_all_layers_True_ref_wc_param.json} | 0 ... => int8wo_asym_gs-1_all_layers_False.dot} | 0 ...scale_estimation_False_ref_wc_scales.json} | 0 ...m_gs-1_all_layers_False_ref_wc_param.json} | 0 .../executorch/test_quantizer_compression.py | 26 +++++++++++-------- 26 files changed, 19 insertions(+), 12 deletions(-) rename tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/{int4wo_sym_gs32_ratio0.8_all_layers_False.dot => int4wo_sym_gs32_all_layers_False.dot} (100%) rename tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/{int4wo_sym_gs32_ratio0.8_all_layers_True.dot => int4wo_sym_gs32_all_layers_True.dot} (100%) rename tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/{int8wo_asym_gs-1_ratio1_all_layers_False.dot => int8wo_asym_gs-1_all_layers_False.dot} (100%) rename tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/{int4wo_sym_gs32_ratio0.8_all_layers_False.dot => int4wo_sym_gs32_all_layers_False.dot} (100%) rename tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/{int4wo_sym_gs32_ratio0.8_all_layers_True.dot => int4wo_sym_gs32_all_layers_True.dot} (100%) rename tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/{int8wo_asym_gs-1_ratio1_all_layers_False.dot => int8wo_asym_gs-1_all_layers_False.dot} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/{int4wo_sym_gs32_ratio0.8_all_layers_False.dot => 
int4wo_sym_gs32_all_layers_False.dot} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/{int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json => int4wo_sym_gs32_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/{int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json => int4wo_sym_gs32_all_layers_False_ref_wc_param.json} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/{int4wo_sym_gs32_ratio0.8_all_layers_True.dot => int4wo_sym_gs32_all_layers_True.dot} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/{int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json => int4wo_sym_gs32_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/{int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json => int4wo_sym_gs32_all_layers_True_ref_wc_param.json} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/{int8wo_asym_gs-1_ratio1_all_layers_False.dot => int8wo_asym_gs-1_all_layers_False.dot} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/{int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json => int8wo_asym_gs-1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/{int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json => int8wo_asym_gs-1_all_layers_False_ref_wc_param.json} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/{int4wo_sym_gs32_ratio0.8_all_layers_False.dot => int4wo_sym_gs32_all_layers_False.dot} (100%) 
rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/{int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json => int4wo_sym_gs32_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/{int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json => int4wo_sym_gs32_all_layers_False_ref_wc_param.json} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/{int4wo_sym_gs32_ratio0.8_all_layers_True.dot => int4wo_sym_gs32_all_layers_True.dot} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/{int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json => int4wo_sym_gs32_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/{int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json => int4wo_sym_gs32_all_layers_True_ref_wc_param.json} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/{int8wo_asym_gs-1_ratio1_all_layers_False.dot => int8wo_asym_gs-1_all_layers_False.dot} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/{int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json => int8wo_asym_gs-1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json} (100%) rename tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/{int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json => int8wo_asym_gs-1_all_layers_False_ref_wc_param.json} (100%) diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py index c8906573f30..b493a9461af 
100644 --- a/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py +++ b/src/nncf/experimental/torch/fx/quantization/quantize_pt2e.py @@ -173,6 +173,7 @@ def compress_pt2e( gptq: bool = False, lora_correction: bool = False, subset_size: int = 128, + ratio: int = 1, sensitivity_metric: Optional[SensitivityMetric] = None, advanced_parameters: Optional[AdvancedCompressionParameters] = None, ) -> torch.fx.GraphModule: @@ -191,6 +192,8 @@ def compress_pt2e( :param lora_correction: Determines whether to use or not LoRA Correction algorithm. :param subset_size: Number of data samples to calculate activation statistics used for assigning different quantization precision. + :param ratio: the ratio between baseline and backup precisions (e.g. 0.9 means 90% of layers quantized to NF4 + and the rest to INT8_ASYM). :param sensitivity_metric: The sensitivity metric for assigning quantization precision to layers. In order to preserve the accuracy of the model, the more sensitive layers receive a higher precision. :param advanced_parameters: Advanced parameters for algorithms in the compression pipeline. 
@@ -212,7 +215,7 @@ def compress_pt2e( subset_size = subset_size advanced_parameters = advanced_parameters lora_correction = lora_correction - ratio = wc_config.get("ratio", 1) + ratio = ratio group_size = wc_config.get("group_size", 128) all_layers = wc_config.get("all_layers", False) backup_mode = wc_config.get("backup_mode", nncf.BackupMode.INT8_ASYM) diff --git a/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_False.dot similarity index 100% rename from tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot rename to tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_False.dot diff --git a/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_True.dot similarity index 100% rename from tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot rename to tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_True.dot diff --git a/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_all_layers_False.dot similarity index 100% rename from tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot rename to 
tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_all_layers_False.dot diff --git a/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_False.dot similarity index 100% rename from tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot rename to tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_False.dot diff --git a/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_True.dot similarity index 100% rename from tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot rename to tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_True.dot diff --git a/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_all_layers_False.dot similarity index 100% rename from tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot rename to tests/executorch/data/fx/ao_export_compression_OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_all_layers_False.dot diff --git 
a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_False.dot similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False.dot rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_False.dot diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_False_ref_wc_param.json similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_False_ref_wc_param.json diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot 
b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_True.dot similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True.dot rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_True.dot diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_True_ref_wc_param.json similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int4wo_sym_gs32_all_layers_True_ref_wc_param.json diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_all_layers_False.dot similarity index 100% rename from 
tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False.dot rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_all_layers_False.dot diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_all_layers_False_ref_wc_param.json similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/LlamaDecoderOnly/int8wo_asym_gs-1_all_layers_False_ref_wc_param.json diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_False.dot similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False.dot rename to 
tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_False.dot diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_False_awq_True_scale_estimation_True_ref_wc_scales.json diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_False_ref_wc_param.json similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_False_ref_wc_param.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_False_ref_wc_param.json diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_True.dot similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True.dot rename to 
tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_True.dot diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_True_awq_True_scale_estimation_True_ref_wc_scales.json diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_True_ref_wc_param.json similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_ratio0.8_all_layers_True_ref_wc_param.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int4wo_sym_gs32_all_layers_True_ref_wc_param.json diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_all_layers_False.dot similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False.dot rename to 
tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_all_layers_False.dot diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_all_layers_False_awq_False_scale_estimation_False_ref_wc_scales.json diff --git a/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json b/tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_all_layers_False_ref_wc_param.json similarity index 100% rename from tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_ratio1_all_layers_False_ref_wc_param.json rename to tests/executorch/data/fx/compress_pt2e/OpenVINOQuantizer/short_transformer_shared/int8wo_asym_gs-1_all_layers_False_ref_wc_param.json diff --git a/tests/executorch/test_quantizer_compression.py b/tests/executorch/test_quantizer_compression.py index 77885f095ad..464d0a4764a 100644 --- a/tests/executorch/test_quantizer_compression.py +++ b/tests/executorch/test_quantizer_compression.py @@ -90,13 +90,12 @@ def get_openvino_quantizer(*args, **kwargs) -> OpenVINOQuantizer: def _string_from_quantizer_params(qparams: dict[str, Any], pt2e_param: Optional[dict[str, Any]] = None) -> str: mode = qparams.get("mode") gs = qparams.get("group_size", "-1") - ratio = 
qparams.get("ratio", "1") all_layers = qparams.get("all_layers", "False") if pt2e_param is None: - return f"{mode.value}_gs{gs}_ratio{ratio}_all_layers_{all_layers}" + return f"{mode.value}_gs{gs}_all_layers_{all_layers}" awq = pt2e_param.get("awq", "False") scale_estimation = pt2e_param.get("scale_estimation", "False") - return f"{mode.value}_gs{gs}_ratio{ratio}_all_layers_{all_layers}_awq_{awq}_scale_estimation_{scale_estimation}" + return f"{mode.value}_gs{gs}_all_layers_{all_layers}_awq_{awq}_scale_estimation_{scale_estimation}" def check_multiple_isinstance(object_to_check: Any, objects: list[Any]): @@ -152,8 +151,8 @@ def get_test_cases(): QUANTIZER_PARAMS = ( {"mode": QuantizationMode.INT8WO_ASYM}, - {"mode": QuantizationMode.INT4WO_SYM, "group_size": 32, "ratio": 0.8}, - {"mode": QuantizationMode.INT4WO_SYM, "group_size": 32, "ratio": 0.8, "all_layers": True}, + {"mode": QuantizationMode.INT4WO_SYM, "group_size": 32}, + {"mode": QuantizationMode.INT4WO_SYM, "group_size": 32, "all_layers": True}, ) PT2E_PARAMS = ({"awq": True, "scale_estimation": True},) @@ -166,6 +165,8 @@ def get_test_cases(): f"{m.model_id}__{_string_from_quantizer_params(qparams, pt2e_param)}" for (m, qparams, pt2e_param) in TEST_MODELS ] +INT8_COMPRESSION_MODES = [QuantizationMode.INT8WO_ASYM, QuantizationMode.INT8WO_SYM] + @pytest.mark.parametrize( ("model_case", "quantizer_params", "pt2e_params"), @@ -191,8 +192,10 @@ def test_compress_pt2e( # Build quantizer directly from quantizer_params (already includes mode/group_size) quantizer = quantizer_builder(**quantizer_params) + mode = quantizer_params.get("mode") + ratio = 1 if mode in INT8_COMPRESSION_MODES else 0.8 - quantized_model = compress_pt2e(fx_model, quantizer=quantizer, dataset=calibration_dataset) + quantized_model = compress_pt2e(fx_model, quantizer=quantizer, ratio=ratio, dataset=calibration_dataset) with torch.no_grad(): out = quantized_model(example_input) @@ -232,8 +235,11 @@ def test_compress_pt2e_scales( # Build 
quantizer directly from quantizer_params (already includes mode/group_size) quantizer = quantizer_builder(**quantizer_params) - - quantized_model = compress_pt2e(fx_model, quantizer=quantizer, dataset=calibration_dataset, **pt2e_params) + mode = quantizer_params.get("mode") + ratio = 1 if mode in INT8_COMPRESSION_MODES else 0.8 + quantized_model = compress_pt2e( + fx_model, quantizer=quantizer, ratio=ratio, dataset=calibration_dataset, **pt2e_params + ) with torch.no_grad(): out = quantized_model(example_input) @@ -282,9 +288,7 @@ def test_openvino_quantizer( nx_graph = nncf_graph.get_graph_for_structure_analysis(extended=True) param_string = _string_from_quantizer_params(quantizer_params) - path_to_dot = ( - FX_AO_DIR / model_case.model_id / get_dot_filename(param_string) - ).as_posix() + path_to_dot = (FX_AO_DIR / model_case.model_id / get_dot_filename(param_string)).as_posix() compare_nx_graph_with_reference(nx_graph, path_to_dot) From 387d69cd6e2695c511ecd1ae8961b08ffe56f43e Mon Sep 17 00:00:00 2001 From: Aamir Nazir Date: Thu, 16 Oct 2025 15:59:56 +0400 Subject: [PATCH 91/91] Apply suggestion from @daniil-lyakhov Co-authored-by: Daniil Lyakhov --- .../torch/fx/quantization/quantizer/openvino_adapter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py index b72df9d29f7..5b4bd321780 100644 --- a/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py +++ b/src/nncf/experimental/torch/fx/quantization/quantizer/openvino_adapter.py @@ -41,7 +41,6 @@ def get_weight_compression_parameters( ) -> tuple[ list[WeightCompressionParameters], list[WeightCompressionParameters], - dict[str, int], list[WeightCompressionParameters], ]: return self._quantizer.get_nncf_weight_compression_parameters(model, nncf_graph)