@@ -156,9 +156,13 @@ ArmPlugin::pass::ConvolutionQuantizeFusion::ConvolutionQuantizeFusion() {
156156 opset::Sigmoid, opset::Tanh, opset::Relu, opset::Abs,
157157 opset::Elu, opset::Sqrt, opset::SoftPlus, opset::HSwish,
158158 opset::PRelu, opset::Clamp>({node_pattern});
159- auto node_output = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{node_pattern, activation_pattern});
159+ auto q_scale = ngraph::pattern::wrap_type<opset::Constant>();
160+ auto q_mul = ngraph::pattern::wrap_type<opset::Multiply>({
161+ std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{node_pattern, activation_pattern}),
162+ q_scale},
163+ ngraph::pattern::consumers_count (1 ));
160164 auto fq_pattern = ngraph::pattern::wrap_type<opset::FakeQuantize>({
161- node_output ,
165+ std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{node_pattern, activation_pattern, q_mul}) ,
162166 ngraph::pattern::any_input (ngraph::pattern::has_static_shape ()),
163167 ngraph::pattern::any_input (ngraph::pattern::has_static_shape ()),
164168 ngraph::pattern::any_input (ngraph::pattern::has_static_shape ()),
@@ -169,6 +173,7 @@ ArmPlugin::pass::ConvolutionQuantizeFusion::ConvolutionQuantizeFusion() {
169173 auto pattern_map = m.get_pattern_value_map ();
170174 auto node = pattern_map[node_pattern].get_node_shared_ptr ();
171175 auto fakeQuantize = safe_cast<opset::FakeQuantize>(pattern_map[fq_pattern].get_node_shared_ptr ());
176+ auto itMul = pattern_map.find (q_mul);
172177 auto itActivation = pattern_map.find (activation_pattern);
173178 auto realType = node->get_output_element_type (0 );
174179 auto quantizedType = fakeQuantize->get_output_element_type (0 );
@@ -179,6 +184,25 @@ ArmPlugin::pass::ConvolutionQuantizeFusion::ConvolutionQuantizeFusion() {
179184 getFloatVector (fakeQuantize->input_value (2 ).get_node ()),
180185 getFloatVector (fakeQuantize->input_value (3 ).get_node ()),
181186 getFloatVector (fakeQuantize->input_value (4 ).get_node ()));
187+ if (itMul != pattern_map.end ()) {
188+ std::vector<float > scales = getFloatVector (pattern_map[q_scale].get_node ());
189+ if (allEqualToFirst (scales)) {
190+ if (scales[0 ] == 0 .) return false ; // Scale multiplier shouldn't be zero
191+ for (auto && v : quantizationInfo.first ) v *= scales[0 ];
192+ } else if (quantizationInfo.first .size () > 1 ) {
193+ if (scales.size () != quantizationInfo.first .size ()) return false ;
194+ std::transform (quantizationInfo.first .begin (), quantizationInfo.first .end (), scales.begin (),
195+ quantizationInfo.first .begin (), [](float f, float sc) -> float { return f * sc; });
196+ } else {
197+ std::vector<float > qiScales, qiOffsets;
198+ for (auto && sc : scales) {
199+ qiScales.emplace_back (quantizationInfo.first [0 ] * sc);
200+ qiOffsets.emplace_back (quantizationInfo.second [0 ]);
201+ }
202+ quantizationInfo.first .swap (qiScales);
203+ quantizationInfo.second .swap (qiOffsets);
204+ }
205+ }
182206
183207 std::vector<ngraph::Output<ngraph::Node>> newInputs;
184208 Types inputTypes;
@@ -187,27 +211,75 @@ ArmPlugin::pass::ConvolutionQuantizeFusion::ConvolutionQuantizeFusion() {
187211 newInputs.emplace_back (
188212 ngraph::op::TemporaryReplaceOutputType{input.get_source_output (), realType}.get ());
189213 }
214+
215+ std::shared_ptr<ngraph::Node> bias;
216+ if (node->inputs ().size () > 2 ) {
217+ bias = node->input_value (2 ).get_node_shared_ptr ();
218+ }
219+
220+ bool negativeScales = std::any_of (std::begin (quantizationInfo.first ), std::end (quantizationInfo.first ), [] (auto & value) {return value < 0 ;});
221+ if (negativeScales) {
222+ if (node->get_input_element_type (1 ) != ngraph::element::i8 )
223+ return false ;
224+ std::vector<std::int8_t > negate;
225+ std::transform (quantizationInfo.first .begin (), quantizationInfo.first .end (), std::back_inserter (negate),
226+ [](float f) -> std::int8_t { return f < 0 ? -1 : 1 ; } );
227+ std::transform (quantizationInfo.first .begin (), quantizationInfo.first .end (), quantizationInfo.first .begin (),
228+ [](float f) -> float { return f < 0 ? -f : f; } );
229+ std::shared_ptr<ngraph::Node> weightMultiply;
230+ if (ngraph::is_type<opset::Constant>(node->input_value (1 ).get_node ())) {
231+ std::vector<std::int8_t > weights = safe_cast<const opset::Constant>(node->input_value (1 ).get_node ())->cast_vector <std::int8_t >();
232+ size_t step = weights.size () / negate.size ();
233+ auto weightsIt = weights.begin ();
234+ for (auto && sign : negate) {
235+ std::transform (weightsIt, weightsIt + step, weightsIt, [&sign](std::int8_t w) -> std::int8_t { return w * sign; } );
236+ weightsIt += step;
237+ }
238+ weightMultiply = std::make_shared<opset::Constant>(node->get_input_element_type (1 ), node->get_input_shape (1 ), weights);
239+ } else {
240+ weightMultiply = std::make_shared<opset::Multiply>(node->input_value (1 ),
241+ std::make_shared<opset::Constant>(node->get_input_element_type (1 ),
242+ ngraph::Shape{negate.size (), 1 , 1 , 1 }, negate));
243+ }
244+ weightMultiply->set_friendly_name (node->input_value (1 ).get_node_shared_ptr ()->get_friendly_name () + " _weights_negate" );
245+ ngraph::copy_runtime_info (node->input_value (1 ).get_node_shared_ptr (), weightMultiply);
246+ newInputs[1 ] = ngraph::op::TemporaryReplaceOutputType{weightMultiply->output (0 ), ngraph::element::i8 }.get ();
247+
248+ if (bias) {
249+ bias = std::make_shared<opset::Multiply>(bias,
250+ std::make_shared<opset::Constant>(ngraph::element::f32 ,
251+ ngraph::Shape{negate.size ()}, negate));
252+ bias->set_friendly_name (node->input_value (2 ).get_node_shared_ptr ()->get_friendly_name () + " _bias_negate" );
253+ ngraph::copy_runtime_info (node->input_value (2 ).get_node_shared_ptr (), bias);
254+ newInputs[2 ] = ngraph::op::TemporaryReplaceOutputType{bias->output (0 ), realType}.get ();
255+ }
256+ }
257+
190258 std::int32_t qiOffset = 0 ;
191259 if (!allEqualToFirst (quantizationInfo.second )) {
192- auto shape = ngraph::Shape{ quantizationInfo.second .size ()};
193- std::vector< float > invScale ;
194- std::transform (quantizationInfo. first . begin (), quantizationInfo. first . end (), std::back_inserter (invScale) ,
195- []( float f) -> float { return 1 ./f; } );
196- std::shared_ptr<ngraph::Node> bias = std::make_shared<opset::Multiply>(
197- std::make_shared<opset::Constant>(ngraph::element:: f32 , shape, quantizationInfo. second ),
198- std::make_shared<opset::Constant>(ngraph::element:: f32 , shape, invScale));
199- OPENVINO_ASSERT (bias, " Failed to create bias node for fused convolution " );
200- if (node->inputs (). size () > 2 ) {
201- bias = std::make_shared<opset::Add> (node->input_value (2 ), bias);
260+ std::transform ( quantizationInfo.second .begin (), quantizationInfo. second . end (), quantizationInfo. first . begin (),
261+ quantizationInfo. second . begin (), []( float sh, float sc) -> float { return sh / sc; } ) ;
262+ std::shared_ptr<ngraph::Node> zpbias = std::make_shared<opset::Constant>(ngraph::element:: f32 ,
263+ ngraph::Shape{quantizationInfo. second . size ()},
264+ quantizationInfo. second );
265+ OPENVINO_ASSERT (zpbias, " Failed to convert zero point to bias node for fused convolution " );
266+ if (bias) {
267+ bias = std::make_shared<opset::Add>(bias, zpbias );
268+ bias-> set_friendly_name (node->input_value ( 2 ). get_node_shared_ptr ()-> get_friendly_name () + " _bias_fusedzp " );
269+ ngraph::copy_runtime_info (node->input_value (2 ). get_node_shared_ptr ( ), bias);
202270 newInputs[2 ] = ngraph::op::TemporaryReplaceOutputType{bias->output (0 ), realType}.get ();
203271 } else {
204272 inputTypes.emplace_back (realType);
205- newInputs.emplace_back (ngraph::op::TemporaryReplaceOutputType{bias ->output (0 ), realType}.get ());
273+ newInputs.emplace_back (ngraph::op::TemporaryReplaceOutputType{zpbias ->output (0 ), realType}.get ());
206274 }
207275 } else {
208276 qiOffset = static_cast <std::int32_t >(std::round (quantizationInfo.second [0 ]));
209277 }
210278 auto newNode = makeTypeRelaxed (node.get (), newInputs, inputTypes, Types{quantizedType});
279+ if (!bias && newInputs.size () == 3 && newNode->inputs ().size () != 3 ) {
280+ // TypeRelaxed operations unable to extend amount of inputs on copy
281+ newNode->set_argument (2 , newInputs.at (2 ));
282+ }
211283
212284 if (itActivation != pattern_map.end ()) {
213285 auto activation = itActivation->second .get_node_shared_ptr ();
@@ -228,6 +300,7 @@ ArmPlugin::pass::ConvolutionQuantizeFusion::ConvolutionQuantizeFusion() {
228300 if (!allEqualToFirst (quantizationInfo.first )) {
229301 if (node->get_input_element_type (1 ) != ngraph::element::i8 )
230302 return false ;
303+ // Is it correct if fused activation exists?
231304 newNode->get_rt_info ()[" WeightsPrescaleInfo" ] =
232305 arm_compute::QuantizationInfo{quantizationInfo.first , std::vector<std::int32_t >(quantizationInfo.first .size (), 0 )};
233306 } else {
0 commit comments