From 0e6fb2118b6a3d127703b56ade1ca7c8469117c9 Mon Sep 17 00:00:00 2001
From: Evgeniia Nugmanova
Date: Tue, 12 Nov 2024 11:19:09 +0400
Subject: [PATCH 01/11] Partial Value optimization

Signed-off-by: Evgeniia Nugmanova

---
 .../symbol_optimization.cpp                   | 182 +++++++++++++-----
 .../symbol_optimization.cpp                   |  25 +--
 2 files changed, 139 insertions(+), 68 deletions(-)

diff --git a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
index 55f0794e0ee008..07583ae486ae74 100644
--- a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
+++ b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
@@ -18,6 +18,7 @@
 #include "openvino/op/squeeze.hpp"
 #include "openvino/op/util/multi_subgraph_base.hpp"
 #include "openvino/op/util/op_types.hpp"
+#include "transformations/symbolic_transformations/utils.hpp"
 #include "transformations/utils/utils.hpp"

 namespace {
@@ -84,27 +85,28 @@ int64_t get_idx_of_symbol_in_source(const ov::Output& source, const st
 }

 ov::Output alternative_source_from_existing_value(const std::shared_ptr& symbol,
-                                                  const ov::Output& original_output,
+                                                  const ov::Shape& original_shape,
+                                                  const ov::element::Type& original_et,
+                                                  const std::shared_ptr& node_to_copy_rt_info,
                                                   STS_map& symbol_value_source) {
     auto alternative_source = ov::Output();
     if (symbol_value_source.count(symbol)) {
         alternative_source = symbol_value_source[symbol];
-        const auto &original_shape = original_output.get_shape(), &alternative_shape = alternative_source.get_shape();
-        const auto &original_et = original_output.get_element_type(),
-                   &alternative_et = alternative_source.get_element_type();
+        const auto& alternative_shape = alternative_source.get_shape();
+        const auto& alternative_et = alternative_source.get_element_type();
         if (alternative_shape != original_shape && (original_shape.empty() || original_shape == ov::Shape{0})) {
             auto squeeze = std::make_shared(alternative_source);
-            ov::copy_runtime_info(original_output.get_node_shared_ptr(), squeeze);
+            ov::copy_runtime_info(node_to_copy_rt_info, squeeze);
             alternative_source = squeeze->output(0);
         } else if (alternative_shape != original_shape) {
             auto shape = ov::op::v0::Constant::create(ov::element::i64, {original_shape.size()}, original_shape);
             auto reshape = std::make_shared(alternative_source, shape, false);
-            ov::copy_runtime_info(original_output.get_node_shared_ptr(), reshape);
+            ov::copy_runtime_info(node_to_copy_rt_info, reshape);
             alternative_source = reshape->output(0);
         }
         if (alternative_et != original_et) {
             auto convert = std::make_shared(alternative_source, original_et);
-            ov::copy_runtime_info(original_output.get_node_shared_ptr(), convert);
+            ov::copy_runtime_info(node_to_copy_rt_info, convert);
             alternative_source = convert->output(0);
         }
     }
@@ -113,7 +115,9 @@ ov::Output alternative_source_from_existing_value(const std::shared_pt

 ov::Output alternative_source_from_shape_source(const STS_map& symbol_shape_source,
                                                 const std::shared_ptr& symbol,
-                                                const ov::Output& original_output,
+                                                const ov::Shape& original_shape,
+                                                const ov::element::Type& original_et,
+                                                const std::shared_ptr& node_to_copy_rt_info,
                                                 STS_map& symbol_value_source) {
     auto alternative_source = ov::Output();
     if (symbol_shape_source.count(symbol)) {
@@ -122,39 +126,61 @@ ov::Output alternative_source_from_shape_source(const STS_map& symbol_
         const int64_t& idx = get_idx_of_symbol_in_source(source, symbol);
         if (idx == -1)
             return alternative_source;
-        const auto& original_et = original_output.get_element_type();
         std::shared_ptr shape;
         if (original_et == ov::element::i32 || original_et == ov::element::i64) {
             shape = std::make_shared(source, original_et);
         } else {
             shape = std::make_shared(source);
-            ov::copy_runtime_info(original_output.get_node_shared_ptr(), shape);
+            ov::copy_runtime_info(node_to_copy_rt_info, shape);
             shape = std::make_shared(shape, original_et);
         }
-        auto indices = ov::op::v0::Constant::create(ov::element::i64, original_output.get_shape(), {idx});
+        auto indices = ov::op::v0::Constant::create(ov::element::i64, original_shape, {idx});
         auto axis = ov::op::v0::Constant::create(ov::element::i64, {}, {0});
         auto gather = std::make_shared(shape, indices, axis);
-        ov::copy_runtime_info(original_output.get_node_shared_ptr(), {shape, indices, axis, gather});
+        ov::copy_runtime_info(node_to_copy_rt_info, {shape, indices, axis, gather});
         alternative_source = gather;
         symbol_value_source[symbol] = alternative_source;
     }
     return alternative_source;
 }

-ov::Output get_alternative_source_from_value_or_shape_source(const STS_map& symbol_shape_source,
-                                                             const std::shared_ptr& symbol,
-                                                             const ov::Output& original_output,
-                                                             STS_map& symbol_value_source) {
+ov::Output get_alternative_source_from_value_or_shape_source(
+    const STS_map& symbol_shape_source,
+    const std::shared_ptr& symbol,
+    const ov::Shape& original_shape,
+    const ov::element::Type& original_et,
+    const std::shared_ptr& node_to_copy_rt_info,
+    STS_map& symbol_value_source) {
     auto alternative_source = ov::Output();
     if (symbol == nullptr)
         return alternative_source;
-    alternative_source = alternative_source_from_existing_value(symbol, original_output, symbol_value_source);
+    alternative_source = alternative_source_from_existing_value(symbol,
+                                                                original_shape,
+                                                                original_et,
+                                                                node_to_copy_rt_info,
+                                                                symbol_value_source);
     if (!alternative_source.get_node_shared_ptr())
-        alternative_source =
-            alternative_source_from_shape_source(symbol_shape_source, symbol, original_output, symbol_value_source);
+        alternative_source = alternative_source_from_shape_source(symbol_shape_source,
+                                                                  symbol,
+                                                                  original_shape,
+                                                                  original_et,
+                                                                  node_to_copy_rt_info,
+                                                                  symbol_value_source);
     return alternative_source;
 }

+ov::Output get_alternative_source_from_value_or_shape_source(const STS_map& symbol_shape_source,
+                                                             const std::shared_ptr& symbol,
+                                                             const ov::Output& original_output,
+                                                             STS_map& symbol_value_source) {
+    return get_alternative_source_from_value_or_shape_source(symbol_shape_source,
+                                                             symbol,
+                                                             original_output.get_shape(),
+                                                             original_output.get_element_type(),
+                                                             original_output.get_node_shared_ptr(),
+                                                             symbol_value_source);
+}
+
 ov::Output alternative_source_from_concat_input_sources(const STS_map& symbol_shape_source,
                                                         const std::shared_ptr& symbol,
                                                         const ov::Output& original_output,
@@ -198,7 +224,9 @@ ov::Output alternative_source_from_concat_input_sources(const STS_map&
     return alternative_source;
 }

-void optimize_value_usage(ov::Output& output, STS_map& symbol_shape_source, STS_map& symbol_value_source) {
+void optimize_single_value_usage(ov::Output& output,
+                                 STS_map& symbol_shape_source,
+                                 STS_map& symbol_value_source) {
     auto value_symbols = output.get_tensor().get_value_symbol();
     if (value_symbols.size() != 1)
         return;
@@ -316,16 +344,16 @@ std::vector> topological_order(const std::shared_ptr

 void save_shape_sources(const std::shared_ptr& op, STS_map& symbol_shape_source) {
-    if (ov::is_type(op) || ov::is_type(op)) {
-        const auto& output = op->input_value(0);
+    bool is_shape = ov::is_type(op) || ov::is_type(op);
+    bool is_parameter = ov::is_type(op);
+    if (is_shape || is_parameter) {
+        const auto& output = is_shape ? op->input_value(0) : op->output(0);
         if (output.get_partial_shape().rank().is_dynamic())
             return;
         for (const auto& d : output.get_partial_shape()) {
-            if (d.is_static())
-                continue;
-            auto symbol = d.get_symbol();
-            if (symbol == nullptr)
+            if (d.is_static() || d.get_symbol() == nullptr)
                 continue;
+            auto symbol = ov::symbol::ancestor_of(d.get_symbol());
             if (symbol_shape_source.count(symbol))
                 continue;
             symbol_shape_source[symbol] = output;
@@ -344,11 +372,9 @@ void save_shape_sources(const std::shared_ptr& op, STS_map& symbol_sha
             if (input.get_partial_shape().rank().is_dynamic())
                 continue;
             const auto dimension = input.get_partial_shape()[axis];
-            if (dimension.is_static())
-                continue;
-            auto symbol = dimension.get_symbol();
-            if (symbol == nullptr)
+            if (dimension.is_static() || dimension.get_symbol() == nullptr)
                 continue;
+            auto symbol = ov::symbol::ancestor_of(dimension.get_symbol());
             if (symbol_shape_source.count(symbol))
                 continue;
             symbol_shape_source[symbol] = input;
@@ -402,27 +428,74 @@ struct OutputValue {
     }
 };

-void save_and_update_value_sources(const std::shared_ptr& op,
-                                   std::map>& multi_symbol_source) {
-    for (auto& output : op->outputs()) {
-        if (output.get_tensor().get_value_symbol().size() < 2)
-            continue;  // singular values are handled by optimize_value_usage helper
-
-        if (auto result = OutputValue::make(output)) {
-            if (multi_symbol_source.count(*result)) {
-                auto alternative_source = multi_symbol_source[*result];
-                if (output.get_element_type() != alternative_source.get_element_type()) {
-                    auto convert = std::make_shared(alternative_source, output.get_element_type());
-                    ov::copy_runtime_info(output.get_node_shared_ptr(), convert);
-                    alternative_source = convert->output(0);
-                }
-                if (output.get_partial_shape().is_dynamic() ||
-                    output.get_partial_shape() != alternative_source.get_partial_shape())
-                    continue;
-                output.replace(alternative_source);
+void optimize_multi_value_usage(ov::Output& output,
+                                std::map>& multi_symbol_source,
+                                STS_map& symbol_shape_source,
+                                STS_map& symbol_value_source) {
+    if (output.get_tensor().get_value_symbol().size() < 2)
+        return;  // singular values are handled by optimize_single_value_usage helper
+    auto result = OutputValue::make(output);
+    if (!result)
+        return;
+    if (multi_symbol_source.count(*result)) {
+        // multiple value source have been seen before
+        auto alternative_source = multi_symbol_source[*result];
+        if (output.get_element_type() != alternative_source.get_element_type()) {
+            auto convert = std::make_shared(alternative_source, output.get_element_type());
+            // should this be a convert like? no, output should be erased ideally. or should we just avoid dynamic data type?
+            ov::copy_runtime_info(output.get_node_shared_ptr(), convert);
+            alternative_source = convert->output(0);
+        }
+        if (output.get_partial_shape() != alternative_source.get_partial_shape()) {
+            const auto& shape = ov::op::v0::Constant::create(ov::element::i32,
+                                                             ov::Shape{output.get_shape().size()},
+                                                             output.get_shape());
+            alternative_source = std::make_shared(alternative_source, shape, false)->output(0);
+        }
+        output.replace(alternative_source);
+    } else {
+        // new instance of multiple value source
+        ov::OutputVector to_be_concated;
+        for (const auto& el : result->value) {
+            if (el.is()) {
+                const auto& value = el.as();
+                const auto& constant = ov::op::v0::Constant::create(output.get_element_type(), ov::Shape{1}, {value});
+                to_be_concated.push_back(constant->output(0));
+            } else if (el.is>()) {
+                const auto& symbol = el.as>();
+                auto alternative_output =
+                    get_alternative_source_from_value_or_shape_source(symbol_shape_source,
+                                                                      symbol,
+                                                                      ov::Shape{1},
+                                                                      output.get_element_type(),
+                                                                      output.get_node_shared_ptr(),
+                                                                      symbol_value_source);
+                if (alternative_output.get_node_shared_ptr())
+                    to_be_concated.push_back(alternative_output);
+                else
+                    break;
             } else {
-                multi_symbol_source[*result] = output;
+                break;
+            }
+        }
+        if (to_be_concated.size() != ov::shape_size(output.get_shape())) {
+            multi_symbol_source[*result] = output;
+        } else {
+            auto alternative_output = std::make_shared(to_be_concated, 0)->output(0);
+            ov::copy_runtime_info(output.get_node_shared_ptr(), alternative_output.get_node_shared_ptr());
+            if (output.get_partial_shape() != alternative_output.get_partial_shape()) {
+                alternative_output = std::make_shared(
+                                         alternative_output,
+                                         ov::op::v0::Constant::create(ov::element::i32,
+                                                                      ov::Shape{output.get_shape().size()},
+                                                                      output.get_shape()),
+                                         false)
+                                         ->output(0);
+                ov::copy_runtime_info(output.get_node_shared_ptr(), alternative_output.get_node_shared_ptr());
             }
+            ov::util::evaluate_both_bounds(alternative_output);
+            output.replace(alternative_output);
+            multi_symbol_source[*result] = alternative_output;
         }
     }
 }
@@ -434,6 +507,8 @@ bool ov::pass::OptimizeSymbolsUsedAsValues::run_on_model(const std::shared_ptr

     std::map> multi_symbol_source;
+    for (const auto& parameter : m->get_parameters())
+        save_shape_sources(parameter, symbol_shape_source);
     for (const auto& op : topological_order(m)) {
         // Result has output port which has shared (during validate_and_infer_type) tensor with input port.
         // Transformations may replace input of Result. After replacement and before Result::validate_and_infer_type --
@@ -443,13 +518,14 @@ bool ov::pass::OptimizeSymbolsUsedAsValues::run_on_model(const std::shared_ptr
         if (ov::is_type(op))
             continue;

-        // LTS maps aren't shared with sub-graphs because inner graph can not access outer graph for label sources
+        // LTS maps aren't shared with sub-graphs because inner graph can not access outer graph for symbol sources
         ov::op::util::process_subgraph(*this, op);

-        for (auto& output : op->outputs())
-            optimize_value_usage(output, symbol_shape_source, symbol_value_source);
+        for (auto& output : op->outputs()) {
+            optimize_single_value_usage(output, symbol_shape_source, symbol_value_source);
+            optimize_multi_value_usage(output, multi_symbol_source, symbol_shape_source, symbol_value_source);
+        }
         save_shape_sources(op, symbol_shape_source);
-        save_and_update_value_sources(op, multi_symbol_source);
     }
     return true;
 }
diff --git a/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp b/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp
index e4653ec084bafb..4da19a3c896ca6 100644
--- a/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp
+++ b/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp
@@ -19,7 +19,6 @@
 #include "openvino/pass/visualize_tree.hpp"
 #include "transformations/common_optimizations/shared_ops_optimization.hpp"
 #include "transformations/symbolic_transformations/symbolic_optimizations.hpp"
-#include "transformations/symbolic_transformations/utils.hpp"

 using namespace ov;
 using namespace ov::op;
@@ -174,7 +173,7 @@ TEST_F(TransformationTestsF, ValueOptimizationDoubleValue) {
         auto input = make_shared(element::f32, PartialShape::dynamic(4));

         auto dim_0 = get_dim_by_idx(input, {-1, -2}, element::i64);
-        auto dim_1 = get_dim_by_idx(input, {3, 2}, element::i32);
+        auto dim_1 = get_dim_by_idx(input, {3, 2}, element::i64);

         auto reshape_0 = make_shared(
             input,
@@ -182,28 +181,25 @@ TEST_F(TransformationTestsF, ValueOptimizationDoubleValue) {
             false);
         auto reshape_1 = make_shared(
             input,
-            make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {0}), dim_1}, 0),
+            make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {0}), dim_1}, 0),
             false);

         model = make_shared(NodeVector{reshape_0, reshape_1}, ParameterVector{input});

         manager.set_per_pass_validation(false);
-        manager.register_pass();
-        manager.register_pass();
-        manager.register_pass();
+        manager.register_pass();
     }
     {
         auto input = make_shared(element::f32, PartialShape::dynamic(4));

-        auto dim_0 = get_dim_by_idx(input, {3, 2}, element::i32);
-        auto dim_1 = std::make_shared(dim_0, element::i64);
+        auto dim_0 = get_dim_by_idx(input, {3, 2}, element::i64);

         auto reshape_0 = make_shared(
             input,
-            make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_1}, 0),
+            make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_0}, 0),
             false);
         auto reshape_1 = make_shared(
             input,
-            make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {0}), dim_0}, 0),
+            make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {0}), dim_0}, 0),
             false);

         model_ref = make_shared(NodeVector{reshape_0, reshape_1}, ParameterVector{input});
@@ -216,7 +212,7 @@ TEST_F(TransformationTestsF, ValueOptimizationSymbolAndValue) {
         auto input = make_shared(element::f32, PartialShape({-1, -1, 4, -1}));

         auto dim_0 = get_dim_by_idx(input, {-1, -2}, element::i64);
-        auto dim_1 = get_dim_by_idx(input, {3, 2}, element::i32);
+        auto dim_1 = get_dim_by_idx(input, {3, 2}, element::i64);

         auto reshape_0 = make_shared(
             input,
@@ -224,7 +220,7 @@ TEST_F(TransformationTestsF, ValueOptimizationSymbolAndValue) {
             false);
         auto reshape_1 = make_shared(
             input,
-            make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {-1}), dim_1}, 0),
+            make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_1}, 0),
             false);

         model = make_shared(NodeVector{reshape_0, reshape_1}, ParameterVector{input});
@@ -237,11 +233,10 @@ TEST_F(TransformationTestsF, ValueOptimizationSymbolAndValue) {
     }
     {
         auto input = make_shared(element::f32, PartialShape({-1, -1, 4, -1}));
-        auto dim_0 = make_shared(
-            OutputVector{v0::Constant::create(element::i32, {1}, {-1}), get_dim_by_idx(input, {3, 2}, element::i32)},
+        auto dim_0 = make_shared(
+            OutputVector{v0::Constant::create(element::i64, {1}, {-1}), get_dim_by_idx(input, {3}, element::i64), v0::Constant::create(element::i64, {1}, {4})},
             0);
-        auto dim_1 = std::make_shared(dim_0, element::i64);

-        auto reshape_0 = make_shared(input, dim_1, false);
+        auto reshape_0 = make_shared(input, dim_0, false);
         auto reshape_1 = make_shared(input, dim_0, false);

         model_ref = make_shared(NodeVector{reshape_0, reshape_1}, ParameterVector{input});

From 6110c677b52ed135a1d00f14a0bf6d445788fa85 Mon Sep 17 00:00:00 2001
From: Evgeniia Nugmanova
Date: Thu, 14 Nov 2024 14:13:46 +0400
Subject: [PATCH 02/11] style

Signed-off-by: Evgeniia Nugmanova

---
 .../symbolic_transformations/symbol_optimization.cpp       | 3 ++-
 .../tests/symbolic_transformations/symbol_optimization.cpp | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
index 07583ae486ae74..71463bc56cbd54 100644
--- a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
+++ b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
@@ -442,7 +442,8 @@ void optimize_multi_value_usage(ov::Output& output,
         auto alternative_source = multi_symbol_source[*result];
         if (output.get_element_type() != alternative_source.get_element_type()) {
             auto convert = std::make_shared(alternative_source, output.get_element_type());
-            // should this be a convert like? no, output should be erased ideally. or should we just avoid dynamic data type?
+            // should this be a convert like? no, output should be erased ideally. or should we just avoid dynamic data
+            // type?
             ov::copy_runtime_info(output.get_node_shared_ptr(), convert);
             alternative_source = convert->output(0);
         }
diff --git a/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp b/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp
index 4da19a3c896ca6..2070a2bce7d349 100644
--- a/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp
+++ b/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp
@@ -232,9 +232,10 @@ TEST_F(TransformationTestsF, ValueOptimizationSymbolAndValue) {
     }
     {
         auto input = make_shared(element::f32, PartialShape({-1, -1, 4, -1}));
-        auto dim_0 = make_shared(
-            OutputVector{v0::Constant::create(element::i64, {1}, {-1}), get_dim_by_idx(input, {3}, element::i64), v0::Constant::create(element::i64, {1}, {4})},
-            0);
+        auto dim_0 = make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}),
+                                              get_dim_by_idx(input, {3}, element::i64),
+                                              v0::Constant::create(element::i64, {1}, {4})},
+                                 0);

         auto reshape_0 = make_shared(input, dim_0, false);
         auto reshape_1 = make_shared(input, dim_0, false);

From dab2af56888b54067452d4e7b0ac8f59b65aff96 Mon Sep 17 00:00:00 2001
From: Evgeniia Nugmanova
Date: Thu, 21 Nov 2024 12:39:08 +0400
Subject: [PATCH 03/11] too early for ShapeOf replacements

Signed-off-by: Evgeniia Nugmanova

---
 .../symbolic_transformations/symbol_optimization.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
index 71463bc56cbd54..66dfa236ed181c 100644
--- a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
+++ b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
@@ -508,8 +508,6 @@ bool ov::pass::OptimizeSymbolsUsedAsValues::run_on_model(const std::shared_ptr

     std::map> multi_symbol_source;
-    for (const auto& parameter : m->get_parameters())
-        save_shape_sources(parameter, symbol_shape_source);
     for (const auto& op : topological_order(m)) {
         // Result has output port which has shared (during validate_and_infer_type) tensor with input port.
         // Transformations may replace input of Result. After replacement and before Result::validate_and_infer_type --

From 14e9f63c5cb73bf7cc5038fd54f349ef9b7e6cd2 Mon Sep 17 00:00:00 2001
From: Evgeniia Nugmanova
Date: Thu, 21 Nov 2024 12:43:09 +0400
Subject: [PATCH 04/11] review comments

Signed-off-by: Evgeniia Nugmanova

---
 .../symbolic_transformations/symbol_optimization.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
index 66dfa236ed181c..5b83e5bb1e4497 100644
--- a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
+++ b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
@@ -344,10 +344,10 @@ std::vector> topological_order(const std::shared_ptr

 void save_shape_sources(const std::shared_ptr& op, STS_map& symbol_shape_source) {
-    bool is_shape = ov::is_type(op) || ov::is_type(op);
-    bool is_parameter = ov::is_type(op);
-    if (is_shape || is_parameter) {
-        const auto& output = is_shape ? op->input_value(0) : op->output(0);
+    const auto is_shape_of = ov::is_type(op);
+    const auto is_parameter = ov::is_type(op);
+    if (is_shape_of || is_parameter) {
+        const auto& output = is_shape_of ? op->input_value(0) : op->output(0);
         if (output.get_partial_shape().rank().is_dynamic())
             return;
         for (const auto& d : output.get_partial_shape()) {
@@ -434,7 +434,7 @@ void optimize_multi_value_usage(ov::Output& output,
                                 STS_map& symbol_value_source) {
     if (output.get_tensor().get_value_symbol().size() < 2)
         return;  // singular values are handled by optimize_single_value_usage helper
-    auto result = OutputValue::make(output);
+    const auto result = OutputValue::make(output);
     if (!result)
         return;
     if (multi_symbol_source.count(*result)) {

From f1645f0f70a78c002cad0b585a2e00adf816a2cd Mon Sep 17 00:00:00 2001
From: Evgeniia Nugmanova
Date: Fri, 22 Nov 2024 14:01:03 +0400
Subject: [PATCH 05/11] Correct RoPE pattern

Signed-off-by: Evgeniia Nugmanova

---
 .../common_optimizations/fuse_rotary_positional_embeddings.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/common/transformations/src/transformations/common_optimizations/fuse_rotary_positional_embeddings.cpp b/src/common/transformations/src/transformations/common_optimizations/fuse_rotary_positional_embeddings.cpp
index 143603f0415373..838ad9aeb5a187 100644
--- a/src/common/transformations/src/transformations/common_optimizations/fuse_rotary_positional_embeddings.cpp
+++ b/src/common/transformations/src/transformations/common_optimizations/fuse_rotary_positional_embeddings.cpp
@@ -486,7 +486,8 @@ ov::pass::RoPEFusionChatGLM::RoPEFusionChatGLM(int split_output_id, const bool s
     auto const_target_shape_2 = makeConst({batch, 1, seq_len, ndims / 2, 2});

     // Slice cos_sin_cache to support 2-dimentional RoPE
-    auto ScatterUpdate = makePattern({{0, 0}, {1}, seq_length, {0}}, {});
+    auto zero_seqlen = makePattern({{0}, seq_length}, {{"axis", 0}});
+    auto ScatterUpdate = makePattern({{0, 0}, {1}, seq_length, {0}}, {}) | zero_seqlen;
     auto slice_Slice_449_1d = makePattern({cos_sin_cache, {0}, seq_length, {1}, {1}});
     auto slice_Slice_449_2d = makePattern({cos_sin_cache, {0, 0}, ScatterUpdate, {1, 1}, {0}});
     auto slice_StridedSlice_449 = GenStridedSlice(cos_sin_cache, {0, 0}, ScatterUpdate, {1, 1}, 1);

From 18f40f72441f75cb15336e7bc7ca90da2009f30c Mon Sep 17 00:00:00 2001
From: Evgeniia Nugmanova
Date: Sat, 23 Nov 2024 11:35:58 +0400
Subject: [PATCH 06/11] Causal Mask pattern update

Signed-off-by: Evgeniia Nugmanova

---
 .../cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp
index 3e8a7fc6d5a3f0..fd743c4aebc912 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp
@@ -123,8 +123,9 @@ CausalMaskPreprocess::CausalMaskPreprocess() {
     auto ShapeOf_49034 = makePattern({attention_mask});  // tensor_array
     auto Gather_41642 = makePattern({ShapeOf_49034, {1}, 0},
                                     {{"batch_dims", 0}});  // tensor_array
+    auto alternative_concat = makePattern({{0}, {0}, {0}, Gather_41642}, {{"axis", 0}});
     auto ScatterUpdate_93502 =
-        makePattern({{0, 0, 0, 0}, {3}, Gather_41642, {0}});  // tensor_array
+        makePattern({{0, 0, 0, 0}, {3}, Gather_41642, {0}}) | alternative_concat;  // tensor_array
     auto SliceAssign_201_Slice = makePattern({SliceAssign_201_Reshape, {0}, Gather_41642, {1}, {3}});
     auto SliceAssign_201_StridedSlice =
         GenStridedSlice(SliceAssign_201_Reshape, {0, 0, 0, 0}, ScatterUpdate_93502, {1, 1, 1, 1}, 3);  // tensor_array
@@ -179,8 +180,9 @@ CausalMaskPreprocess::CausalMaskPreprocess() {
     auto SliceAssign_201_Reshape_3 =
         makePattern({SliceAssign_201_ScatterNDUpdate, {-1, 1, max_seq_len, max_seq_len}},
                     {{"special_zero", true}});  // tensor_array
+    auto alternative_concat_1 = makePattern({{0}, {0}, {0}, Gather_41642}, {{"axis", 0}});
     auto ScatterUpdate_93554 =
-        makePattern({{0, 0, 0, 0}, {3}, kvLen, {0}});  // tensor_array
+        makePattern({{0, 0, 0, 0}, {3}, kvLen, {0}}) | alternative_concat_1;  // tensor_array
     auto slice_StridedSlice_14 =
         GenStridedSlice(SliceAssign_201_Reshape_3, {0, 0, 0, 0}, ScatterUpdate_93554, {1, 1, 1, 1}, 3);  // tensor_array
     auto slice_Slice_14 = makePattern({SliceAssign_201_Reshape_3, {0}, kvLen, {1}, {3}});

From 653b8a0c4339057db304412581ecae6bb34ee660 Mon Sep 17 00:00:00 2001
From: Evgeniia Nugmanova
Date: Mon, 23 Dec 2024 14:02:36 +0400
Subject: [PATCH 07/11] Fixes hanging Parameter problem

Signed-off-by: Evgeniia Nugmanova

---
 src/core/src/pass/sdpa_to_paged_attention.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp
index 872e4539eda8df..8dbb59ee918a89 100644
--- a/src/core/src/pass/sdpa_to_paged_attention.cpp
+++ b/src/core/src/pass/sdpa_to_paged_attention.cpp
@@ -25,7 +25,7 @@ ov::pass::SDPAToPagedAttention::SDPAToPagedAttention(bool use_block_indices_inpu
       m_use_score_outputs(use_score_outputs) {}

 static std::shared_ptr setName(std::shared_ptr node, const char* name) {
-    // Set name for both node and output tensor (should be only one tensor, and any other names will be overriden by a
+    // Set name for both node and output tensor (should be only one tensor, and any other names will be overridden by a
     // given single name)
     node->set_friendly_name(name);
     OPENVINO_ASSERT(node->get_output_size() == 1);
@@ -149,8 +149,10 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr
+        if (!strcmp(param_name, "attention_mask") && param->output(0).get_target_inputs().size() == 1)
+            param->output(0).get_target_inputs().begin()->replace_source_output(input_ids_node->output(0));
         model->remove_parameter(param);

         if (param->output(0).get_target_inputs().size() == 0) {

From 7c183dbf80ce4fa00163482cf31c62ea0747f6a3 Mon Sep 17 00:00:00 2001
From: Evgeniia Nugmanova
Date: Tue, 24 Dec 2024 12:34:03 +0400
Subject: [PATCH 08/11] Fix build

Signed-off-by: Evgeniia Nugmanova

---
 src/core/src/pass/sdpa_to_paged_attention.cpp | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp
index 8dbb59ee918a89..7ed253f4210a79 100644
--- a/src/core/src/pass/sdpa_to_paged_attention.cpp
+++ b/src/core/src/pass/sdpa_to_paged_attention.cpp
@@ -149,21 +149,25 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr
-        if (!strcmp(param_name, "attention_mask") && param->output(0).get_target_inputs().size() == 1)
-            param->output(0).get_target_inputs().begin()->replace_source_output(input_ids_node->output(0));
+        auto target_inputs = param->output(0).get_target_inputs();
+        if (!strcmp(param_name, "attention_mask") && target_inputs.size() == 1 &&
+            ov::is_type(target_inputs.begin()->get_node())) {
+            target_inputs.begin()->replace_source_output(input_ids_node->output(0));
+            target_inputs = param->output(0).get_target_inputs();
+        }
         model->remove_parameter(param);

-        if (param->output(0).get_target_inputs().size() == 0) {
+        if (!target_inputs.empty()) {
             std::stringstream consumers;
             consumers << std::endl;
-            for (auto& input : param->output(0).get_target_inputs()) {
+            for (auto& input : target_inputs) {
                 consumers << *input.get_node() << std::endl;
             }
-            OPENVINO_ASSERT(param->output(0).get_target_inputs().size() == 0,
+            OPENVINO_ASSERT(target_inputs.empty(),
                             "PagedAttention transformation failed: couldn't remove ",
-                            param->output(0).get_target_inputs().size(),
+                            target_inputs.size(),
                             " inputs of ",
                             param_name,
                             " input: ",

From 9b4103d81a1473229b8a17ab0bc6e8385ba99fd3 Mon Sep 17 00:00:00 2001
From: Evgenya Nugmanova
Date: Thu, 26 Dec 2024 09:52:01 +0000
Subject: [PATCH 09/11] Comments addressed

---
 .../symbol_optimization.cpp                   |  2 --
 .../fuse_rotary_positional_embeddings.cpp     | 14 ++++++++++++++
 src/core/src/pass/sdpa_to_paged_attention.cpp |  2 +-
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
index 5b83e5bb1e4497..a7e8c5044ff111 100644
--- a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
+++ b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp
@@ -442,8 +442,6 @@ void optimize_multi_value_usage(ov::Output& output,
         auto alternative_source = multi_symbol_source[*result];
         if (output.get_element_type() != alternative_source.get_element_type()) {
             auto convert = std::make_shared(alternative_source, output.get_element_type());
-            // should this be a convert like? no, output should be erased ideally. or should we just avoid dynamic data
-            // type?
             ov::copy_runtime_info(output.get_node_shared_ptr(), convert);
             alternative_source = convert->output(0);
         }
diff --git a/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp b/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp
index a42e11120d7276..b533d273d434dd 100644
--- a/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp
+++ b/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp
@@ -3,6 +3,7 @@
 //

 #include "transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp"
+#include "transformations/symbolic_transformations/symbolic_optimizations.hpp"

 #include 

@@ -124,6 +125,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_LLama2_no_gather) {
     const size_t num_head = 32;

     model = buildROPE_Llama2(batch, seq_length, max_position_embeddings, ndims, false);
+    manager.register_pass();
     manager.register_pass();

     {
@@ -159,6 +161,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_LLama2_with_gather) {
     const size_t num_head = 32;

     model = buildROPE_Llama2(batch, seq_length, max_position_embeddings, ndims, true);
+    manager.register_pass();
     manager.register_pass();

     {
@@ -300,6 +303,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_GPTNEOX_no_gather) {
     const int max_position_embeddings = 2048;

     model = buildROPE_GPTNEOX(batch, seq_len, max_position_embeddings, ndims, num_heads, rotary_ndims, false);
+    manager.register_pass();
     manager.register_pass();
     {
         auto input =
@@ -335,6 +339,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_GPTNEOX_with_gather) {
     const int max_position_embeddings = 2048;

     model = buildROPE_GPTNEOX(batch, seq_len, max_position_embeddings, ndims, num_heads, rotary_ndims, true);
+    manager.register_pass();
     manager.register_pass();
     {
         auto cos_sin = makeCosSinCache(max_position_embeddings, rotary_ndims);
@@ -456,6 +461,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_GPTJ) {
         model = std::make_shared(ov::NodeVector{permute_Transpose_828},
                                  ov::ParameterVector{input, gather_sin_cos});
     }
+    manager.register_pass();
     manager.register_pass();
     {
         auto input =
@@ -564,6 +570,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_chatGML) {
         model = std::make_shared(ov::NodeVector{aten_cat_Concat_425},
                                  ov::ParameterVector{input, seq_length, cos_sin_cache});
     }
+    manager.register_pass();
     manager.register_pass();
     {
         auto input = std::make_shared(ov::element::f32, ov::Shape{seq_len, batch, 4608});
@@ -643,6 +650,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_chatGML_Slice) {
         model = std::make_shared(ov::NodeVector{cat_Concat},
                                  ov::ParameterVector{input, seq_length, cos_sin_cache});
     }
+    manager.register_pass();
     manager.register_pass();
     {
         auto input = std::make_shared(ov::element::f32, ov::Shape{seq_len, batch, 4608});
@@ -728,6 +736,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_GPTJ_Slice) {
         model = std::make_shared(ov::NodeVector{permute_Transpose},
                                  ov::ParameterVector{input, gather_sin_cos});
     }
+    manager.register_pass();
     manager.register_pass();
     {
         auto input =
@@ -842,6 +851,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_chatGML_2d_rope) {
         model = std::make_shared(ov::NodeVector{cat_Concat_425},
                                  ov::ParameterVector{input, cos_sin_cache, position_ids});
     }
+    manager.register_pass();
     manager.register_pass(true);
     {
         auto input = std::make_shared(ov::element::f32, ov::Shape{batch, seq_len, 4608});
@@ -951,6 +961,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_chatGML_nano_2d_rope) {
         model = std::make_shared(ov::NodeVector{flatten_Reshape_421},
                                  ov::ParameterVector{input, cos_sin_cache, position_ids});
     }
+    manager.register_pass();
     manager.register_pass(true);
     {
         auto input = std::make_shared(ov::element::f32, ov::Shape{batch, seq_len, 3072});
@@ -1007,6 +1018,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_Flux_mul) {

         model = std::make_shared(ov::NodeVector{y}, ov::ParameterVector{x, t_cos, t_sin});
     }
+    manager.register_pass();
     manager.register_pass(true);
     {
         auto x =
@@ -1061,6 +1073,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_Flux_squeeze_mul_unsqueeze) {

         model = std::make_shared(ov::NodeVector{y}, ov::ParameterVector{x, t_cos, t_sin});
     }
+    manager.register_pass();
     manager.register_pass(true);
     {
         auto x =
@@ -1115,6 +1128,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_Flux_mul_squeeze_unsqueeze) {

         model = std::make_shared(ov::NodeVector{y}, ov::ParameterVector{x, t_cos, t_sin});
     }
+    manager.register_pass();
     manager.register_pass(true);
     {
         auto x =
diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp
index 7ed253f4210a79..84f824baaaa2f9 100644
--- a/src/core/src/pass/sdpa_to_paged_attention.cpp
+++ b/src/core/src/pass/sdpa_to_paged_attention.cpp
@@ -154,7 +154,7 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr
         auto target_inputs = param->output(0).get_target_inputs();
         if (!strcmp(param_name, "attention_mask") && target_inputs.size() == 1 &&
             ov::is_type(target_inputs.begin()->get_node())) {
-            target_inputs.begin()->replace_source_output(input_ids_node->output(0));
+            target_inputs.begin()->replace_source_output(unsqueezed_input_ids->output(0));
             target_inputs = param->output(0).get_target_inputs();
         }

From f71f20bb60eb9419a4a0a7445368f342be8ad50f Mon Sep 17 00:00:00 2001
From: Evgeniia Nugmanova
Date: Thu, 26 Dec 2024 14:28:06 +0400
Subject: [PATCH 10/11] style

Signed-off-by: Evgeniia Nugmanova

---
 .../common_optimizations/fuse_rotary_positional_embeddings.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp b/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp
index b533d273d434dd..4c0d3e0cd1a4fd 100644
--- a/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp
+++ b/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp
@@ -3,7 +3,6 @@
 //

 #include "transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp"
-#include "transformations/symbolic_transformations/symbolic_optimizations.hpp"

 #include 

@@ -14,6 +13,7 @@
 #include "openvino/opsets/opset3.hpp"
 #include "ov_ops/rotary_positional_embeddings.hpp"
 #include "ov_ops/type_relaxed.hpp"
+#include "transformations/symbolic_transformations/symbolic_optimizations.hpp"
 #include "transformations/utils/gen_pattern.hpp"

 using namespace testing;

From 9a10ad5df0e9ecd4fc5d575baaf1316e50ca278c Mon Sep 17 00:00:00 2001
From: Evgeniia Nugmanova
Date: Fri, 27 Dec 2024 11:10:05 +0400
Subject: [PATCH 11/11] test

Signed-off-by: Evgeniia Nugmanova

---
 .../common_optimizations/fuse_rotary_positional_embeddings.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp b/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp
index 4c0d3e0cd1a4fd..e93eae340713d7 100644
--- a/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp
+++ b/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp
@@ -570,7 +570,6 @@ TEST_F(TransformationTestsF, ConvertToROPE_chatGML) {
         model = std::make_shared(ov::NodeVector{aten_cat_Concat_425},
                                  ov::ParameterVector{input, seq_length, cos_sin_cache});
     }
-    manager.register_pass();
     manager.register_pass();
     {
         auto input = std::make_shared(ov::element::f32, ov::Shape{seq_len, batch, 4608});
@@ -851,7 +850,6 @@ TEST_F(TransformationTestsF, ConvertToROPE_chatGML_2d_rope) {
         model = std::make_shared(ov::NodeVector{cat_Concat_425},
                                  ov::ParameterVector{input, cos_sin_cache, position_ids});
     }
-    manager.register_pass();
     manager.register_pass(true);
     {
         auto input = std::make_shared(ov::element::f32, ov::Shape{batch, seq_len, 4608});
@@ -961,7 +959,6 @@ TEST_F(TransformationTestsF, ConvertToROPE_chatGML_nano_2d_rope) {
         model = std::make_shared(ov::NodeVector{flatten_Reshape_421},
                                  ov::ParameterVector{input, cos_sin_cache, position_ids});
     }
-    manager.register_pass();
     manager.register_pass(true);
     {
         auto input = std::make_shared(ov::element::f32, ov::Shape{batch, seq_len, 3072});