
Commit 42b3f91

[mlir] Vectorize tensor.pad with low padding for unit dims (#133808)
We currently do not have masked vectorization support for tensor.pad with low padding. However, we can allow this in the special case where the result dimension after padding is a unit dim. The reason is that when we actually have a low pad on a unit dim, the input size of that dimension will be (or should be, for correct IR) dynamically zero, and hence we will create a zero mask, which is correct. If the low pad is dynamically zero, the lowering is correct as well.

---------

Signed-off-by: Nirvedh <[email protected]>
1 parent db21ae7 commit 42b3f91
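To make the zero-mask argument concrete, here is a minimal MLIR sketch (not part of the commit; %src, %d0, %d1, %c0, and %cst are placeholder SSA values mirroring the vectorization.mlir test added below). When a unit result dim carries a non-zero low pad, the input size %d0 along that dim is dynamically zero, so the created mask is all-false along it and the masked read produces only the pad value:

// If %d0 == 0 at runtime (unit result dim fully covered by low padding),
// the mask is all-false along dim 0, so the read yields the pad value %cst.
%mask = vector.create_mask %d0, %d1 : vector<1x4xi1>
%read = vector.mask %mask {
  vector.transfer_read %src[%c0, %c0], %cst
    {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x4xf32>
} : vector<1x4xi1> -> vector<1x4xf32>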

3 files changed: +87 −4 lines changed


mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp

+18 −4
@@ -2178,11 +2178,25 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
                                  inputVectorSizes)))
     return failure();
 
-  if (llvm::any_of(padOp.getLow(), [](Value v) {
-        std::optional<int64_t> res = getConstantIntValue(v);
-        return !res.has_value() || res.value() != 0;
+  // Padding with non-zero low pad values is not supported, unless the
+  // corresponding result dim is 1 as this would require shifting the results to
+  // the right for the low padded dims by the required amount of low padding.
+  // However, we do support low padding if the dims being low padded have result
+  // sizes of 1. The reason is when we have a low pad on a unit result dim, the
+  // input size of that dimension will be dynamically zero (as the sum of the
+  // low pad and input dim size has to be one) and hence we will create a zero
+  // mask as the lowering logic just makes the mask one for the input dim size -
+  // which is zero here. Hence we will load the pad value which is what we want
+  // in this case. If the low pad is dynamically zero then the lowering is
+  // correct as well as no shifts are necessary.
+  if (llvm::any_of(llvm::enumerate(padOp.getLow()), [&](const auto &en) {
+        Value padValue = en.value();
+        unsigned pos = en.index();
+        std::optional<int64_t> pad = getConstantIntValue(padValue);
+        return (!pad.has_value() || pad.value() != 0) &&
+               resultTensorShape[pos] != 1;
       })) {
-    LDBG("low pad must all be zero: " << padOp << "\n");
+    LDBG("low pad must all be zero for all non unit dims: " << padOp << "\n");
     return failure();
   }
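To summarize the new precondition with a hedged example (placeholder values %t, %cst, %l0, %c0, %h0, %h1; not taken from the commit): a low pad that is not statically zero is now accepted only when the corresponding result size is 1.

// Accepted after this change: non-constant low pad %l0 on a dim whose
// result size is 1 (its input size must then be dynamically zero or one).
%ok = tensor.pad %t low[%l0, %c0] high[%h0, %h1] {
^bb0(%i: index, %j: index):
  tensor.yield %cst : f32
} : tensor<?x?xf32> to tensor<1x4xf32>

// Still rejected by vectorizePadOpPrecondition: the same low pad on a dim
// whose result size is 2, since that would require shifting results right.
%ko = tensor.pad %t low[%l0, %c0] high[%h0, %h1] {
^bb0(%i: index, %j: index):
  tensor.yield %cst : f32
} : tensor<?x?xf32> to tensor<2x4xf32>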

mlir/test/Dialect/Linalg/vectorization-unsupported.mlir

+27
@@ -305,6 +305,33 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// Padding with non-zero low pad values is not supported, unless the corresponding
+// result dim is 1. Here `%l0` being a non-zero low pad applied to a
+// non-unit result dimension makes this case unsupported.
+func.func @tensor_pad_non_zero_low_pad(
+    %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
+    -> tensor<2x4xf32> {
+  // expected-error @+3 {{Attempted to vectorize, but failed}}
+  %cst = arith.constant 42.43 : f32
+  %c0 = arith.constant 0 : index
+  %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {
+    ^bb0(%hh1: index, %hh2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<2x4xf32>
+  return %1: tensor<2x4xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 // With dynamically shaped source, the vectorizer infers the vector size for
 // xfer Ops from the destination tensor and, conservatively, assumes
 // out-of-bounds accesses. Out-of-bounds accesses require a pad value, but

mlir/test/Dialect/Linalg/vectorization.mlir

+42
@@ -664,6 +664,48 @@ module attributes {transform.with_named_sequence} {
   }
 }
 
+// -----
+// This case is supported because low padding `%l0` is applied on
+// a unit dimension which is supported, non unit result dimension low
+// padding is currently unsupported.
+// CHECK-LABEL: func @test_masked_vectorize_non_zero_low_pad_unit_res_dim
+func.func @test_masked_vectorize_non_zero_low_pad_unit_res_dim(
+    %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
+    -> tensor<1x4xf32>
+{
+  // CHECK-DAG: %[[C42:.*]] = arith.constant 4.243000e+01 : f32
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[C0_1:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[D0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK-DAG: %[[D1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK: %[[MASK:.*]] = vector.create_mask %[[D0]], %[[D1]] : vector<1x4xi1>
+  // CHECK: %[[MASKED_READ:.*]] = vector.mask %[[MASK]] {
+  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[C0_1]], %[[C0_1]]], %[[C42]]
+  // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x4xf32>
+  // CHECK-SAME: } : vector<1x4xi1> -> vector<1x4xf32>
+  // CHECK-DAG: %[[EMPTY:.*]] = tensor.empty() : tensor<1x4xf32>
+  // CHECK-DAG: %[[C0_2:.*]] = arith.constant 0 : index
+  // CHECK: %[[MASKED_WRITE:.*]] = vector.transfer_write %[[MASKED_READ]], %[[EMPTY]][%[[C0_2]], %[[C0_2]]]
+  // CHECK-SAME: {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
+  // CHECK: return %[[MASKED_WRITE]] : tensor<1x4xf32>
+  %cst = arith.constant 42.43 : f32
+  %c0 = arith.constant 0 : index
+  %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {
+    ^bb0(%hh1: index, %hh2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<1x4xf32>
+  return %1: tensor<1x4xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [1, 4] : !transform.any_op
+    transform.yield
+  }
+}
+
 // -----
 
 // Input identical as the test in vectorization-with-patterns.mlir. Output is
