diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3ebd844d6a5a1..09869a374d6de 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1896,6 +1896,10 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
   using namespace llvm::VPlanPatternMatch;
   auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
     assert(OrigMask && "Unmasked recipe when folding tail");
+    // HeaderMask will be handled using EVL.
+    VPValue *Mask;
+    if (match(OrigMask, m_LogicalAnd(m_Specific(HeaderMask), m_VPValue(Mask))))
+      return Mask;
     return HeaderMask == OrigMask ? nullptr : OrigMask;
   };
 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
index a40255c031619..354cc91c6f01f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
@@ -348,11 +348,8 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
 ; IF-EVL-INLOOP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
 ; IF-EVL-INLOOP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; IF-EVL-INLOOP-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
 ; IF-EVL-INLOOP-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-INLOOP-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
-; IF-EVL-INLOOP-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
-; IF-EVL-INLOOP-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-INLOOP-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; IF-EVL-INLOOP:       vector.body:
 ; IF-EVL-INLOOP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -360,18 +357,11 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT:    [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
 ; IF-EVL-INLOOP-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
-; IF-EVL-INLOOP-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EVL_BASED_IV]], i64 0
-; IF-EVL-INLOOP-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IF-EVL-INLOOP-NEXT:    [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; IF-EVL-INLOOP-NEXT:    [[TMP15:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]]
-; IF-EVL-INLOOP-NEXT:    [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP15]]
-; IF-EVL-INLOOP-NEXT:    [[TMP16:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]]
 ; IF-EVL-INLOOP-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-INLOOP-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
 ; IF-EVL-INLOOP-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-INLOOP-NEXT:    [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
-; IF-EVL-INLOOP-NEXT:    [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i1> zeroinitializer
-; IF-EVL-INLOOP-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> [[TMP20]], i32 [[TMP12]])
+; IF-EVL-INLOOP-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> [[TMP19]], i32 [[TMP12]])
 ; IF-EVL-INLOOP-NEXT:    [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
 ; IF-EVL-INLOOP-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-INLOOP-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll
index bf3f01343eb24..73c14f86e2782 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll
@@ -24,33 +24,23 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
 ; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
 ; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
 ; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; IF-EVL-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
 ; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
-; IF-EVL-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
-; IF-EVL-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; IF-EVL:       vector.body:
 ; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
-; IF-EVL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EVL_BASED_IV]], i64 0
-; IF-EVL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IF-EVL-NEXT:    [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; IF-EVL-NEXT:    [[TMP13:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP12]]
-; IF-EVL-NEXT:    [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP13]]
-; IF-EVL-NEXT:    [[TMP14:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]]
 ; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0
 ; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
 ; IF-EVL-NEXT:    [[TMP17:%.*]] = icmp ne <vscale x 4 x i32> [[VP_OP_LOAD]], zeroinitializer
-; IF-EVL-NEXT:    [[TMP18:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i1> zeroinitializer
 ; IF-EVL-NEXT:    [[TMP19:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i32 0
-; IF-EVL-NEXT:    [[VP_OP_LOAD3:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP10]])
+; IF-EVL-NEXT:    [[VP_OP_LOAD3:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP17]], i32 [[TMP10]])
 ; IF-EVL-NEXT:    [[VP_OP:%.*]] = add <vscale x 4 x i32> [[VP_OP_LOAD]], [[VP_OP_LOAD3]]
-; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP10]])
+; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP17]], i32 [[TMP10]])
 ; IF-EVL-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP10]] to i64
 ; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
index b6d92caa46ab0..695af0d241159 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
@@ -132,25 +132,18 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
 ; IF-EVL-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32
-; IF-EVL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EVL_BASED_IV]], i64 0
-; IF-EVL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IF-EVL-NEXT:    [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; IF-EVL-NEXT:    [[TMP9:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
-; IF-EVL-NEXT:    [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP9]]
-; IF-EVL-NEXT:    [[TMP10:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], splat (i64 1023)
 ; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1
 ; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[OFFSET_IDX3]]
 ; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
 ; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 100)
-; IF-EVL-NEXT:    [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
 ; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]]
 ; IF-EVL-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP26]]
 ; IF-EVL-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP26]]
 ; IF-EVL-NEXT:    [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]]
 ; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]]
-; IF-EVL-NEXT:    [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; IF-EVL-NEXT:    [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
@@ -160,7 +153,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
 ; IF-EVL-NEXT:    [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]]
 ; IF-EVL-NEXT:    [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]]
 ; IF-EVL-NEXT:    [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-NEXT:    [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; IF-EVL-NEXT:    [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[TMP28:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP28]], [[EVL_BASED_IV]]
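
For reviewers skimming the VPlanTransforms.cpp hunk above: the new early return folds a mask of the form `logical-and(HeaderMask, M)` down to `M`, because once the recipe is EVL-predicated the header-mask conjunct (which only disables tail lanes) is already enforced by the explicit vector length; the existing line below it still folds a bare `HeaderMask` to "no mask". Here is a minimal standalone sketch of that fold, using a hypothetical `MaskExpr` node in place of the real VPValue/VPlanPatternMatch machinery (names are illustrative, not LLVM API):

```cpp
#include <cassert>
#include <cstdio>

// Hypothetical stand-in for a VPlan mask value: a leaf, or a logical-and of
// two operands when LHS/RHS are non-null. Not the real VPValue class.
struct MaskExpr {
  const char *Name;
  MaskExpr *LHS = nullptr;
  MaskExpr *RHS = nullptr;
};

// Mirrors the patched GetNewMask lambda: the header-mask conjunct is implied
// by EVL, so it can be stripped from the recipe's mask.
MaskExpr *getNewMask(MaskExpr *HeaderMask, MaskExpr *OrigMask) {
  assert(OrigMask && "Unmasked recipe when folding tail");
  // logical-and(HeaderMask, M) --> M
  if (OrigMask->LHS == HeaderMask && OrigMask->RHS)
    return OrigMask->RHS;
  // A bare HeaderMask folds to "no mask": all lanes up to EVL are active.
  return HeaderMask == OrigMask ? nullptr : OrigMask;
}

int main() {
  MaskExpr Header{"header.mask"};
  MaskExpr Cond{"cond.mask"};
  MaskExpr And{"and", &Header, &Cond};
  printf("and(header, cond) -> %s\n", getNewMask(&Header, &And)->Name);
  printf("header alone      -> %s\n",
         getNewMask(&Header, &Header) ? "kept" : "null (unmasked)");
}
```

This fold is exactly what the test updates reflect: the `stepvector`/`icmp ule`/`select` chains that materialized the header mask disappear, and the compares feeding the masked operations (`[[TMP19]]`, `[[TMP17]]`, `[[TMP14]]`) are used directly as masks, with tail-lane limiting left to the EVL operand (`[[TMP12]]`, `[[TMP10]]`, `[[TMP5]]`).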