[llvm] [LV][EVL] Support reversed loads/stores. (PR #88025)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri May 3 04:28:09 PDT 2024
https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/88025
>From ae89a26717642c5f46e378879c70d1c7d3b1dfcf Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Mon, 8 Apr 2024 17:56:25 +0000
Subject: [PATCH 1/4] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/LoopVectorize.cpp | 67 +++++++++++++------
...-force-tail-with-evl-reverse-load-store.ll | 23 +++----
2 files changed, 55 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9e22dce384773e..797a3fd1e9dba4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1579,13 +1579,7 @@ class LoopVectorizationCostModel {
/// Returns true if VP intrinsics with explicit vector length support should
/// be generated in the tail folded loop.
bool foldTailWithEVL() const {
- return getTailFoldingStyle() == TailFoldingStyle::DataWithEVL &&
- // FIXME: remove this once vp_reverse is supported.
- none_of(
- WideningDecisions,
- [](const std::pair<std::pair<Instruction *, ElementCount>,
- std::pair<InstWidening, InstructionCost>>
- &Data) { return Data.second.first == CM_Widen_Reverse; });
+ return getTailFoldingStyle() == TailFoldingStyle::DataWithEVL;
}
/// Returns true if the Phi is part of an inloop reduction.
@@ -9361,10 +9355,17 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
/// Creates either vp_store or vp_scatter intrinsics calls to represent
/// predicated store/scatter.
-static Instruction *
-lowerStoreUsingVectorIntrinsics(IRBuilderBase &Builder, Value *Addr,
- Value *StoredVal, bool IsScatter, Value *Mask,
- Value *EVL, const Align &Alignment) {
+static Instruction *lowerStoreUsingVectorIntrinsics(
+ IRBuilderBase &Builder, Value *Addr, Value *StoredVal, bool IsScatter,
+ bool IsReverse, Value *Mask, Value *EVL, const Align &Alignment) {
+ if (IsReverse) {
+ auto *StoredValTy = cast<VectorType>(StoredVal->getType());
+ Value *BlockInMaskPart =
+ Builder.getAllOnesMask(StoredValTy->getElementCount());
+ StoredVal = Builder.CreateIntrinsic(
+ StoredValTy, Intrinsic::experimental_vp_reverse,
+ {StoredVal, BlockInMaskPart, EVL}, nullptr, "vp.reverse");
+ }
CallInst *Call;
if (IsScatter) {
Call = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
@@ -9384,11 +9385,9 @@ lowerStoreUsingVectorIntrinsics(IRBuilderBase &Builder, Value *Addr,
/// Creates either vp_load or vp_gather intrinsics calls to represent
/// predicated load/gather.
-static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
- VectorType *DataTy,
- Value *Addr, bool IsGather,
- Value *Mask, Value *EVL,
- const Align &Alignment) {
+static Instruction *lowerLoadUsingVectorIntrinsics(
+ IRBuilderBase &Builder, VectorType *DataTy, Value *Addr, bool IsGather,
+ bool IsReverse, Value *Mask, Value *EVL, const Align &Alignment) {
CallInst *Call;
if (IsGather) {
Call =
@@ -9402,7 +9401,14 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
}
Call->addParamAttr(
0, Attribute::getWithAlignment(Call->getContext(), Alignment));
- return Call;
+ Instruction *Res = Call;
+ if (IsReverse) {
+ Value *BlockInMaskPart = Builder.getAllOnesMask(DataTy->getElementCount());
+ Res = Builder.CreateIntrinsic(DataTy, Intrinsic::experimental_vp_reverse,
+ {Res, BlockInMaskPart, EVL}, nullptr,
+ "vp.reverse");
+ }
+ return Res;
}
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
@@ -9430,7 +9436,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// a null all-one mask is a null mask.
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *Mask = State.get(getMask(), Part);
- if (isReverse())
+ if (isReverse() && !State.EVL)
Mask = Builder.CreateVectorReverse(Mask, "reverse");
BlockInMaskParts[Part] = Mask;
}
@@ -9456,11 +9462,20 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// is created only if TTI prefers predicated vectorization, thus if EVL
// is not nullptr it also implies preference for predicated
// vectorization.
- // FIXME: Support reverse store after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ if (isMaskRequired && isReverse() && !getMask()->isLiveIn()) {
+ VectorType *MaskTy = cast<VectorType>(MaskPart->getType());
+ Value *BlockInMaskPart =
+ Builder.getAllOnesMask(MaskTy->getElementCount());
+ MaskPart = Builder.CreateIntrinsic(
+ MaskTy, Intrinsic::experimental_vp_reverse,
+ {MaskPart, BlockInMaskPart, EVL}, nullptr, "vp.reverse.mask");
+ BlockInMaskParts[Part] = MaskPart;
+ }
NewSI = lowerStoreUsingVectorIntrinsics(
Builder, State.get(getAddr(), Part, !CreateGatherScatter),
- StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
+ StoredVal, CreateGatherScatter, isReverse(), MaskPart, EVL,
+ Alignment);
} else if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
@@ -9504,11 +9519,19 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// is created only if TTI prefers predicated vectorization, thus if EVL
// is not nullptr it also implies preference for predicated
// vectorization.
- // FIXME: Support reverse loading after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ if (isMaskRequired && isReverse() && !getMask()->isLiveIn()) {
+ VectorType *MaskTy = cast<VectorType>(MaskPart->getType());
+ Value *BlockInMaskPart =
+ Builder.getAllOnesMask(MaskTy->getElementCount());
+ MaskPart = Builder.CreateIntrinsic(
+ MaskTy, Intrinsic::experimental_vp_reverse,
+ {MaskPart, BlockInMaskPart, EVL}, nullptr, "vp.reverse.mask");
+ BlockInMaskParts[Part] = MaskPart;
+ }
NewLI = lowerLoadUsingVectorIntrinsics(
Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
- CreateGatherScatter, MaskPart, EVL, Alignment);
+ CreateGatherScatter, isReverse(), MaskPart, EVL, Alignment);
} else if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
index f2222e0a1f936a..f839eafe9b2a61 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
@@ -30,14 +30,11 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]]
+; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
+; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
-; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[INDEX]], i64 0
-; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IF-EVL-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
-; IF-EVL-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
-; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1023, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[TMP7]], -1
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
@@ -46,9 +43,8 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; IF-EVL-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP14]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP15]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP16]]
-; IF-EVL-NEXT: [[REVERSE:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[TMP10]])
-; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP18]], i32 4, <vscale x 4 x i1> [[REVERSE]], <vscale x 4 x i32> poison)
-; IF-EVL-NEXT: [[REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]])
+; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP8]])
+; IF-EVL-NEXT: [[TMP31:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP8]])
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4
@@ -56,9 +52,10 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP21]]
; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP22]]
; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP23]]
-; IF-EVL-NEXT: [[REVERSE4:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[TMP10]])
-; IF-EVL-NEXT: [[REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[REVERSE3]])
-; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[REVERSE5]], ptr [[TMP25]], i32 4, <vscale x 4 x i1> [[REVERSE4]])
+; IF-EVL-NEXT: [[TMP28:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[TMP31]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP8]])
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP28]], ptr align 4 [[TMP25]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP8]])
+; IF-EVL-NEXT: [[TMP29:%.*]] = zext i32 [[TMP8]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP29]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
; IF-EVL-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
>From 674a61570a383a361cb5740c802b2ff516942bb9 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Wed, 24 Apr 2024 18:21:16 +0000
Subject: [PATCH 2/4] Address comments
Created using spr 1.3.5
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 ++++++++++
...vectorize-force-tail-with-evl-reverse-load-store.ll | 3 ++-
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4ea301d15a7711..315e644ed4a814 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9475,6 +9475,16 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
Value *Mask = getMask()
? State.get(getMask(), 0)
: Builder.CreateVectorSplat(State.VF, Builder.getTrue());
+ if (isReverse() && getMask()) {
+ VectorType *MaskTy = cast<VectorType>(Mask->getType());
+ Mask = Builder.CreateIntrinsic(
+ MaskTy, Intrinsic::experimental_vp_reverse,
+ {Mask,
+ Builder.CreateVectorSplat(MaskTy->getElementCount(),
+ Builder.getTrue()),
+ EVL},
+ nullptr, "vp.reverse.mask");
+ }
Value *Addr = State.get(getAddr(), 0, !CreateScatter);
if (CreateScatter) {
NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
index 5f6310e8fda73a..c9b57361c0b983 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
@@ -171,7 +171,8 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
; IF-EVL-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[TMP26]], i64 [[TMP29]]
; IF-EVL-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP31]], i64 [[TMP30]]
; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP8]])
-; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP32]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP8]])
+; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP8]])
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP32]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP8]])
; IF-EVL-NEXT: [[TMP33:%.*]] = zext i32 [[TMP8]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP33]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
>From badd355bab7427844bea4672b16f8feb3d6f44dc Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Fri, 3 May 2024 00:50:02 +0000
Subject: [PATCH 3/4] Address comment
Created using spr 1.3.5
---
.../Transforms/Vectorize/LoopVectorize.cpp | 56 ++++++++++---------
1 file changed, 31 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 947e09b5467008..6d6f01a2196ed1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9377,12 +9377,13 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
}
}
-static Value *reverseMask(IRBuilderBase &Builder, Value *Mask, Value *EVL,
- Value *AllTrueMask) {
- VectorType *MaskTy = cast<VectorType>(Mask->getType());
- return Builder.CreateIntrinsic(MaskTy, Intrinsic::experimental_vp_reverse,
- {Mask, AllTrueMask, EVL}, nullptr,
- "vp.reverse.mask");
+static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
+ Value *EVL, const Twine &Name) {
+ VectorType *ValTy = cast<VectorType>(Operand->getType());
+ Value *AllTrueMask =
+ Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
+ return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
+ {Operand, AllTrueMask, EVL}, nullptr, Name);
}
void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
@@ -9400,10 +9401,14 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
CallInst *NewLI;
Value *EVL = State.get(getEVL(), VPIteration(0, 0));
Value *Addr = State.get(getAddr(), 0, !CreateGather);
- Value *AllTrueMask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
- Value *Mask = getMask() ? State.get(getMask(), 0) : AllTrueMask;
- if (isReverse() && getMask())
- Mask = reverseMask(Builder, Mask, EVL, AllTrueMask);
+ Value *Mask = nullptr;
+ if (getMask()) {
+ Mask = State.get(getMask(), 0);
+ if (isReverse())
+ Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
+ } else {
+ Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
+ }
if (CreateGather) {
NewLI =
@@ -9419,11 +9424,10 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
State.addMetadata(NewLI, LI);
Instruction *Res = NewLI;
- if (isReverse()) {
- Res =
- Builder.CreateIntrinsic(DataTy, Intrinsic::experimental_vp_reverse,
- {Res, AllTrueMask, EVL}, nullptr, "vp.reverse");
- }
+ if (isReverse())
+ // Use cheap all-true mask for reverse rather than actual mask, it does not
+ // affect the result.
+ Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
State.set(this, Res, 0);
}
@@ -9482,16 +9486,18 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
CallInst *NewSI = nullptr;
Value *StoredVal = State.get(StoredValue, 0);
Value *EVL = State.get(getEVL(), VPIteration(0, 0));
- Value *AllTrueMask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
- if (isReverse()) {
- auto *StoredValTy = cast<VectorType>(StoredVal->getType());
- StoredVal = Builder.CreateIntrinsic(
- StoredValTy, Intrinsic::experimental_vp_reverse,
- {StoredVal, AllTrueMask, EVL}, nullptr, "vp.reverse");
- }
- Value *Mask = getMask() ? State.get(getMask(), 0) : AllTrueMask;
- if (isReverse() && getMask())
- Mask = reverseMask(Builder, Mask, EVL, AllTrueMask);
+ if (isReverse())
+ // Use cheap all-true mask for reverse rather than actual mask, it does not
+ // affect the result.
+ StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
+ Value *Mask = nullptr;
+ if (getMask()) {
+ Mask = State.get(getMask(), 0);
+ if (isReverse())
+ Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
+ } else {
+ Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
+ }
Value *Addr = State.get(getAddr(), 0, !CreateScatter);
if (CreateScatter) {
NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
>From 427dd9003a41f7b6a927ac6b928f5b1ff6465bf6 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Fri, 3 May 2024 11:27:57 +0000
Subject: [PATCH 4/4] Address comments
Created using spr 1.3.5
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6d6f01a2196ed1..72819970f8d089 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9402,8 +9402,8 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
Value *EVL = State.get(getEVL(), VPIteration(0, 0));
Value *Addr = State.get(getAddr(), 0, !CreateGather);
Value *Mask = nullptr;
- if (getMask()) {
- Mask = State.get(getMask(), 0);
+ if (VPValue *VPMask = getMask()) {
+ Mask = State.get(VPMask, 0);
if (isReverse())
Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
} else {
@@ -9424,10 +9424,11 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
State.addMetadata(NewLI, LI);
Instruction *Res = NewLI;
- if (isReverse())
+ if (isReverse()) {
// Use cheap all-true mask for reverse rather than actual mask, it does not
// affect the result.
Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
+ }
State.set(this, Res, 0);
}
@@ -9486,13 +9487,14 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
CallInst *NewSI = nullptr;
Value *StoredVal = State.get(StoredValue, 0);
Value *EVL = State.get(getEVL(), VPIteration(0, 0));
- if (isReverse())
+ if (isReverse()) {
// Use cheap all-true mask for reverse rather than actual mask, it does not
// affect the result.
StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
+ }
Value *Mask = nullptr;
- if (getMask()) {
- Mask = State.get(getMask(), 0);
+ if (VPValue *VPMask = getMask()) {
+ Mask = State.get(VPMask, 0);
if (isReverse())
Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
} else {
More information about the llvm-commits
mailing list