[llvm] [VP][EVL] Support select instruction with EVL-vectorization (PR #109614)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 27 00:15:01 PDT 2024
https://github.com/LiqinWeng updated https://github.com/llvm/llvm-project/pull/109614
>From 89622e9a2ad88932c3474ab49746b8d336562df4 Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Tue, 24 Sep 2024 11:20:07 +0800
Subject: [PATCH 1/3] [LV][EVL][Test] Prepare test for adding select Recipe
---
.../RISCV/vplan-vp-select-intrinsics.ll | 101 ++++++++++++++++++
1 file changed, 101 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
new file mode 100644
index 00000000000000..da75d32bca9b2f
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
@@ -0,0 +1,101 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s
+
+define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%6>
+; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx3>
+; IF-EVL-NEXT: WIDEN ir<%1> = vp.load vp<%8>, vp<%5>
+; IF-EVL-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1>
+; IF-EVL-NEXT: WIDEN ir<%2> = vp.sub ir<0>, ir<%1>, vp<%5>
+; IF-EVL-NEXT: WIDEN-SELECT ir<%cond.p> = select ir<%cmp4>, ir<%1>, ir<%2>
+; IF-EVL-NEXT: WIDEN ir<%cond> = vp.add ir<%cond.p>, ir<%0>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx15>
+; IF-EVL-NEXT: WIDEN vp.store vp<%9>, ir<%cond>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%7>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx3>
+; NO-VP-NEXT: WIDEN ir<%1> = load vp<%5>
+; NO-VP-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1>
+; NO-VP-NEXT: WIDEN ir<%2> = sub ir<0>, ir<%1>
+; NO-VP-NEXT: WIDEN-SELECT ir<%cond.p> = select ir<%cmp4>, ir<%1>, ir<%2>
+; NO-VP-NEXT: WIDEN ir<%cond> = add ir<%cond.p>, ir<%0>
+; NO-VP-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%6> = vector-pointer ir<%arrayidx15>
+; NO-VP-NEXT: WIDEN store vp<%6>, ir<%cond>
+; NO-VP-NEXT: EMIT vp<%7> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%7>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+
+entry:
+ %cmp30 = icmp sgt i64 %N, 0
+ br i1 %cmp30, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
+ %1 = load i32, ptr %arrayidx3, align 4
+ %cmp4 = icmp sgt i32 %0, %1
+ %2 = sub i32 0, %1
+ %cond.p = select i1 %cmp4, i32 %1, i32 %2
+ %cond = add i32 %cond.p, %0
+ %arrayidx15 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ store i32 %cond, ptr %arrayidx15, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
>From b63259a716c0b9e5b4223e6963661484eb7254b2 Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Thu, 19 Sep 2024 17:46:54 +0800
Subject: [PATCH 2/3] [VP][EVL] Support select instruction with
EVL-vectorization
---
llvm/lib/Transforms/Vectorize/VPlan.h | 68 ++++++++++++++++++-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 53 ++++++++++++++-
.../Transforms/Vectorize/VPlanTransforms.cpp | 4 ++
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 +
.../Transforms/Vectorize/VPlanVerifier.cpp | 4 ++
...rize-force-tail-with-evl-cond-reduction.ll | 2 +-
.../RISCV/vplan-vp-select-intrinsics.ll | 2 +-
7 files changed, 126 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 23a24ce293ef2d..f3374614d0c070 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -889,6 +889,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenEVLSC:
case VPRecipeBase::VPWidenSelectSC:
+ case VPRecipeBase::VPWidenSelectEVLSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
case VPRecipeBase::VPCanonicalIVPHISC:
@@ -1666,10 +1667,17 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
/// A recipe for widening select instructions.
struct VPWidenSelectRecipe : public VPSingleDefRecipe {
+
+protected:
+ template <typename IterT>
+ VPWidenSelectRecipe(unsigned VPDefOpcode, SelectInst &I,
+ iterator_range<IterT> Operands)
+ : VPSingleDefRecipe(VPDefOpcode, Operands, &I, I.getDebugLoc()) {}
+
+public:
template <typename IterT>
VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
- : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
- I.getDebugLoc()) {}
+ : VPWidenSelectRecipe(VPDef::VPWidenSelectSC, I, Operands) {}
~VPWidenSelectRecipe() override = default;
@@ -1678,7 +1686,15 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
operands());
}
- VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenSelectEVLSC;
+ }
+
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
+ }
/// Produce a widened version of the select instruction.
void execute(VPTransformState &State) override;
@@ -1698,6 +1714,52 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
}
};
+// A recipe for widening select instruction with vector-predication intrinsics
+// with explicit vector length (EVL).
+struct VPWidenSelectEVLRecipe : public VPWidenSelectRecipe {
+
+ template <typename IterT>
+ VPWidenSelectEVLRecipe(SelectInst &I, iterator_range<IterT> Operands,
+ VPValue &EVL)
+ : VPWidenSelectRecipe(VPDef::VPWidenSelectEVLSC, I, Operands) {
+ addOperand(&EVL);
+ }
+
+ VPWidenSelectEVLRecipe(VPWidenSelectRecipe &W, VPValue &EVL)
+ : VPWidenSelectEVLRecipe(*cast<SelectInst>(W.getUnderlyingInstr()),
+ W.operands(), EVL) {}
+
+ ~VPWidenSelectEVLRecipe() override = default;
+
+ VPWidenSelectEVLRecipe *clone() final {
+ llvm_unreachable("VPWidenSelectEVLRecipe cannot be cloned");
+ return nullptr;
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenSelectEVLSC)
+
+ VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
+ const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+ /// Produce a vp-intrinsic version of the select instruction.
+ void execute(VPTransformState &State) final;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // EVL in that recipe is always the last operand, thus any use before means
+ // the VPValue should be vectorized.
+ return getEVL() == Op;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const final;
+#endif
+};
+
/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
bool isPointerLoopInvariant() const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2f0ba5510b8f34..2011236f73b19e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -94,7 +94,8 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenPHISC:
case VPWidenSC:
case VPWidenEVLSC:
- case VPWidenSelectSC: {
+ case VPWidenSelectSC:
+ case VPWidenSelectEVLSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
@@ -135,7 +136,8 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenPHISC:
case VPWidenSC:
case VPWidenEVLSC:
- case VPWidenSelectSC: {
+ case VPWidenSelectSC:
+ case VPWidenSelectEVLSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
@@ -172,7 +174,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPWidenPointerInductionSC:
case VPWidenSC:
case VPWidenEVLSC:
- case VPWidenSelectSC: {
+ case VPWidenSelectSC:
+ case VPWidenSelectEVLSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
@@ -1038,6 +1041,21 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
getOperand(2)->printAsOperand(O, SlotTracker);
O << (isInvariantCond() ? " (condition is loop invariant)" : "");
}
+
+void VPWidenSelectEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-SELECT ";
+ printAsOperand(O, SlotTracker);
+ O << " = vp.select ";
+ getOperand(0)->printAsOperand(O, SlotTracker);
+ O << ", ";
+ getOperand(1)->printAsOperand(O, SlotTracker);
+ O << ", ";
+ getOperand(2)->printAsOperand(O, SlotTracker);
+ O << ", ";
+ getOperand(3)->printAsOperand(O, SlotTracker);
+ O << (isInvariantCond() ? " (condition is loop invariant)" : "");
+}
#endif
void VPWidenSelectRecipe::execute(VPTransformState &State) {
@@ -1058,6 +1076,35 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
}
+void VPWidenSelectEVLRecipe::execute(VPTransformState &State) {
+ State.setDebugLocFrom(getDebugLoc());
+ assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+ "explicit vector length.");
+
+ Value *EVLArg = State.get(getEVL(), 0, /*NeedsScalar=*/true);
+ IRBuilderBase &BuilderIR = State.Builder;
+ VectorBuilder Builder(BuilderIR);
+ Builder.setEVL(EVLArg);
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+ auto *InvarCond =
+ isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
+
+ Value *Cond = InvarCond ? InvarCond : State.get(getCond(), 0);
+ if (!isa<VectorType>(Cond->getType()))
+ Cond = BuilderIR.CreateVectorSplat(State.VF, Cond, "splat.cond");
+
+ Value *Op0 = State.get(getOperand(1), 0);
+ Value *Op1 = State.get(getOperand(2), 0);
+ Value *VPInst = Builder.createVectorInstruction(
+ Instruction::Select, Op0->getType(), {Cond, Op0, Op1}, "vp.select");
+ State.set(this, VPInst, 0);
+ State.addMetadata(VPInst,
+ dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+}
+
VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
const FastMathFlags &FMF) {
AllowReassoc = FMF.allowReassoc();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a878613c4ba483..94e325764a273d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1379,6 +1379,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
})
+ .Case<VPWidenSelectRecipe>(
+ [&](VPWidenSelectRecipe *W) -> VPRecipeBase * {
+ return new VPWidenSelectEVLRecipe(*W, EVL);
+ })
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
VPValue *NewMask = GetNewMask(Red->getCondOp());
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index a23a59aa2f11c2..cfe5351e007d04 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -357,6 +357,7 @@ class VPDef {
VPWidenSC,
VPWidenEVLSC,
VPWidenSelectSC,
+ VPWidenSelectEVLSC,
VPBlendSC,
// START: Phi-like recipes. Need to be kept together.
VPWidenPHISC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 99bc4c38a3c3cd..086d5d6ba24453 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -148,6 +148,10 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
return VerifyEVLUse(
*W, Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
})
+ .Case<VPWidenSelectEVLRecipe>(
+ [&](const VPWidenSelectEVLRecipe *S) {
+ return VerifyEVLUse(*S, 3);
+ })
.Case<VPReductionEVLRecipe>([&](const VPReductionEVLRecipe *R) {
return VerifyEVLUse(*R, 2);
})
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
index 41796e848632e4..fc12dd54f88dfa 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
@@ -70,7 +70,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
+; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
index da75d32bca9b2f..62e0d3e58092df 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
@@ -33,7 +33,7 @@ define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: WIDEN ir<%1> = vp.load vp<%8>, vp<%5>
; IF-EVL-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1>
; IF-EVL-NEXT: WIDEN ir<%2> = vp.sub ir<0>, ir<%1>, vp<%5>
-; IF-EVL-NEXT: WIDEN-SELECT ir<%cond.p> = select ir<%cmp4>, ir<%1>, ir<%2>
+; IF-EVL-NEXT: WIDEN-SELECT ir<%cond.p> = vp.select ir<%cmp4>, ir<%1>, ir<%2>, vp<%5>
; IF-EVL-NEXT: WIDEN ir<%cond> = vp.add ir<%cond.p>, ir<%0>, vp<%5>
; IF-EVL-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%6>
; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx15>
>From 55f67bebf0778c952b49108700b8b96648dd7ed3 Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Wed, 25 Sep 2024 18:24:00 +0800
Subject: [PATCH 3/3] [LV][EVL] Adrress the comments
rebase and update testcase
---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 10 +-
.../RISCV/vplan-vp-select-intrinsics.ll | 94 ++++++-------------
2 files changed, 32 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2011236f73b19e..2a6c91b4cd0ac4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1078,10 +1078,7 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
void VPWidenSelectEVLRecipe::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
- assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
- "explicit vector length.");
-
- Value *EVLArg = State.get(getEVL(), 0, /*NeedsScalar=*/true);
+ Value *EVLArg = State.get(getEVL(), /*NeedsScalar=*/true);
IRBuilderBase &BuilderIR = State.Builder;
VectorBuilder Builder(BuilderIR);
Builder.setEVL(EVLArg);
@@ -1090,11 +1087,10 @@ void VPWidenSelectEVLRecipe::execute(VPTransformState &State) {
// We have to take the 'vectorized' value and pick the first lane.
// Instcombine will make this a no-op.
auto *InvarCond =
- isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
+ isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
Value *Cond = InvarCond ? InvarCond : State.get(getCond(), 0);
- if (!isa<VectorType>(Cond->getType()))
- Cond = BuilderIR.CreateVectorSplat(State.VF, Cond, "splat.cond");
+ assert(isa<VectorType>(Cond->getType()) && "CondType must be vector Type.");
Value *Op0 = State.get(getOperand(1), 0);
Value *Op1 = State.get(getOperand(2), 0);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
index 62e0d3e58092df..3a7a0915e02eb7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
@@ -5,15 +5,10 @@
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
-; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
-; RUN: -force-tail-folding-style=none \
-; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
-; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s
-
define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
; IF-EVL: vector.ph:
@@ -21,67 +16,33 @@ define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
-; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%6>
-; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx3>
-; IF-EVL-NEXT: WIDEN ir<%1> = vp.load vp<%8>, vp<%5>
-; IF-EVL-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1>
-; IF-EVL-NEXT: WIDEN ir<%2> = vp.sub ir<0>, ir<%1>, vp<%5>
-; IF-EVL-NEXT: WIDEN-SELECT ir<%cond.p> = vp.select ir<%cmp4>, ir<%1>, ir<%2>, vp<%5>
-; IF-EVL-NEXT: WIDEN ir<%cond> = vp.add ir<%cond.p>, ir<%0>, vp<%5>
-; IF-EVL-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx15>
-; IF-EVL-NEXT: WIDEN vp.store vp<%9>, ir<%cond>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%[0-9]+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN ir<[[CMP:%.+]]> = icmp sgt ir<[[LD1]]>, ir<[[LD2]]>
+; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = vp.sub ir<0>, ir<[[LD2]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-SELECT ir<[[SELECT:%.+]]> = vp.select ir<[[CMP]]>, ir<%1>, ir<%2>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add ir<[[SELECT]]>, ir<%0>, vp<[[EVL]]>
+; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }
-; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP-NEXT: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%7>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx3>
-; NO-VP-NEXT: WIDEN ir<%1> = load vp<%5>
-; NO-VP-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1>
-; NO-VP-NEXT: WIDEN ir<%2> = sub ir<0>, ir<%1>
-; NO-VP-NEXT: WIDEN-SELECT ir<%cond.p> = select ir<%cmp4>, ir<%1>, ir<%2>
-; NO-VP-NEXT: WIDEN ir<%cond> = add ir<%cond.p>, ir<%0>
-; NO-VP-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%6> = vector-pointer ir<%arrayidx15>
-; NO-VP-NEXT: WIDEN store vp<%6>, ir<%cond>
-; NO-VP-NEXT: EMIT vp<%7> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%7>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
-
-
entry:
- %cmp30 = icmp sgt i64 %N, 0
- br i1 %cmp30, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:
- ret void
+ br label %for.body
for.body:
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
@@ -97,5 +58,8 @@ for.body:
store i32 %cond, ptr %arrayidx15, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
}
More information about the llvm-commits
mailing list