[llvm] [LV][EVL] Support icmp/fcmp instruction with EVL-vectorization (PR #108533)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 13 03:53:43 PDT 2024
https://github.com/LiqinWeng created https://github.com/llvm/llvm-project/pull/108533
None
>From 2d4782fcd37d1a794f67bc684b5cce07cf855681 Mon Sep 17 00:00:00 2001
From: wengliqin <liqin.weng at spacemit.com>
Date: Fri, 13 Sep 2024 18:53:01 +0800
Subject: [PATCH] [LV][EVL] Support icmp/fcmp instruction with
EVL-vectorization
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 4 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 27 +++
.../Transforms/Vectorize/VPlanTransforms.cpp | 3 +-
...rize-force-tail-with-evl-cond-reduction.ll | 4 +-
...ze-force-tail-with-evl-masked-loadstore.ll | 30 +--
...-force-tail-with-evl-reverse-load-store.ll | 48 ++---
.../RISCV/vplan-vp-cmp-intrinsics.ll | 177 ++++++++++++++++++
7 files changed, 249 insertions(+), 44 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cmp-intrinsics.ll
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 00d9f2909d71e2..8bcf5f3553cd86 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10546,13 +10546,13 @@ InstructionCost BoUpSLP::getSpillCost() const {
if (II->isAssumeLikeIntrinsic())
return true;
FastMathFlags FMF;
- SmallVector<Type *, 4> Tys;
+ SmallVector<Type *, 8> Tys;
for (auto &ArgOp : II->args())
Tys.push_back(ArgOp->getType());
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
FMF = FPMO->getFastMathFlags();
IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), Tys,
- FMF);
+ FMF, II);
InstructionCost IntrCost =
TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);
InstructionCost CallCost = TTI->getCallInstrCost(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 351f909ac0279d..6e0ea5673e5a4e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1269,6 +1269,33 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
void VPWidenEVLRecipe::execute(VPTransformState &State) {
unsigned Opcode = getOpcode();
// TODO: Support other opcodes
+ if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
+ Value *Op1 = State.get(getOperand(0), 0);
+ Value *Op2 = State.get(getOperand(1), 0);
+ auto &Ctx = State.Builder.getContext();
+ Value *Pred = MetadataAsValue::get(
+ Ctx, MDString::get(Ctx, CmpInst::getPredicateName(getPredicate())));
+
+ IRBuilderBase &BuilderIR = State.Builder;
+ VectorBuilder Builder(BuilderIR);
+ Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
+ Builder.setMask(Mask).setEVL(State.get(getEVL(), 0, true));
+
+ VectorType *DataType = VectorType::get(Type::getInt1Ty(Ctx), State.VF);
+
+ Value *VPInst = Builder.createVectorInstruction(Opcode, DataType,
+ {Op1, Op2, Pred}, "vp.op");
+ // if (isa<FPMathOperator>(VPInst))
+ // setFlags(cast<Instruction>(VPInst));
+ if (VPInst) {
+ if (auto *VecOp = dyn_cast<CastInst>(VPInst))
+ VecOp->copyIRFlags(getUnderlyingInstr());
+ }
+ State.set(this, VPInst, 0);
+ State.addMetadata(VPInst,
+ dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+ return;
+ }
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b722ec34ee6fb6..0b2c126210162f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1345,7 +1345,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
unsigned Opcode = W->getOpcode();
if (!Instruction::isBinaryOp(Opcode) &&
- !Instruction::isUnaryOp(Opcode))
+ !Instruction::isUnaryOp(Opcode) &&
+ Opcode != Instruction::ICmp && Opcode != Instruction::FCmp)
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
})
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
index 41796e848632e4..a86f6b1d23ae7a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
@@ -69,7 +69,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
-; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), metadata !"sgt", <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
@@ -282,7 +282,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
-; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), metadata !"sgt", <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i1> zeroinitializer
; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> [[TMP20]], i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll
index 99da5058fbf922..c915ffd9a80d91 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll
@@ -45,18 +45,18 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp ne <vscale x 4 x i32> [[VP_OP_LOAD]], zeroinitializer
-; IF-EVL-NEXT: [[TMP18:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i1> zeroinitializer
-; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP11]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i32 0
-; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP10]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD3]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
-; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP10]])
-; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64
-; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, metadata !"ne", <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
+; IF-EVL-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VP_OP]], <vscale x 4 x i1> zeroinitializer
+; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP11]]
+; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP18]], i32 0
+; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP19]], <vscale x 4 x i1> [[TMP17]], i32 [[TMP10]])
+; IF-EVL-NEXT: [[VP_OP4:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD3]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP4]], ptr align 4 [[TMP19]], <vscale x 4 x i1> [[TMP17]], i32 [[TMP10]])
+; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP10]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL: scalar.ph:
@@ -65,13 +65,13 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
; IF-EVL: for.body:
; IF-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_011]]
-; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP23]], 0
+; IF-EVL-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP22]], 0
; IF-EVL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; IF-EVL: if.then:
; IF-EVL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_011]]
-; IF-EVL-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
-; IF-EVL-NEXT: [[ADD:%.*]] = add i32 [[TMP23]], [[TMP24]]
+; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; IF-EVL-NEXT: [[ADD:%.*]] = add i32 [[TMP22]], [[TMP23]]
; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4
; IF-EVL-NEXT: br label [[FOR_INC]]
; IF-EVL: for.inc:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
index c1cf8b0fc541e7..def9d8d4c19124 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
@@ -147,33 +147,33 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP8]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT: [[TMP15:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 100, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; IF-EVL-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> zeroinitializer
-; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP12]]
-; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4
-; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 0, [[TMP19]]
-; IF-EVL-NEXT: [[TMP21:%.*]] = sub i64 1, [[TMP19]]
-; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP17]], i64 [[TMP20]]
-; IF-EVL-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP21]]
-; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP23]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP6]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 100, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), metadata !"slt", <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i1> [[VP_OP]], <vscale x 4 x i1> zeroinitializer
+; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP12]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4
+; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 0, [[TMP18]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = sub i64 1, [[TMP18]]
+; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP19]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP20]]
+; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP22]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP6]])
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP12]]
-; IF-EVL-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 4
-; IF-EVL-NEXT: [[TMP27:%.*]] = mul i64 0, [[TMP26]]
-; IF-EVL-NEXT: [[TMP28:%.*]] = sub i64 1, [[TMP26]]
-; IF-EVL-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP27]]
-; IF-EVL-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP29]], i64 [[TMP28]]
+; IF-EVL-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP12]]
+; IF-EVL-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 4
+; IF-EVL-NEXT: [[TMP26:%.*]] = mul i64 0, [[TMP25]]
+; IF-EVL-NEXT: [[TMP27:%.*]] = sub i64 1, [[TMP25]]
+; IF-EVL-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP23]], i64 [[TMP26]]
+; IF-EVL-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP28]], i64 [[TMP27]]
; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP30]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP6]])
-; IF-EVL-NEXT: [[TMP31:%.*]] = zext i32 [[TMP6]] to i64
-; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP31]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP29]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP6]])
+; IF-EVL-NEXT: [[TMP30:%.*]] = zext i32 [[TMP6]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP30]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IF-EVL-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
; IF-EVL: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cmp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cmp-intrinsics.ll
new file mode 100644
index 00000000000000..49266852647862
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cmp-intrinsics.ll
@@ -0,0 +1,177 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s
+
+define void @vp_icmp(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL->NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL->NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL->NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%6>
+; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx3>
+; IF-EVL-NEXT: WIDEN ir<%1> = vp.load vp<%8>, vp<%5>
+; IF-EVL-NEXT: WIDEN-VP ir<%cmp4> = icmp sgt ir<%0>, ir<%1>, vp<%5>
+; IF-EVL-NEXT: WIDEN-CAST ir<%conv5> = zext ir<%cmp4> to i32
+; IF-EVL-NEXT: CLONE ir<%arrayidx7> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx7>
+; IF-EVL-NEXT: WIDEN vp.store vp<%9>, ir<%conv5>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: Plan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP->NEXT: Live-in vp<%0> = VF * UF
+; NO-VP->NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP->NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%7>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx3>
+; NO-VP-NEXT: WIDEN ir<%1> = load vp<%5>
+; NO-VP-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv5> = zext ir<%cmp4> to i32
+; NO-VP-NEXT: CLONE ir<%arrayidx7> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%6> = vector-pointer ir<%arrayidx7>
+; NO-VP-NEXT: WIDEN store vp<%6>, ir<%conv5>
+; NO-VP-NEXT: EMIT vp<%7> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%7>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp12 = icmp sgt i64 %N, 0
+ br i1 %cmp12, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
+ %1 = load i32, ptr %arrayidx3, align 4
+ %cmp4 = icmp sgt i32 %0, %1
+ %conv5 = zext i1 %cmp4 to i32
+ %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ store i32 %conv5, ptr %arrayidx7, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_fcmp(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL->NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL->NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL->NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%6>
+; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx3>
+; IF-EVL-NEXT: WIDEN ir<%1> = vp.load vp<%8>, vp<%5>
+; IF-EVL-NEXT: WIDEN-VP ir<%cmp4> = fcmp ogt ir<%0>, ir<%1>, vp<%5>
+; IF-EVL-NEXT: WIDEN-CAST ir<%conv6> = uitofp ir<%cmp4> to float
+; IF-EVL-NEXT: CLONE ir<%arrayidx8> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx8>
+; IF-EVL-NEXT: WIDEN vp.store vp<%9>, ir<%conv6>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP->NEXT: Live-in vp<%0> = VF * UF
+; NO-VP->NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP->NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP->NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP->NEXT: vector.body:
+; NO-VP->NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%7>
+; NO-VP->NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP->NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP->NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP->NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP->NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%3>
+; NO-VP->NEXT: vp<%5> = vector-pointer ir<%arrayidx3>
+; NO-VP->NEXT: WIDEN ir<%1> = load vp<%5>
+; NO-VP->NEXT: WIDEN ir<%cmp4> = fcmp ogt ir<%0>, ir<%1>
+; NO-VP->NEXT: WIDEN-CAST ir<%conv6> = uitofp ir<%cmp4> to float
+; NO-VP->NEXT: CLONE ir<%arrayidx8> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP->NEXT: vp<%6> = vector-pointer ir<%arrayidx8>
+; NO-VP->NEXT: WIDEN store vp<%6>, ir<%conv6>
+; NO-VP->NEXT: EMIT vp<%7> = add nuw vp<%2>, vp<%0>
+; NO-VP->NEXT: EMIT branch-on-count vp<%7>, vp<%1>
+; NO-VP->NEXT: No successors
+; NO-VP->NEXT: }
+
+entry:
+ %cmp13 = icmp sgt i64 %N, 0
+ br i1 %cmp13, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+ %0 = load float, ptr %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds float, ptr %c, i64 %indvars.iv
+ %1 = load float, ptr %arrayidx3, align 4
+ %cmp4 = fcmp ogt float %0, %1
+ %conv6 = uitofp i1 %cmp4 to float
+ %arrayidx8 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+ store float %conv6, ptr %arrayidx8, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
\ No newline at end of file
More information about the llvm-commits
mailing list