[llvm] [LV][EVL] Support cast instruction with EVL-vectorization (PR #108351)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 12 02:34:07 PDT 2024
https://github.com/LiqinWeng updated https://github.com/llvm/llvm-project/pull/108351
>From e318a92c5395ead737844efac3cfdcfc066ad971 Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Thu, 12 Sep 2024 17:24:35 +0800
Subject: [PATCH] [LV][EVL] Support cast instruction with EVL-vectorization
---
.../Transforms/Vectorize/LoopVectorize.cpp | 1 +
llvm/lib/Transforms/Vectorize/VPlan.h | 77 +++++-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 47 ++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 9 +
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 +
.../LoopVectorize/RISCV/inloop-reduction.ll | 20 +-
.../RISCV/vplan-vp-cast-intrinsics.ll | 227 ++++++++++++++++++
7 files changed, 367 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3b6b154b9660cf..3629fd1c6926a5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4490,6 +4490,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPWidenCallSC:
case VPDef::VPWidenCanonicalIVSC:
case VPDef::VPWidenCastSC:
+ case VPDef::VPWidenCastEVLSC:
case VPDef::VPWidenGEPSC:
case VPDef::VPWidenSC:
case VPDef::VPWidenSelectSC:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 64242e43c56bc8..7e45a1c0b36dbd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -921,6 +921,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
+ case VPRecipeBase::VPWidenCastEVLSC:
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenEVLSC:
@@ -1111,6 +1112,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
}
@@ -1514,19 +1516,28 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Result type for the cast.
Type *ResultTy;
-public:
- VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
- CastInst &UI)
+protected:
+ VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
+ VPValue *Op, Type *ResultTy, CastInst &UI)
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
ResultTy(ResultTy) {
assert(UI.getOpcode() == Opcode &&
"opcode of underlying cast doesn't match");
}
- VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+ VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
+ VPValue *Op, Type *ResultTy)
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
ResultTy(ResultTy) {}
+public:
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+ CastInst &UI)
+ : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy, UI) {}
+
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+ : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy) {}
+
~VPWidenCastRecipe() override = default;
VPWidenCastRecipe *clone() override {
@@ -1537,7 +1548,15 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
}
- VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC;
+ }
+
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
+ }
/// Produce widened copies of the cast.
void execute(VPTransformState &State) override;
@@ -1554,6 +1573,54 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
Type *getResultType() const { return ResultTy; }
};
+// A recipe for widening cast operation with vector-predication intrinsics with
+/// explicit vector length (EVL).
+class VPWidenCastEVLRecipe : public VPWidenCastRecipe {
+ using VPRecipeWithIRFlags::transferFlags;
+
+public:
+ VPWidenCastEVLRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+ VPValue &EVL)
+ : VPWidenCastRecipe(VPDef::VPWidenCastEVLSC, Opcode, Op, ResultTy) {
+ addOperand(&EVL);
+ }
+ VPWidenCastEVLRecipe(VPWidenCastRecipe &W, VPValue &EVL)
+ : VPWidenCastEVLRecipe(W.getOpcode(), W.getOperand(0), W.getResultType(),
+ EVL) {
+ transferFlags(W);
+ }
+
+ ~VPWidenCastEVLRecipe() override = default;
+
+ VPWidenCastEVLRecipe *clone() final {
+ llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
+ return nullptr;
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenCastEVLSC)
+
+ VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
+ const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+ /// Produce a vp-intrinsic copies of the cast.
+ void execute(VPTransformState &State) final;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // EVL in that recipe is always the last operand, thus any use before means
+ // the VPValue should be vectorized.
+ return getEVL() == Op;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const final;
+#endif
+};
+
/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
class VPScalarCastRecipe : public VPSingleDefRecipe {
Instruction::CastOps Opcode;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 351f909ac0279d..62bc032f0c0563 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -69,6 +69,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPReductionSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
+ case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenLoadEVLSC:
@@ -112,6 +113,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPReductionSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
+ case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@@ -162,6 +164,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPScalarIVStepsSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
+ case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@@ -1344,6 +1347,40 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
}
}
+void VPWidenCastEVLRecipe::execute(VPTransformState &State) {
+ unsigned Opcode = getOpcode();
+ State.setDebugLocFrom(getDebugLoc());
+ assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+ "explicit vector length.");
+
+ assert(State.get(getOperand(0), 0)->getType()->isVectorTy() &&
+ "VPWidenEVLRecipe should not be used for scalars");
+
+ // TODO: add more cast instruction, eg: fptoint/inttofp/inttoptr/fptofp
+ if (Opcode == Instruction::SExt || Opcode == Instruction::ZExt ||
+ Opcode == Instruction::Trunc) {
+ Value *SrcVal = State.get(getOperand(0), 0);
+ VectorType *SrcTy = cast<VectorType>(SrcVal->getType());
+ VectorType *DsType =
+ VectorType::get(getResultType(), SrcTy->getElementCount());
+
+ IRBuilderBase &BuilderIR = State.Builder;
+ VectorBuilder Builder(BuilderIR);
+ Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
+
+ Builder.setMask(Mask).setEVL(State.get(getEVL(), 0, /*NeedsScalar=*/true));
+ Value *VPInst =
+ Builder.createVectorInstruction(Opcode, DsType, {SrcVal}, "vp.cast");
+ if (VPInst) {
+ if (auto *VecOp = dyn_cast<CastInst>(VPInst))
+ VecOp->copyIRFlags(getUnderlyingInstr());
+ }
+ State.set(this, VPInst, 0);
+ State.addMetadata(VPInst,
+ dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -1354,6 +1391,16 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
printOperands(O, SlotTracker);
O << " to " << *getResultType();
}
+
+void VPWidenCastEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-VP ";
+ printAsOperand(O, SlotTracker);
+ O << " = vp." << Instruction::getOpcodeName(getOpcode()) << " ";
+ printFlags(O);
+ printOperands(O, SlotTracker);
+ O << " to " << *getResultType();
+}
#endif
/// This function adds
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b722ec34ee6fb6..c4ac5a13a1473e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1349,6 +1349,15 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
})
+ .Case<VPWidenCastRecipe>(
+ [&](VPWidenCastRecipe *W) -> VPRecipeBase * {
+ unsigned Opcode = W->getOpcode();
+ if (Opcode != Instruction::SExt &&
+ Opcode != Instruction::ZExt &&
+ Opcode != Instruction::Trunc)
+ return nullptr;
+ return new VPWidenCastEVLRecipe(*W, EVL);
+ })
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
VPValue *NewMask = GetNewMask(Red->getCondOp());
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index b8b2c0bd4d5ff1..17fc0a526e58d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -350,6 +350,7 @@ class VPDef {
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
+ VPWidenCastEVLSC,
VPWidenGEPSC,
VPWidenLoadEVLSC,
VPWidenLoadSC,
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 0381f6dae9811f..9c4ebf3d7ff849 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -159,38 +159,38 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP: vector.body:
; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP5]], i32 8, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = add i32 [[EVL_BASED_IV]], 0
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
-; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP10]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]]
+; IF-EVL-INLOOP-NEXT: [[VP_CAST:%.*]] = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> [[VP_OP_LOAD]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[VP_CAST]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP6]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]]
-; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL-INLOOP: scalar.ph:
; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL-INLOOP: for.body:
; IF-EVL-INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; IF-EVL-INLOOP-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]]
-; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-; IF-EVL-INLOOP-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32
+; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; IF-EVL-INLOOP-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
; IF-EVL-INLOOP-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]]
; IF-EVL-INLOOP-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1
; IF-EVL-INLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL-INLOOP: for.cond.cleanup.loopexit:
-; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
+; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP]]
; IF-EVL-INLOOP: for.cond.cleanup:
; IF-EVL-INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
new file mode 100644
index 00000000000000..f1f3d2c88301e1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -0,0 +1,227 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s
+
+define void @vp_sext(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-VP vp<%8> = vp.sext ir<%0>, vp<%5> to i64
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = sext ir<%0> to i64
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %conv2 = sext i32 %0 to i64
+ %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %conv2, ptr %arrayidx4, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_zext(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-VP vp<%8> = vp.zext ir<%0>, vp<%5> to i64
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = zext ir<%0> to i64
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %conv2 = zext i32 %0 to i64
+ %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %conv2, ptr %arrayidx4, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_truncate(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL : VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT : Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT : Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT : Live-in ir<%N> = original trip-count
+
+; IF-EVL : vector.ph:
+; IF-EVL-NEXT : Successor(s): vector loop
+
+; IF-EVL : <x1> vector loop: {
+; IF-EVL-NEXT : vector.body:
+; IF-EVL-NEXT : EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT : EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT : EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT : vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT : CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT : vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT : WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT : WIDEN-VP vp<%8> = vp.trunc ir<%0>, vp<%5> to i16
+; IF-EVL-NEXT : CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT : vp<%9> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT : WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
+; IF-EVL-NEXT : SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT : EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT : EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT : EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT : No successors
+; IF-EVL-NEXT : }
+
+; NO-VP: Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = trunc ir<%0> to i16
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %conv2 = trunc i32 %0 to i16
+ %arrayidx4 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
+ store i16 %conv2, ptr %arrayidx4, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
More information about the llvm-commits
mailing list