[llvm] [LV][EVL] Support cast instruction with EVL-vectorization (PR #108351)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 29 20:24:15 PDT 2024
https://github.com/LiqinWeng updated https://github.com/llvm/llvm-project/pull/108351
>From 164862c913ec7180e92af19f291b0e4dd00d579b Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Thu, 12 Sep 2024 17:24:35 +0800
Subject: [PATCH 1/3] [LV][EVL] Support sext/zext/truncate of cast instruction
with EVL-vectorization
---
llvm/lib/Transforms/Vectorize/VPlan.h | 82 ++++++-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 45 +++-
.../Transforms/Vectorize/VPlanTransforms.cpp | 9 +
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 +
.../Transforms/Vectorize/VPlanVerifier.cpp | 3 +
.../LoopVectorize/RISCV/inloop-reduction.ll | 20 +-
.../RISCV/vplan-vp-cast-intrinsics.ll | 227 ++++++++++++++++++
7 files changed, 369 insertions(+), 18 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 23a24ce293ef2d..e4daec6887fd26 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -885,6 +885,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
+ case VPRecipeBase::VPWidenCastEVLSC:
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenEVLSC:
@@ -1076,6 +1077,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
}
@@ -1528,19 +1530,28 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Result type for the cast.
Type *ResultTy;
-public:
- VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
- CastInst &UI)
- : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
+protected:
+ VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
+ VPValue *Op, Type *ResultTy, CastInst &UI)
+ : VPRecipeWithIRFlags(VPDefOpcode, Op, UI), Opcode(Opcode),
ResultTy(ResultTy) {
assert(UI.getOpcode() == Opcode &&
"opcode of underlying cast doesn't match");
}
- VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
- : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
+ VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
+ VPValue *Op, Type *ResultTy)
+ : VPRecipeWithIRFlags(VPDefOpcode, Op), Opcode(Opcode),
ResultTy(ResultTy) {}
+public:
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+ CastInst &UI)
+ : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy, UI) {}
+
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+ : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy) {}
+
~VPWidenCastRecipe() override = default;
VPWidenCastRecipe *clone() override {
@@ -1551,7 +1562,15 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
}
- VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC;
+ }
+
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
+ }
/// Produce widened copies of the cast.
void execute(VPTransformState &State) override;
@@ -1568,6 +1587,55 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
Type *getResultType() const { return ResultTy; }
};
+/// A recipe for widening cast operations using vector-predication intrinsics
+/// with explicit vector length (EVL).
+class VPWidenCastEVLRecipe : public VPWidenCastRecipe {
+ using VPRecipeWithIRFlags::transferFlags;
+
+public:
+ VPWidenCastEVLRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+ VPValue &EVL)
+ : VPWidenCastRecipe(VPDef::VPWidenCastEVLSC, Opcode, Op, ResultTy) {
+ addOperand(&EVL);
+ }
+
+ VPWidenCastEVLRecipe(VPWidenCastRecipe &W, VPValue &EVL)
+ : VPWidenCastEVLRecipe(W.getOpcode(), W.getOperand(0), W.getResultType(),
+ EVL) {
+ transferFlags(W);
+ }
+
+ ~VPWidenCastEVLRecipe() override = default;
+
+ VPWidenCastEVLRecipe *clone() final {
+ llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
+ return nullptr;
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenCastEVLSC)
+
+ VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
+ const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+ /// Produce a vp-intrinsic copy of the cast.
+ void execute(VPTransformState &State) final;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // EVL is always the last operand of this recipe, so only the EVL operand is
+ // treated as first-lane-only; any other operand must be vectorized.
+ return getEVL() == Op;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const final;
+#endif
+};
+
/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
class VPScalarCastRecipe : public VPSingleDefRecipe {
Instruction::CastOps Opcode;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2f0ba5510b8f34..9f3fbb5bf09472 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -87,6 +87,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPReductionSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
+ case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenLoadEVLSC:
@@ -130,6 +131,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPReductionSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
+ case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@@ -166,6 +168,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPScalarIVStepsSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
+ case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@@ -1340,16 +1343,56 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
}
+void VPWidenCastEVLRecipe::execute(VPTransformState &State) {
+ unsigned Opcode = getOpcode();
+ State.setDebugLocFrom(getDebugLoc());
+ assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+ "explicit vector length.");
+
+ // TODO: add more cast instruction, eg: fptoint/inttofp/inttoptr/fptofp
+ if (Opcode == Instruction::SExt || Opcode == Instruction::ZExt ||
+ Opcode == Instruction::Trunc) {
+ Value *SrcVal = State.get(getOperand(0), 0);
+ VectorType *DsType = VectorType::get(getResultType(), State.VF);
+
+ IRBuilderBase &BuilderIR = State.Builder;
+ VectorBuilder Builder(BuilderIR);
+ Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
+ Builder.setMask(Mask).setEVL(State.get(getEVL(), 0, /*NeedsScalar=*/true));
+
+ Value *VPInst =
+ Builder.createVectorInstruction(Opcode, DsType, {SrcVal}, "vp.cast");
+
+ if (VPInst) {
+ if (auto *VecOp = dyn_cast<CastInst>(VPInst))
+ VecOp->copyIRFlags(getUnderlyingInstr());
+ }
+
+ State.set(this, VPInst, 0);
+ State.addMetadata(VPInst,
+ dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-CAST ";
printAsOperand(O, SlotTracker);
- O << " = " << Instruction::getOpcodeName(Opcode) << " ";
+ O << " = " << Instruction::getOpcodeName(Opcode);
printFlags(O);
printOperands(O, SlotTracker);
O << " to " << *getResultType();
}
+
+void VPWidenCastEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-CAST ";
+ printAsOperand(O, SlotTracker);
+ O << " = vp." << Instruction::getOpcodeName(getOpcode());
+ printFlags(O);
+ printOperands(O, SlotTracker);
+}
#endif
/// This function adds
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a878613c4ba483..2e89c3811098a0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1379,6 +1379,15 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
})
+ .Case<VPWidenCastRecipe>(
+ [&](VPWidenCastRecipe *W) -> VPRecipeBase * {
+ unsigned Opcode = W->getOpcode();
+ if (Opcode != Instruction::SExt &&
+ Opcode != Instruction::ZExt &&
+ Opcode != Instruction::Trunc)
+ return nullptr;
+ return new VPWidenCastEVLRecipe(*W, EVL);
+ })
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
VPValue *NewMask = GetNewMask(Red->getCondOp());
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index a23a59aa2f11c2..e27f902b9588bc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -349,6 +349,7 @@ class VPDef {
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
+ VPWidenCastEVLSC,
VPWidenGEPSC,
VPWidenLoadEVLSC,
VPWidenLoadSC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 99bc4c38a3c3cd..80f0b6d612b87d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -148,6 +148,9 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
return VerifyEVLUse(
*W, Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
})
+ .Case<VPWidenCastEVLRecipe>([&](const VPWidenCastEVLRecipe *C) {
+ return VerifyEVLUse(*C, 1);
+ })
.Case<VPReductionEVLRecipe>([&](const VPReductionEVLRecipe *R) {
return VerifyEVLUse(*R, 2);
})
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 0381f6dae9811f..9c4ebf3d7ff849 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -159,38 +159,38 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP: vector.body:
; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP5]], i32 8, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = add i32 [[EVL_BASED_IV]], 0
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
-; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP10]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]]
+; IF-EVL-INLOOP-NEXT: [[VP_CAST:%.*]] = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> [[VP_OP_LOAD]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[VP_CAST]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP6]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]]
-; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL-INLOOP: scalar.ph:
; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL-INLOOP: for.body:
; IF-EVL-INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; IF-EVL-INLOOP-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]]
-; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-; IF-EVL-INLOOP-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32
+; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; IF-EVL-INLOOP-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
; IF-EVL-INLOOP-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]]
; IF-EVL-INLOOP-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1
; IF-EVL-INLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL-INLOOP: for.cond.cleanup.loopexit:
-; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
+; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP]]
; IF-EVL-INLOOP: for.cond.cleanup:
; IF-EVL-INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
new file mode 100644
index 00000000000000..58b9de9f88e2d3
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -0,0 +1,227 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s
+
+define void @vp_sext(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-CAST vp<%8> = vp.sext ir<%0>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = sext ir<%0> to i64
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %conv2 = sext i32 %0 to i64
+ %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %conv2, ptr %arrayidx4, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_zext(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-CAST vp<%8> = vp.zext ir<%0>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = zext ir<%0> to i64
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %conv2 = zext i32 %0 to i64
+ %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %conv2, ptr %arrayidx4, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_truncate(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL : VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT : Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT : Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT : Live-in ir<%N> = original trip-count
+
+; IF-EVL : vector.ph:
+; IF-EVL-NEXT : Successor(s): vector loop
+
+; IF-EVL : <x1> vector loop: {
+; IF-EVL-NEXT : vector.body:
+; IF-EVL-NEXT : EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT : EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT : EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT : vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT : CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT : vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT : WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT : WIDEN-CAST vp<%8> = vp.trunc ir<%0>, vp<%5>
+; IF-EVL-NEXT : CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT : vp<%9> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT : WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
+; IF-EVL-NEXT : SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT : EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT : EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT : EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT : No successors
+; IF-EVL-NEXT : }
+
+; NO-VP: Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = trunc ir<%0> to i16
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %conv2 = trunc i32 %0 to i16
+ %arrayidx4 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
+ store i16 %conv2, ptr %arrayidx4, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
>From 44ebfbdd4c456444a899288d33d4b79b1dbea867 Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Mon, 23 Sep 2024 15:36:08 +0800
Subject: [PATCH 2/3] [LV][EVL] Support
fpext/fptrunc/fptosi/fptoui/sitofp/uitofp/inttoptr/ptrtoint of cast
instruction with EVL-vectorization
---
llvm/lib/Transforms/Vectorize/VPlan.h | 6 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 5 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 6 +-
.../RISCV/vplan-vp-cast-intrinsics.ll | 472 +++++++++++++++++-
4 files changed, 459 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e4daec6887fd26..394d76799ec2b6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1594,14 +1594,14 @@ class VPWidenCastEVLRecipe : public VPWidenCastRecipe {
public:
VPWidenCastEVLRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
- VPValue &EVL)
- : VPWidenCastRecipe(VPDef::VPWidenCastEVLSC, Opcode, Op, ResultTy) {
+ CastInst &UI, VPValue &EVL)
+ : VPWidenCastRecipe(VPDef::VPWidenCastEVLSC, Opcode, Op, ResultTy, UI) {
addOperand(&EVL);
}
VPWidenCastEVLRecipe(VPWidenCastRecipe &W, VPValue &EVL)
: VPWidenCastEVLRecipe(W.getOpcode(), W.getOperand(0), W.getResultType(),
- EVL) {
+ *cast<CastInst>(W.getUnderlyingInstr()), EVL) {
transferFlags(W);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9f3fbb5bf09472..4b80b7da46d985 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1345,13 +1345,12 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
void VPWidenCastEVLRecipe::execute(VPTransformState &State) {
unsigned Opcode = getOpcode();
+ auto Inst = cast<CastInst>(getUnderlyingInstr());
State.setDebugLocFrom(getDebugLoc());
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
"explicit vector length.");
- // TODO: add more cast instruction, eg: fptoint/inttofp/inttoptr/fptofp
- if (Opcode == Instruction::SExt || Opcode == Instruction::ZExt ||
- Opcode == Instruction::Trunc) {
+ if (Inst->isCast()) {
Value *SrcVal = State.get(getOperand(0), 0);
VectorType *DsType = VectorType::get(getResultType(), State.VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2e89c3811098a0..589a5468c8e919 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1381,10 +1381,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
})
.Case<VPWidenCastRecipe>(
[&](VPWidenCastRecipe *W) -> VPRecipeBase * {
- unsigned Opcode = W->getOpcode();
- if (Opcode != Instruction::SExt &&
- Opcode != Instruction::ZExt &&
- Opcode != Instruction::Trunc)
+ auto Inst = cast<CastInst>(W->getUnderlyingInstr());
+ if (!Inst->isCast())
return nullptr;
return new VPWidenCastEVLRecipe(*W, EVL);
})
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
index 58b9de9f88e2d3..2db5e25b45936e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -21,21 +21,21 @@ define void @vp_sext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: WIDEN-CAST vp<%8> = vp.sext ir<%0>, vp<%5>
+; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.sext ir<%0>, vp<%5>
; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT: WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }
@@ -93,21 +93,21 @@ define void @vp_zext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: WIDEN-CAST vp<%8> = vp.zext ir<%0>, vp<%5>
+; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.zext ir<%0>, vp<%5>
; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT: WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }
@@ -172,7 +172,7 @@ define void @vp_truncate(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT : CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
; IF-EVL-NEXT : vp<%7> = vector-pointer ir<%arrayidx>
; IF-EVL-NEXT : WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT : WIDEN-CAST vp<%8> = vp.trunc ir<%0>, vp<%5>
+; IF-EVL-NEXT : WIDEN-CAST vp<%8> = vp.trunc ir<%0>, vp<%5>
; IF-EVL-NEXT : CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
; IF-EVL-NEXT : vp<%9> = vector-pointer ir<%arrayidx4>
; IF-EVL-NEXT : WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
@@ -198,7 +198,7 @@ define void @vp_truncate(ptr noalias %a, ptr noalias %b, i64 %N) {
; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = trunc ir<%0> to i16
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = trunc ir<%0> to i16
; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
@@ -225,3 +225,435 @@ for.body:
%exitcond.not = icmp eq i64 %indvars.iv.next, %N
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
+
+define void @vp_fpext(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.fpext ir<%0>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = fpext ir<%0> to double
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+ %0 = load float, ptr %arrayidx, align 4
+ %conv2 = fpext float %0 to double
+ %arrayidx4 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+ store double %conv2, ptr %arrayidx4, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_fptrunct(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.fptrunc ir<%0>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = fptrunc ir<%0> to float
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double, ptr %b, i64 %indvars.iv
+ %0 = load double, ptr %arrayidx, align 8
+ %conv2 = fptrunc double %0 to float
+ %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+ store float %conv2, ptr %arrayidx4, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_fptosi(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.fptosi ir<%0>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = fptosi ir<%0> to i32
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+ %0 = load float, ptr %arrayidx, align 4
+ %conv2 = fptosi float %0 to i32
+ %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ store i32 %conv2, ptr %arrayidx4, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_fptouint(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.fptoui ir<%0>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = fptoui ir<%0> to i32
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+ %0 = load float, ptr %arrayidx, align 4
+ %conv2 = fptoui float %0 to i32
+ %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ store i32 %conv2, ptr %arrayidx4, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_inttofp(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.sitofp ir<%0>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = sitofp ir<%0> to float
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %conv2 = sitofp i32 %0 to float
+ %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+ store float %conv2, ptr %arrayidx4, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_uinttofp(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.uitofp ir<%0>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = uitofp ir<%0> to float
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %conv2 = uitofp i32 %0 to float
+ %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+ store float %conv2, ptr %arrayidx4, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
>From ee4b5f48c8b622182980a61622913b0523f6deab Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Tue, 24 Sep 2024 10:44:02 +0800
Subject: [PATCH 3/3] [LV][EVL] Address code review comments
---
llvm/lib/Transforms/Vectorize/VPlan.h | 2 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 35 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 3 -
.../Transforms/Vectorize/VPlanVerifier.cpp | 8 +-
.../RISCV/vplan-vp-cast-intrinsics.ll | 734 ++++++------------
5 files changed, 248 insertions(+), 534 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 394d76799ec2b6..4a6fb2b0a68673 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1587,7 +1587,7 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
Type *getResultType() const { return ResultTy; }
};
-// A recipe for widening cast operation with vector-predication intrinsics with
+/// A recipe for widening cast operation with vector-predication intrinsics with
/// explicit vector length (EVL).
class VPWidenCastEVLRecipe : public VPWidenCastRecipe {
using VPRecipeWithIRFlags::transferFlags;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 4b80b7da46d985..9818a4c14fdd66 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1345,32 +1345,21 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
void VPWidenCastEVLRecipe::execute(VPTransformState &State) {
unsigned Opcode = getOpcode();
- auto Inst = cast<CastInst>(getUnderlyingInstr());
State.setDebugLocFrom(getDebugLoc());
- assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
- "explicit vector length.");
+ Value *SrcVal = State.get(getOperand(0));
+ VectorType *DsType = VectorType::get(getResultType(), State.VF);
- if (Inst->isCast()) {
- Value *SrcVal = State.get(getOperand(0), 0);
- VectorType *DsType = VectorType::get(getResultType(), State.VF);
-
- IRBuilderBase &BuilderIR = State.Builder;
- VectorBuilder Builder(BuilderIR);
- Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
- Builder.setMask(Mask).setEVL(State.get(getEVL(), 0, /*NeedsScalar=*/true));
-
- Value *VPInst =
- Builder.createVectorInstruction(Opcode, DsType, {SrcVal}, "vp.cast");
+ IRBuilderBase &BuilderIR = State.Builder;
+ VectorBuilder Builder(BuilderIR);
+ Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
+ Builder.setMask(Mask).setEVL(State.get(getEVL(), /*NeedsScalar=*/true));
- if (VPInst) {
- if (auto *VecOp = dyn_cast<CastInst>(VPInst))
- VecOp->copyIRFlags(getUnderlyingInstr());
- }
+ Value *VPInst =
+ Builder.createVectorInstruction(Opcode, DsType, {SrcVal}, "vp.cast");
- State.set(this, VPInst, 0);
- State.addMetadata(VPInst,
- dyn_cast_or_null<Instruction>(getUnderlyingValue()));
- }
+ State.set(this, VPInst, 0);
+ State.addMetadata(VPInst,
+ dyn_cast_or_null<Instruction>(getUnderlyingValue()));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1378,7 +1367,7 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-CAST ";
printAsOperand(O, SlotTracker);
- O << " = " << Instruction::getOpcodeName(Opcode);
+ O << " = " << Instruction::getOpcodeName(Opcode) << " ";
printFlags(O);
printOperands(O, SlotTracker);
O << " to " << *getResultType();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 589a5468c8e919..31e487aa1d227b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1381,9 +1381,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
})
.Case<VPWidenCastRecipe>(
[&](VPWidenCastRecipe *W) -> VPRecipeBase * {
- auto Inst = cast<CastInst>(W->getUnderlyingInstr());
- if (!Inst->isCast())
- return nullptr;
return new VPWidenCastEVLRecipe(*W, EVL);
})
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 80f0b6d612b87d..da63b15e714690 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -141,16 +141,12 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
.Case<VPWidenStoreEVLRecipe>([&](const VPWidenStoreEVLRecipe *S) {
return VerifyEVLUse(*S, 2);
})
- .Case<VPWidenLoadEVLRecipe>([&](const VPWidenLoadEVLRecipe *L) {
- return VerifyEVLUse(*L, 1);
- })
+ .Case<VPWidenLoadEVLRecipe, VPWidenCastEVLRecipe>(
+ [&](const VPRecipeBase *C) { return VerifyEVLUse(*C, 1); })
.Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
return VerifyEVLUse(
*W, Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
})
- .Case<VPWidenCastEVLRecipe>([&](const VPWidenCastEVLRecipe *C) {
- return VerifyEVLUse(*C, 1);
- })
.Case<VPReductionEVLRecipe>([&](const VPReductionEVLRecipe *R) {
return VerifyEVLUse(*R, 2);
})
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
index 2db5e25b45936e..6f00f96a23e2e5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -5,15 +5,10 @@
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
-; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
-; RUN: -force-tail-folding-style=none \
-; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
-; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s
-
define void @vp_sext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
; IF-EVL: vector.ph:
@@ -21,71 +16,48 @@ define void @vp_sext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
-; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.sext ir<%0>, vp<%5>
-; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%[0-9]+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[SEXT:%.+]]> = vp.sext ir<[[LD1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SEXT]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }
-; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP-NEXT: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = sext ir<%0> to i64
-; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
-; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
-; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
entry:
- %cmp8 = icmp sgt i64 %N, 0
- br i1 %cmp8, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:
- ret void
+ br label %for.body
for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
%0 = load i32, ptr %arrayidx, align 4
%conv2 = sext i32 %0 to i64
- %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %iv
store i64 %conv2, ptr %arrayidx4, align 8
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
}
define void @vp_zext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
; IF-EVL: vector.ph:
@@ -93,143 +65,95 @@ define void @vp_zext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
-; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.zext ir<%0>, vp<%5>
-; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%[0-9]+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[ZEXT:%.+]]> = vp.zext ir<[[LD1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ZEXT]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }
-; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP-NEXT: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = zext ir<%0> to i64
-; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
-; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
-; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
-
entry:
- %cmp8 = icmp sgt i64 %N, 0
- br i1 %cmp8, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:
- ret void
+ br label %for.body
for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
%0 = load i32, ptr %arrayidx, align 4
%conv2 = zext i32 %0 to i64
- %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %iv
store i64 %conv2, ptr %arrayidx4, align 8
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
}
define void @vp_truncate(ptr noalias %a, ptr noalias %b, i64 %N) {
-; IF-EVL : VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT : Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT : Live-in vp<%1> = vector-trip-count
-; IF-EVL-NEXT : Live-in ir<%N> = original trip-count
-
-; IF-EVL : vector.ph:
-; IF-EVL-NEXT : Successor(s): vector loop
-
-; IF-EVL : <x1> vector loop: {
-; IF-EVL-NEXT : vector.body:
-; IF-EVL-NEXT : EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
-; IF-EVL-NEXT : EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
-; IF-EVL-NEXT : EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT : vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT : CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT : vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT : WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT : WIDEN-CAST vp<%8> = vp.trunc ir<%0>, vp<%5>
-; IF-EVL-NEXT : CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT : vp<%9> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT : WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
-; IF-EVL-NEXT : SCALAR-CAST vp<%10> = zext vp<%5> to i64
-; IF-EVL-NEXT : EMIT vp<%11> = add vp<%10>, vp<%4>
-; IF-EVL-NEXT : EMIT vp<%12> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT : EMIT branch-on-count vp<%12>, vp<%1>
-; IF-EVL-NEXT : No successors
-; IF-EVL-NEXT : }
-
-; NO-VP: Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = trunc ir<%0> to i16
-; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
-; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
-; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
+; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-entry:
- %cmp8 = icmp sgt i64 %N, 0
- br i1 %cmp8, label %for.body, label %for.cond.cleanup
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
-for.cond.cleanup:
- ret void
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = vp.trunc ir<[[LD1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+entry:
+ br label %for.body
for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
%0 = load i32, ptr %arrayidx, align 4
%conv2 = trunc i32 %0 to i16
- %arrayidx4 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
+ %arrayidx4 = getelementptr inbounds i16, ptr %a, i64 %iv
store i16 %conv2, ptr %arrayidx4, align 2
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
}
define void @vp_fpext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
; IF-EVL: vector.ph:
@@ -237,71 +161,47 @@ define void @vp_fpext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
-; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.fpext ir<%0>, vp<%5>
-; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%[0-9]+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = vp.fpext ir<[[LD1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPEXT]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }
-; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP-NEXT: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = fpext ir<%0> to double
-; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
-; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
-; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
-
entry:
- %cmp8 = icmp sgt i64 %N, 0
- br i1 %cmp8, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:
- ret void
+ br label %for.body
for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %b, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%conv2 = fpext float %0 to double
- %arrayidx4 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+ %arrayidx4 = getelementptr inbounds double, ptr %a, i64 %iv
store double %conv2, ptr %arrayidx4, align 8
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
}
define void @vp_fptrunct(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
; IF-EVL: vector.ph:
@@ -309,143 +209,47 @@ define void @vp_fptrunct(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
-; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.fptrunc ir<%0>, vp<%5>
-; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%[0-9]+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTRUNC:%.+]]> = vp.fptrunc ir<[[LD1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTRUNC]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }
-; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP-NEXT: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = fptrunc ir<%0> to float
-; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
-; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
-; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
-
entry:
- %cmp8 = icmp sgt i64 %N, 0
- br i1 %cmp8, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:
- ret void
+ br label %for.body
for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds double, ptr %b, i64 %indvars.iv
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double, ptr %b, i64 %iv
%0 = load double, ptr %arrayidx, align 8
%conv2 = fptrunc double %0 to float
- %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+ %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %iv
store float %conv2, ptr %arrayidx4, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
-}
-
-define void @vp_fptosi(ptr noalias %a, ptr noalias %b, i64 %N) {
-; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %for.body
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
-; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.fptosi ir<%0>, vp<%5>
-; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP-NEXT: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = fptosi ir<%0> to i32
-; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
-; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
-; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
-
-entry:
- %cmp8 = icmp sgt i64 %N, 0
- br i1 %cmp8, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:
+exit:
ret void
-
-for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
- %0 = load float, ptr %arrayidx, align 4
- %conv2 = fptosi float %0 to i32
- %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
- store i32 %conv2, ptr %arrayidx4, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
-define void @vp_fptouint(ptr noalias %a, ptr noalias %b, i64 %N) {
+define void @vp_fptosi(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
; IF-EVL: vector.ph:
@@ -453,71 +257,47 @@ define void @vp_fptouint(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
-; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.fptoui ir<%0>, vp<%5>
-; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%[0-9]+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTOSI:%.+]]> = vp.fptoui ir<[[LD1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOSI]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }
-; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP-NEXT: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = fptoui ir<%0> to i32
-; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
-; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
-; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
-
entry:
- %cmp8 = icmp sgt i64 %N, 0
- br i1 %cmp8, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:
- ret void
+ br label %for.body
for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %b, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%conv2 = fptoui float %0 to i32
- %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv
store i32 %conv2, ptr %arrayidx4, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
}
define void @vp_inttofp(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
; IF-EVL: vector.ph:
@@ -525,71 +305,47 @@ define void @vp_inttofp(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
-; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.sitofp ir<%0>, vp<%5>
-; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%[0-9]+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[SITOFP:%.+]]> = vp.sitofp ir<[[LD1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SITOFP]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }
-; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP-NEXT: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = sitofp ir<%0> to float
-; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
-; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
-; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
-
entry:
- %cmp8 = icmp sgt i64 %N, 0
- br i1 %cmp8, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:
- ret void
+ br label %for.body
for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
%0 = load i32, ptr %arrayidx, align 4
%conv2 = sitofp i32 %0 to float
- %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+ %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %iv
store float %conv2, ptr %arrayidx4, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
}
define void @vp_uinttofp(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
; IF-EVL: vector.ph:
@@ -597,63 +353,39 @@ define void @vp_uinttofp(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%11>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%10>
-; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: WIDEN-CAST ir<%conv2> = vp.uitofp ir<%0>, vp<%5>
-; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT: WIDEN vp.store vp<%8>, ir<%conv2>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%9> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%10> = add vp<%9>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%11>, vp<%1>
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[UITOFP:%.+]]> = vp.uitofp ir<[[LD1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[UITOFP]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }
-; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP-NEXT: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = uitofp ir<%0> to float
-; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
-; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
-; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
-
entry:
- %cmp8 = icmp sgt i64 %N, 0
- br i1 %cmp8, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:
- ret void
+ br label %for.body
for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
%0 = load i32, ptr %arrayidx, align 4
%conv2 = uitofp i32 %0 to float
- %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+ %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %iv
store float %conv2, ptr %arrayidx4, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
}
More information about the llvm-commits
mailing list