[llvm] [LV][EVL] Support cast instruction with EVL-vectorization (PR #108351)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 29 19:41:11 PDT 2024
https://github.com/LiqinWeng updated https://github.com/llvm/llvm-project/pull/108351
>From 5ace8b229781bac0c15b8dac66001cb7407d9fe6 Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Thu, 12 Sep 2024 17:24:35 +0800
Subject: [PATCH 1/3] [LV][EVL] Support cast instruction with EVL-vectorization
---
.../Transforms/Vectorize/LoopVectorize.cpp | 1 +
llvm/lib/Transforms/Vectorize/VPlan.h | 77 +++++-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 47 ++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 9 +
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 +
.../LoopVectorize/RISCV/inloop-reduction.ll | 20 +-
.../RISCV/vplan-vp-cast-intrinsics.ll | 227 ++++++++++++++++++
7 files changed, 367 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 88086f24dfdce2..9279b2f2e11c6a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4503,6 +4503,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPWidenCallSC:
case VPDef::VPWidenCanonicalIVSC:
case VPDef::VPWidenCastSC:
+ case VPDef::VPWidenCastEVLSC:
case VPDef::VPWidenGEPSC:
case VPDef::VPWidenIntrinsicSC:
case VPDef::VPWidenSC:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a34e34a0d71f1e..be1493c69dbf6a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -909,6 +909,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
+ case VPRecipeBase::VPWidenCastEVLSC:
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
@@ -1110,6 +1111,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
@@ -1576,19 +1578,28 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Result type for the cast.
Type *ResultTy;
-public:
- VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
- CastInst &UI)
+protected:
+ VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
+ VPValue *Op, Type *ResultTy, CastInst &UI)
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
ResultTy(ResultTy) {
assert(UI.getOpcode() == Opcode &&
"opcode of underlying cast doesn't match");
}
- VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+ VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
+ VPValue *Op, Type *ResultTy)
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
ResultTy(ResultTy) {}
+public:
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+ CastInst &UI)
+ : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy, UI) {}
+
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+ : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy) {}
+
~VPWidenCastRecipe() override = default;
VPWidenCastRecipe *clone() override {
@@ -1599,7 +1610,15 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
}
- VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC;
+ }
+
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
+ }
/// Produce widened copies of the cast.
void execute(VPTransformState &State) override;
@@ -1620,6 +1639,54 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
Type *getResultType() const { return ResultTy; }
};
+// A recipe for widening cast operation with vector-predication intrinsics with
+/// explicit vector length (EVL).
+class VPWidenCastEVLRecipe : public VPWidenCastRecipe {
+ using VPRecipeWithIRFlags::transferFlags;
+
+public:
+ VPWidenCastEVLRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+ VPValue &EVL)
+ : VPWidenCastRecipe(VPDef::VPWidenCastEVLSC, Opcode, Op, ResultTy) {
+ addOperand(&EVL);
+ }
+ VPWidenCastEVLRecipe(VPWidenCastRecipe &W, VPValue &EVL)
+ : VPWidenCastEVLRecipe(W.getOpcode(), W.getOperand(0), W.getResultType(),
+ EVL) {
+ transferFlags(W);
+ }
+
+ ~VPWidenCastEVLRecipe() override = default;
+
+ VPWidenCastEVLRecipe *clone() final {
+ llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
+ return nullptr;
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenCastEVLSC)
+
+ VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
+ const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+ /// Produce a vp-intrinsic copies of the cast.
+ void execute(VPTransformState &State) final;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // EVL in that recipe is always the last operand, thus any use before means
+ // the VPValue should be vectorized.
+ return getEVL() == Op;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const final;
+#endif
+};
+
/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
class VPScalarCastRecipe : public VPSingleDefRecipe {
Instruction::CastOps Opcode;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b1e6086398c4df..65db034b414dce 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -93,6 +93,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPVectorPointerSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
+ case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenLoadEVLSC:
@@ -139,6 +140,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPVectorPointerSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
+ case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@@ -179,6 +181,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPVectorPointerSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
+ case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@@ -1577,6 +1580,40 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
dyn_cast_if_present<Instruction>(getUnderlyingValue()));
}
+void VPWidenCastEVLRecipe::execute(VPTransformState &State) {
+ unsigned Opcode = getOpcode();
+ State.setDebugLocFrom(getDebugLoc());
+ assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+ "explicit vector length.");
+
+ assert(State.get(getOperand(0), 0)->getType()->isVectorTy() &&
+ "VPWidenCastEVLRecipe should not be used for scalars");
+
+ // TODO: add more cast instruction, eg: fptoint/inttofp/inttoptr/fptofp
+ if (Opcode == Instruction::SExt || Opcode == Instruction::ZExt ||
+ Opcode == Instruction::Trunc) {
+ Value *SrcVal = State.get(getOperand(0), 0);
+ VectorType *SrcTy = cast<VectorType>(SrcVal->getType());
+ VectorType *DsType =
+ VectorType::get(getResultType(), SrcTy->getElementCount());
+
+ IRBuilderBase &BuilderIR = State.Builder;
+ VectorBuilder Builder(BuilderIR);
+ Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
+
+ Builder.setMask(Mask).setEVL(State.get(getEVL(), 0, /*NeedsScalar=*/true));
+ Value *VPInst =
+ Builder.createVectorInstruction(Opcode, DsType, {SrcVal}, "vp.cast");
+ if (VPInst) {
+ if (auto *VecOp = dyn_cast<CastInst>(VPInst))
+ VecOp->copyIRFlags(getUnderlyingInstr());
+ }
+ State.set(this, VPInst, 0);
+ State.addMetadata(VPInst,
+ dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -1587,6 +1624,16 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
printOperands(O, SlotTracker);
O << " to " << *getResultType();
}
+
+void VPWidenCastEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-VP ";
+ printAsOperand(O, SlotTracker);
+ O << " = vp." << Instruction::getOpcodeName(getOpcode()) << " ";
+ printFlags(O);
+ printOperands(O, SlotTracker);
+ O << " to " << *getResultType();
+}
#endif
/// This function adds
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 03c4110761ac6a..29070207d4c881 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1480,6 +1480,15 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
})
+ .Case<VPWidenCastRecipe>(
+ [&](VPWidenCastRecipe *W) -> VPRecipeBase * {
+ unsigned Opcode = W->getOpcode();
+ if (Opcode != Instruction::SExt &&
+ Opcode != Instruction::ZExt &&
+ Opcode != Instruction::Trunc)
+ return nullptr;
+ return new VPWidenCastEVLRecipe(*W, EVL);
+ })
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
VPValue *NewMask = GetNewMask(Red->getCondOp());
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 89b3ed72b8eb65..aada5647a18abe 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -350,6 +350,7 @@ class VPDef {
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
+ VPWidenCastEVLSC,
VPWidenGEPSC,
VPWidenIntrinsicSC,
VPWidenLoadEVLSC,
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 0381f6dae9811f..9c4ebf3d7ff849 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -159,38 +159,38 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP: vector.body:
; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP5]], i32 8, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = add i32 [[EVL_BASED_IV]], 0
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
-; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP10]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]]
+; IF-EVL-INLOOP-NEXT: [[VP_CAST:%.*]] = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> [[VP_OP_LOAD]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[VP_CAST]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP6]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]]
-; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL-INLOOP: scalar.ph:
; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL-INLOOP: for.body:
; IF-EVL-INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; IF-EVL-INLOOP-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]]
-; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-; IF-EVL-INLOOP-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32
+; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; IF-EVL-INLOOP-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
; IF-EVL-INLOOP-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]]
; IF-EVL-INLOOP-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1
; IF-EVL-INLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL-INLOOP: for.cond.cleanup.loopexit:
-; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
+; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP]]
; IF-EVL-INLOOP: for.cond.cleanup:
; IF-EVL-INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
new file mode 100644
index 00000000000000..f1f3d2c88301e1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -0,0 +1,227 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s
+
+define void @vp_sext(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-VP vp<%8> = vp.sext ir<%0>, vp<%5> to i64
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = sext ir<%0> to i64
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %conv2 = sext i32 %0 to i64
+ %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %conv2, ptr %arrayidx4, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_zext(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-VP vp<%8> = vp.zext ir<%0>, vp<%5> to i64
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = zext ir<%0> to i64
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %conv2 = zext i32 %0 to i64
+ %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %conv2, ptr %arrayidx4, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_truncate(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL : VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT : Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT : Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT : Live-in ir<%N> = original trip-count
+
+; IF-EVL : vector.ph:
+; IF-EVL-NEXT : Successor(s): vector loop
+
+; IF-EVL : <x1> vector loop: {
+; IF-EVL-NEXT : vector.body:
+; IF-EVL-NEXT : EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT : EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT : EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT : vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT : CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT : vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT : WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT : WIDEN-VP vp<%8> = vp.trunc ir<%0>, vp<%5> to i16
+; IF-EVL-NEXT : CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT : vp<%9> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT : WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
+; IF-EVL-NEXT : SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT : EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT : EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT : EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT : No successors
+; IF-EVL-NEXT : }
+
+; NO-VP: Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = trunc ir<%0> to i16
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %conv2 = trunc i32 %0 to i16
+ %arrayidx4 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
+ store i16 %conv2, ptr %arrayidx4, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
>From 7fc21ade1534e9c2e99ada52cee056ab046824ec Mon Sep 17 00:00:00 2001
From: "Liqin.Weng" <liqin.weng at spacemit.com>
Date: Fri, 18 Oct 2024 14:02:54 +0800
Subject: [PATCH 2/3] Rebase and Revert the implement of castEVLRecipe
---
.../Transforms/Vectorize/LoopVectorize.cpp | 1 -
llvm/lib/Transforms/Vectorize/VPlan.h | 77 +--
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 47 --
.../Transforms/Vectorize/VPlanTransforms.cpp | 9 -
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 -
.../LoopVectorize/RISCV/inloop-reduction.ll | 2 +-
.../RISCV/vplan-vp-cast-intrinsics.ll | 526 ++++++++++++------
7 files changed, 351 insertions(+), 312 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9279b2f2e11c6a..88086f24dfdce2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4503,7 +4503,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPWidenCallSC:
case VPDef::VPWidenCanonicalIVSC:
case VPDef::VPWidenCastSC:
- case VPDef::VPWidenCastEVLSC:
case VPDef::VPWidenGEPSC:
case VPDef::VPWidenIntrinsicSC:
case VPDef::VPWidenSC:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index be1493c69dbf6a..a34e34a0d71f1e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -909,7 +909,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
- case VPRecipeBase::VPWidenCastEVLSC:
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
@@ -1111,7 +1110,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
- R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
@@ -1578,28 +1576,19 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Result type for the cast.
Type *ResultTy;
-protected:
- VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
- VPValue *Op, Type *ResultTy, CastInst &UI)
+public:
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+ CastInst &UI)
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
ResultTy(ResultTy) {
assert(UI.getOpcode() == Opcode &&
"opcode of underlying cast doesn't match");
}
- VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
- VPValue *Op, Type *ResultTy)
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
ResultTy(ResultTy) {}
-public:
- VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
- CastInst &UI)
- : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy, UI) {}
-
- VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
- : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy) {}
-
~VPWidenCastRecipe() override = default;
VPWidenCastRecipe *clone() override {
@@ -1610,15 +1599,7 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
}
- static inline bool classof(const VPRecipeBase *R) {
- return R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
- R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC;
- }
-
- static inline bool classof(const VPUser *U) {
- auto *R = dyn_cast<VPRecipeBase>(U);
- return R && classof(R);
- }
+ VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
/// Produce widened copies of the cast.
void execute(VPTransformState &State) override;
@@ -1639,54 +1620,6 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
Type *getResultType() const { return ResultTy; }
};
-// A recipe for widening cast operation with vector-predication intrinsics with
-/// explicit vector length (EVL).
-class VPWidenCastEVLRecipe : public VPWidenCastRecipe {
- using VPRecipeWithIRFlags::transferFlags;
-
-public:
- VPWidenCastEVLRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
- VPValue &EVL)
- : VPWidenCastRecipe(VPDef::VPWidenCastEVLSC, Opcode, Op, ResultTy) {
- addOperand(&EVL);
- }
- VPWidenCastEVLRecipe(VPWidenCastRecipe &W, VPValue &EVL)
- : VPWidenCastEVLRecipe(W.getOpcode(), W.getOperand(0), W.getResultType(),
- EVL) {
- transferFlags(W);
- }
-
- ~VPWidenCastEVLRecipe() override = default;
-
- VPWidenCastEVLRecipe *clone() final {
- llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
- return nullptr;
- }
-
- VP_CLASSOF_IMPL(VPDef::VPWidenCastEVLSC)
-
- VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
- const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
-
- /// Produce a vp-intrinsic copies of the cast.
- void execute(VPTransformState &State) final;
-
- /// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- // EVL in that recipe is always the last operand, thus any use before means
- // the VPValue should be vectorized.
- return getEVL() == Op;
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const final;
-#endif
-};
-
/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
class VPScalarCastRecipe : public VPSingleDefRecipe {
Instruction::CastOps Opcode;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 65db034b414dce..b1e6086398c4df 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -93,7 +93,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPVectorPointerSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
- case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenLoadEVLSC:
@@ -140,7 +139,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPVectorPointerSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
- case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@@ -181,7 +179,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPVectorPointerSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
- case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@@ -1580,40 +1577,6 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
dyn_cast_if_present<Instruction>(getUnderlyingValue()));
}
-void VPWidenCastEVLRecipe::execute(VPTransformState &State) {
- unsigned Opcode = getOpcode();
- State.setDebugLocFrom(getDebugLoc());
- assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
- "explicit vector length.");
-
- assert(State.get(getOperand(0), 0)->getType()->isVectorTy() &&
- "VPWidenCastEVLRecipe should not be used for scalars");
-
- // TODO: add more cast instruction, eg: fptoint/inttofp/inttoptr/fptofp
- if (Opcode == Instruction::SExt || Opcode == Instruction::ZExt ||
- Opcode == Instruction::Trunc) {
- Value *SrcVal = State.get(getOperand(0), 0);
- VectorType *SrcTy = cast<VectorType>(SrcVal->getType());
- VectorType *DsType =
- VectorType::get(getResultType(), SrcTy->getElementCount());
-
- IRBuilderBase &BuilderIR = State.Builder;
- VectorBuilder Builder(BuilderIR);
- Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
-
- Builder.setMask(Mask).setEVL(State.get(getEVL(), 0, /*NeedsScalar=*/true));
- Value *VPInst =
- Builder.createVectorInstruction(Opcode, DsType, {SrcVal}, "vp.cast");
- if (VPInst) {
- if (auto *VecOp = dyn_cast<CastInst>(VPInst))
- VecOp->copyIRFlags(getUnderlyingInstr());
- }
- State.set(this, VPInst, 0);
- State.addMetadata(VPInst,
- dyn_cast_or_null<Instruction>(getUnderlyingValue()));
- }
-}
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -1624,16 +1587,6 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
printOperands(O, SlotTracker);
O << " to " << *getResultType();
}
-
-void VPWidenCastEVLRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-VP ";
- printAsOperand(O, SlotTracker);
- O << " = vp." << Instruction::getOpcodeName(getOpcode()) << " ";
- printFlags(O);
- printOperands(O, SlotTracker);
- O << " to " << *getResultType();
-}
#endif
/// This function adds
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 29070207d4c881..03c4110761ac6a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1480,15 +1480,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
})
- .Case<VPWidenCastRecipe>(
- [&](VPWidenCastRecipe *W) -> VPRecipeBase * {
- unsigned Opcode = W->getOpcode();
- if (Opcode != Instruction::SExt &&
- Opcode != Instruction::ZExt &&
- Opcode != Instruction::Trunc)
- return nullptr;
- return new VPWidenCastEVLRecipe(*W, EVL);
- })
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
VPValue *NewMask = GetNewMask(Red->getCondOp());
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index aada5647a18abe..89b3ed72b8eb65 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -350,7 +350,6 @@ class VPDef {
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
- VPWidenCastEVLSC,
VPWidenGEPSC,
VPWidenIntrinsicSC,
VPWidenLoadEVLSC,
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 9c4ebf3d7ff849..0fa0dcf94a1d00 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -166,7 +166,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT: [[VP_CAST:%.*]] = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> [[VP_OP_LOAD]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT: [[VP_CAST:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[VP_CAST]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP6]], [[EVL_BASED_IV]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
index f1f3d2c88301e1..6894a30f30efe4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -1,91 +1,63 @@
; REQUIRES: asserts
-
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
-; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
-; RUN: -force-tail-folding-style=none \
-; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
-; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s
-
define void @vp_sext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
; IF-EVL: vector.ph:
; IF-EVL-NEXT: Successor(s): vector loop
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
-; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: WIDEN-VP vp<%8> = vp.sext ir<%0>, vp<%5> to i64
-; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT: WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
-; IF-EVL-NEXT: No successors
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[SEXT:%.+]]> = sext ir<[[LD1]]> to i64
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SEXT]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
+; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }
+; IF-EVL-NEXT: Successor(s): middle.block
-; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP-NEXT: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = sext ir<%0> to i64
-; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
-; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
-; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
entry:
- %cmp8 = icmp sgt i64 %N, 0
- br i1 %cmp8, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:
- ret void
+ br label %loop
-for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
- %0 = load i32, ptr %arrayidx, align 4
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %gep = getelementptr inbounds i32, ptr %b, i64 %iv
+ %0 = load i32, ptr %gep, align 4
%conv2 = sext i32 %0 to i64
- %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
- store i64 %conv2, ptr %arrayidx4, align 8
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+ %gep4 = getelementptr inbounds i64, ptr %a, i64 %iv
+ store i64 %conv2, ptr %gep4, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+ ret void
}
define void @vp_zext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
; IF-EVL: vector.ph:
@@ -93,135 +65,327 @@ define void @vp_zext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
-; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT: WIDEN-VP vp<%8> = vp.zext ir<%0>, vp<%5> to i64
-; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT: WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
-; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
-; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
-; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[ZEXT:%.+]]> = zext ir<[[LD1]]> to i64
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ZEXT]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }
-; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP-NEXT: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = zext ir<%0> to i64
-; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
-; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
-; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %gep = getelementptr inbounds i32, ptr %b, i64 %iv
+ %0 = load i32, ptr %gep, align 4
+ %conv2 = zext i32 %0 to i64
+ %gep4 = getelementptr inbounds i64, ptr %a, i64 %iv
+ store i64 %conv2, ptr %gep4, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @vp_truncate(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LD1]]> to i16
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
entry:
- %cmp8 = icmp sgt i64 %N, 0
- br i1 %cmp8, label %for.body, label %for.cond.cleanup
+ br label %loop
-for.cond.cleanup:
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %gep = getelementptr inbounds i32, ptr %b, i64 %iv
+ %0 = load i32, ptr %gep, align 4
+ %conv2 = trunc i32 %0 to i16
+ %gep4 = getelementptr inbounds i16, ptr %a, i64 %iv
+ store i16 %conv2, ptr %gep4, align 2
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
ret void
+}
-for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
- %0 = load i32, ptr %arrayidx, align 4
- %conv2 = zext i32 %0 to i64
- %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
- store i64 %conv2, ptr %arrayidx4, align 8
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+define void @vp_fpext(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPEXT]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %gep = getelementptr inbounds float, ptr %b, i64 %iv
+ %0 = load float, ptr %gep, align 4
+ %conv2 = fpext float %0 to double
+ %gep4 = getelementptr inbounds double, ptr %a, i64 %iv
+ store double %conv2, ptr %gep4, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+ ret void
}
-define void @vp_truncate(ptr noalias %a, ptr noalias %b, i64 %N) {
-; IF-EVL : VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT : Live-in vp<%0> = VF * UF
-; IF-EVL-NEXT : Live-in vp<%1> = vector-trip-count
-; IF-EVL-NEXT : Live-in ir<%N> = original trip-count
-
-; IF-EVL : vector.ph:
-; IF-EVL-NEXT : Successor(s): vector loop
-
-; IF-EVL : <x1> vector loop: {
-; IF-EVL-NEXT : vector.body:
-; IF-EVL-NEXT : EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
-; IF-EVL-NEXT : EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
-; IF-EVL-NEXT : EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
-; IF-EVL-NEXT : vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
-; IF-EVL-NEXT : CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
-; IF-EVL-NEXT : vp<%7> = vector-pointer ir<%arrayidx>
-; IF-EVL-NEXT : WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
-; IF-EVL-NEXT : WIDEN-VP vp<%8> = vp.trunc ir<%0>, vp<%5> to i16
-; IF-EVL-NEXT : CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
-; IF-EVL-NEXT : vp<%9> = vector-pointer ir<%arrayidx4>
-; IF-EVL-NEXT : WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
-; IF-EVL-NEXT : SCALAR-CAST vp<%10> = zext vp<%5> to i64
-; IF-EVL-NEXT : EMIT vp<%11> = add vp<%10>, vp<%4>
-; IF-EVL-NEXT : EMIT vp<%12> = add vp<%3>, vp<%0>
-; IF-EVL-NEXT : EMIT branch-on-count vp<%12>, vp<%1>
-; IF-EVL-NEXT : No successors
-; IF-EVL-NEXT : }
-
-; NO-VP: Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
-; NO-VP-NEXT: Live-in vp<%0> = VF * UF
-; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
-; NO-VP-NEXT: Live-in ir<%N> = original trip-count
-
-; NO-VP: vector.ph:
-; NO-VP-NEXT: Successor(s): vector loop
-
-; NO-VP: <x1> vector loop: {
-; NO-VP: vector.body:
-; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
-; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
-; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
-; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
-; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
-; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = trunc ir<%0> to i16
-; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
-; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
-; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
-; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
-; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
-; NO-VP-NEXT: No successors
-; NO-VP-NEXT: }
+define void @vp_fptrunct(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTRUNC:%.+]]> = fptrunc ir<[[LD1]]> to float
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTRUNC]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
entry:
- %cmp8 = icmp sgt i64 %N, 0
- br i1 %cmp8, label %for.body, label %for.cond.cleanup
+ br label %loop
-for.cond.cleanup:
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %gep = getelementptr inbounds double, ptr %b, i64 %iv
+ %0 = load double, ptr %gep, align 8
+ %conv2 = fptrunc double %0 to float
+ %gep4 = getelementptr inbounds float, ptr %a, i64 %iv
+ store float %conv2, ptr %gep4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
ret void
+}
-for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
- %0 = load i32, ptr %arrayidx, align 4
- %conv2 = trunc i32 %0 to i16
- %arrayidx4 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
- store i16 %conv2, ptr %arrayidx4, align 2
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %N
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+define void @vp_fptosi(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTOSI:%.+]]> = fptoui ir<[[LD1]]> to i32
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOSI]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %gep = getelementptr inbounds float, ptr %b, i64 %iv
+ %0 = load float, ptr %gep, align 4
+ %conv2 = fptoui float %0 to i32
+ %gep4 = getelementptr inbounds i32, ptr %a, i64 %iv
+ store i32 %conv2, ptr %gep4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @vp_inttofp(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[SITOFP:%.+]]> = sitofp ir<[[LD1]]> to float
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SITOFP]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %gep = getelementptr inbounds i32, ptr %b, i64 %iv
+ %0 = load i32, ptr %gep, align 4
+ %conv2 = sitofp i32 %0 to float
+ %gep4 = getelementptr inbounds float, ptr %a, i64 %iv
+ store float %conv2, ptr %gep4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+ ret void
}
+
+define void @vp_uinttofp(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN-CAST ir<[[UITOFP:%.+]]> = uitofp ir<[[LD1]]> to float
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[UITOFP]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %gep = getelementptr inbounds i32, ptr %b, i64 %iv
+ %0 = load i32, ptr %gep, align 4
+ %conv2 = uitofp i32 %0 to float
+ %gep4 = getelementptr inbounds float, ptr %a, i64 %iv
+ store float %conv2, ptr %gep4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+ ret void
+}
\ No newline at end of file
>From f73fe7899b153c496e077426b2b0608ccf6bcfe7 Mon Sep 17 00:00:00 2001
From: "Liqin.Weng" <liqin.weng at spacemit.com>
Date: Fri, 18 Oct 2024 17:20:53 +0800
Subject: [PATCH 3/3] [VPlan] Use VPWidenIntrinsicRecipe to vp.cast
---
llvm/include/llvm/IR/VectorBuilder.h | 4 +-
llvm/lib/IR/VectorBuilder.cpp | 9 ++--
llvm/lib/Transforms/Utils/LoopUtils.cpp | 4 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 9 ++++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 49 ++++++++++++-------
.../Transforms/Vectorize/VPlanTransforms.cpp | 18 +++++++
.../LoopVectorize/RISCV/inloop-reduction.ll | 2 +-
.../RISCV/vplan-vp-cast-intrinsics.ll | 32 ++++++------
8 files changed, 83 insertions(+), 44 deletions(-)
diff --git a/llvm/include/llvm/IR/VectorBuilder.h b/llvm/include/llvm/IR/VectorBuilder.h
index b0277c2b52595e..830163984e37ba 100644
--- a/llvm/include/llvm/IR/VectorBuilder.h
+++ b/llvm/include/llvm/IR/VectorBuilder.h
@@ -99,11 +99,11 @@ class VectorBuilder {
const Twine &Name = Twine());
/// Emit a VP reduction intrinsic call for recurrence kind.
- /// \param RdxID The intrinsic ID of llvm.vector.reduce.*
+ /// \param ID The intrinsic ID of call Intrinsic
/// \param ValTy The type of operand which the reduction operation is
/// performed.
/// \param VecOpArray The operand list.
- Value *createSimpleReduction(Intrinsic::ID RdxID, Type *ValTy,
+ Value *createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
ArrayRef<Value *> VecOpArray,
const Twine &Name = Twine());
};
diff --git a/llvm/lib/IR/VectorBuilder.cpp b/llvm/lib/IR/VectorBuilder.cpp
index 737f49b1334d76..d629a2fb6af7b3 100644
--- a/llvm/lib/IR/VectorBuilder.cpp
+++ b/llvm/lib/IR/VectorBuilder.cpp
@@ -60,13 +60,12 @@ Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy,
return createVectorInstructionImpl(VPID, ReturnTy, InstOpArray, Name);
}
-Value *VectorBuilder::createSimpleReduction(Intrinsic::ID RdxID,
- Type *ValTy,
+Value *VectorBuilder::createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
ArrayRef<Value *> InstOpArray,
const Twine &Name) {
- auto VPID = VPIntrinsic::getForIntrinsic(RdxID);
- assert(VPReductionIntrinsic::isVPReduction(VPID) &&
- "No VPIntrinsic for this reduction");
+ auto VPID = VPIntrinsic::getForIntrinsic(ID);
+ assert(VPIntrinsic::isVPIntrinsic(VPID) &&
+ "No VPIntrinsic for this Intrinsic");
return createVectorInstructionImpl(VPID, ValTy, InstOpArray, Name);
}
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 70047273c3b9af..2dac2d43f7f3a3 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1300,7 +1300,7 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
Type *SrcEltTy = SrcTy->getElementType();
Value *Iden = getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
Value *Ops[] = {Iden, Src};
- return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
+ return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
}
Value *llvm::createReduction(IRBuilderBase &B,
@@ -1343,7 +1343,7 @@ Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
auto *SrcTy = cast<VectorType>(Src->getType());
Value *Ops[] = {Start, Src};
- return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
+ return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
}
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a34e34a0d71f1e..2fd55c7e06148a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1686,6 +1686,15 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
MayWriteToMemory(CI.mayWriteToMemory()),
MayHaveSideEffects(CI.mayHaveSideEffects()) {}
+ // VPWidenIntrinsicRecipe(CastInst &CI, Intrinsic::ID VectorIntrinsicID,
+ // ArrayRef<VPValue *> CallArguments, Type *Ty,
+ // DebugLoc DL = {})
+ // : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
+ // VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
+ // MayReadFromMemory(CI.mayReadFromMemory()),
+ // MayWriteToMemory(CI.mayWriteToMemory()),
+ // MayHaveSideEffects(CI.mayHaveSideEffects()) {}
+
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
ArrayRef<VPValue *> CallArguments, Type *Ty,
bool MayReadFromMemory, bool MayWriteToMemory,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b1e6086398c4df..cd7681ae5d3e70 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -994,24 +994,37 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
Args.push_back(Arg);
}
- // Use vector version of the intrinsic.
- Module *M = State.Builder.GetInsertBlock()->getModule();
- Function *VectorF =
- Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
- assert(VectorF && "Can't retrieve vector intrinsic.");
-
- auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
- SmallVector<OperandBundleDef, 1> OpBundles;
- if (CI)
- CI->getOperandBundlesAsDefs(OpBundles);
-
- CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
-
- setFlags(V);
-
- if (!V->getType()->isVoidTy())
- State.set(this, V);
- State.addMetadata(V, CI);
+ if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
+ // Use vector version of the vector predicate Intrinsic
+ IRBuilderBase &BuilderIR = State.Builder;
+ VectorBuilder VBuilder(BuilderIR);
+ Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
+ VBuilder.setMask(Mask).setEVL(Args.back());
+ Args.pop_back();
+ Value *VPInst = VBuilder.createSimpleIntrinsic(
+ VectorIntrinsicID, TysForDecl[0], Args, "vp.call");
+ if (!VPInst->getType()->isVoidTy())
+ State.set(this, VPInst);
+ State.addMetadata(VPInst,
+ dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+ } else {
+ // Use vector version of the intrinsic.
+ Module *M = State.Builder.GetInsertBlock()->getModule();
+ Function *VectorF =
+ Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
+ assert(VectorF && "Can't retrieve vector intrinsic.");
+
+ auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ if (CI)
+ CI->getOperandBundlesAsDefs(OpBundles);
+
+ CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
+ setFlags(V);
+ if (!V->getType()->isVoidTy())
+ State.set(this, V);
+ State.addMetadata(V, CI);
+ }
}
InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 03c4110761ac6a..011cf21d874d66 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1484,6 +1484,24 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
VPValue *NewMask = GetNewMask(Red->getCondOp());
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
})
+ .Case<VPWidenCastRecipe>(
+ [&](VPWidenCastRecipe *CInst) -> VPRecipeBase * {
+ auto *CI = cast<CastInst>(CInst->getUnderlyingInstr());
+ SmallVector<VPValue *> Ops(CInst->operands());
+ Ops.push_back(&EVL);
+ Intrinsic::ID VPID =
+ VPIntrinsic::getForOpcode(CI->getOpcode());
+ if (VPID == Intrinsic::not_intrinsic)
+ return nullptr;
+ // FIXME: In fact, can we really not pass the
+ // underlyingInstr? In this case, how to set the Flag and
+ // add metadata in VPWidenIntrinsicRecipe::execute?
+ // return new VPWidenIntrinsicRecipe(
+ // *CI, VPID, Ops, CI->getType(), CI->getDebugLoc());
+ return new VPWidenIntrinsicRecipe(
+ VPID, Ops, TypeInfo.inferScalarType(CInst), false,
+ false, false);
+ })
.Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
SmallVector<VPValue *> Ops(Sel->operands());
Ops.push_back(&EVL);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 0fa0dcf94a1d00..9c4ebf3d7ff849 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -166,7 +166,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT: [[VP_CAST:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
+; IF-EVL-INLOOP-NEXT: [[VP_CAST:%.*]] = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> [[VP_OP_LOAD]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[VP_CAST]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP6]], [[EVL_BASED_IV]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
index 6894a30f30efe4..8692f6c802e559 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -23,10 +23,10 @@ define void @vp_sext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[SEXT:%.+]]> = sext ir<[[LD1]]> to i64
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SEXT:%.+]]> = call llvm.vp.sext(ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SEXT]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[SEXT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -73,10 +73,10 @@ define void @vp_zext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[ZEXT:%.+]]> = zext ir<[[LD1]]> to i64
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[ZEXT:%.+]]> = call llvm.vp.zext(ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ZEXT]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[ZEXT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -121,10 +121,10 @@ define void @vp_truncate(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LD1]]> to i16
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[TRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -169,10 +169,10 @@ define void @vp_fpext(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPEXT]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[FPEXT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -217,10 +217,10 @@ define void @vp_fptrunct(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTRUNC:%.+]]> = fptrunc ir<[[LD1]]> to float
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPTRUNC:%.+]]> = call llvm.vp.fptrunc(ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTRUNC]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[FPTRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -265,10 +265,10 @@ define void @vp_fptosi(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTOSI:%.+]]> = fptoui ir<[[LD1]]> to i32
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPTOSI:%.+]]> = call llvm.vp.fptoui(ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOSI]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[FPTOSI]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -313,10 +313,10 @@ define void @vp_inttofp(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[SITOFP:%.+]]> = sitofp ir<[[LD1]]> to float
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SITOFP:%.+]]> = call llvm.vp.sitofp(ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SITOFP]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[SITOFP]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -361,10 +361,10 @@ define void @vp_uinttofp(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[UITOFP:%.+]]> = uitofp ir<[[LD1]]> to float
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[UITOFP:%.+]]> = call llvm.vp.uitofp(ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[UITOFP]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[UITOFP]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
More information about the llvm-commits
mailing list