[llvm] [LV] Make having flags (FMFs etc) not require inheriting VPSingleDefRecipe (PR #114972)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 5 04:05:43 PST 2024
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/114972
From 92e46ff96e8ccbe2d82c19fd331786c97ae08bc6 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 15 Oct 2024 14:07:36 +0000
Subject: [PATCH] [LV] Make having flags (FMFs etc) not require inheriting
VPSingleDefRecipe
This splits out `VPRecipeWithIRFlags` into `VPRecipeIRFlags` and
`VPSingleDefRecipeWithIRFlags`. With this, the `VPRecipeIRFlags` class
contains the flags but does not inherit from any VPRecipe. The new
`VPSingleDefRecipeWithIRFlags` class functions the same as the previous
`VPRecipeWithIRFlags` (and all previous uses have been replaced with
it).
This alone is an NFC, but it will be needed to vectorize
calls/intrinsics that return multiple values (via literal structs) but
are modeled in VPlan as multiple recipe results. These calls can still
have FMFs, so they need the `VPRecipeIRFlags` but can't inherit from
`VPSingleDefRecipe`.
---
.../Vectorize/LoopVectorizationPlanner.h | 12 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 216 ++++++++++--------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 47 ++--
.../Transforms/Vectorize/VPlanTransforms.cpp | 31 +--
.../AArch64/sve2-histcnt-vplan.ll | 2 +-
5 files changed, 168 insertions(+), 140 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 7787f58683b2a4..7cbef9fd310c18 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -168,7 +168,7 @@ class VPBuilder {
VPInstruction *createOverflowingOp(unsigned Opcode,
std::initializer_list<VPValue *> Operands,
- VPRecipeWithIRFlags::WrapFlagsTy WrapFlags,
+ VPRecipeIRFlags::WrapFlagsTy WrapFlags,
DebugLoc DL = {}, const Twine &Name = "") {
return tryInsertInstruction(
new VPInstruction(Opcode, Operands, WrapFlags, DL, Name));
@@ -187,9 +187,9 @@ class VPBuilder {
VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
const Twine &Name = "") {
- return tryInsertInstruction(new VPInstruction(
- Instruction::BinaryOps::Or, {LHS, RHS},
- VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
+ return tryInsertInstruction(
+ new VPInstruction(Instruction::BinaryOps::Or, {LHS, RHS},
+ VPRecipeIRFlags::DisjointFlagsTy(false), DL, Name));
}
VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
@@ -223,12 +223,12 @@ class VPBuilder {
VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
const Twine &Name = "") {
return tryInsertInstruction(new VPInstruction(
- Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(false), DL, Name));
+ Ptr, Offset, VPRecipeIRFlags::GEPFlagsTy(false), DL, Name));
}
VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
const Twine &Name = "") {
return tryInsertInstruction(new VPInstruction(
- Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(true), DL, Name));
+ Ptr, Offset, VPRecipeIRFlags::GEPFlagsTy(true), DL, Name));
}
VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index ed0ea98f35a9a9..6939fd8eca99f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -700,6 +700,8 @@ struct VPCostContext {
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;
};
+class VPRecipeIRFlags;
+
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
@@ -803,6 +805,9 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
/// Returns the debug location of the recipe.
DebugLoc getDebugLoc() const { return DL; }
+ /// Returns the IR flags for the recipe.
+ virtual VPRecipeIRFlags *getIRFlags() { return nullptr; }
+
protected:
/// Compute the cost of this recipe either using a recipe's specialized
/// implementation or using the legacy cost model and the underlying
@@ -916,8 +921,8 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
#endif
};
-/// Class to record LLVM IR flag for a recipe along with it.
-class VPRecipeWithIRFlags : public VPSingleDefRecipe {
+/// Class to record LLVM IR flags for a recipe.
+class VPRecipeIRFlags {
enum class OperationType : unsigned char {
Cmp,
OverflowingBinOp,
@@ -979,23 +984,10 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
unsigned AllFlags;
};
-protected:
- void transferFlags(VPRecipeWithIRFlags &Other) {
- OpType = Other.OpType;
- AllFlags = Other.AllFlags;
- }
-
public:
- template <typename IterT>
- VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
- : VPSingleDefRecipe(SC, Operands, DL) {
- OpType = OperationType::Other;
- AllFlags = 0;
- }
+ VPRecipeIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
- template <typename IterT>
- VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
- : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()) {
+ VPRecipeIRFlags(Instruction &I) {
if (auto *Op = dyn_cast<CmpInst>(&I)) {
OpType = OperationType::Cmp;
CmpPredicate = Op->getPredicate();
@@ -1023,54 +1015,22 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
}
}
- template <typename IterT>
- VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
- CmpInst::Predicate Pred, DebugLoc DL = {})
- : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
- CmpPredicate(Pred) {}
+ VPRecipeIRFlags(CmpInst::Predicate Pred)
+ : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
- template <typename IterT>
- VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
- WrapFlagsTy WrapFlags, DebugLoc DL = {})
- : VPSingleDefRecipe(SC, Operands, DL),
- OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
+ VPRecipeIRFlags(WrapFlagsTy WrapFlags)
+ : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
- template <typename IterT>
- VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
- FastMathFlags FMFs, DebugLoc DL = {})
- : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
- FMFs(FMFs) {}
+ VPRecipeIRFlags(FastMathFlags FMFs)
+ : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
- template <typename IterT>
- VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
- DisjointFlagsTy DisjointFlags, DebugLoc DL = {})
- : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
- DisjointFlags(DisjointFlags) {}
+ VPRecipeIRFlags(DisjointFlagsTy DisjointFlags)
+ : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
-protected:
- template <typename IterT>
- VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
- GEPFlagsTy GEPFlags, DebugLoc DL = {})
- : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
- GEPFlags(GEPFlags) {}
+ VPRecipeIRFlags(GEPFlagsTy GEPFlags)
+ : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
public:
- static inline bool classof(const VPRecipeBase *R) {
- return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
- R->getVPDefID() == VPRecipeBase::VPWidenSC ||
- R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
- R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
- R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
- R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
- R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
- R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
- }
-
- static inline bool classof(const VPUser *U) {
- auto *R = dyn_cast<VPRecipeBase>(U);
- return R && classof(R);
- }
-
/// Drop all poison-generating flags.
void dropPoisonGeneratingFlags() {
// NOTE: This needs to be kept in-sync with
@@ -1179,6 +1139,54 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
#endif
};
+// Class to record LLVM IR flags for a single-def recipe along with it.
+class VPSingleDefRecipeWithIRFlags : public VPSingleDefRecipe,
+ public VPRecipeIRFlags {
+public:
+ template <typename IterT>
+ VPSingleDefRecipeWithIRFlags(const unsigned char SC, IterT Operands,
+ DebugLoc DL = {})
+ : VPSingleDefRecipe(SC, Operands, DL), VPRecipeIRFlags() {}
+
+ template <typename IterT>
+ VPSingleDefRecipeWithIRFlags(const unsigned char SC, IterT Operands,
+ Instruction &I)
+ : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()),
+ VPRecipeIRFlags(I) {}
+
+ template <typename IterT>
+ VPSingleDefRecipeWithIRFlags(const unsigned char SC, IterT Operands,
+ CmpInst::Predicate Pred, DebugLoc DL = {})
+ : VPSingleDefRecipe(SC, Operands, DL), VPRecipeIRFlags(Pred) {}
+ template <typename IterT>
+ VPSingleDefRecipeWithIRFlags(const unsigned char SC, IterT Operands,
+ VPRecipeIRFlags::WrapFlagsTy WrapFlags,
+ DebugLoc DL = {})
+ : VPSingleDefRecipe(SC, Operands, DL), VPRecipeIRFlags(WrapFlags) {}
+
+ template <typename IterT>
+ VPSingleDefRecipeWithIRFlags(const unsigned char SC, IterT Operands,
+ FastMathFlags FMFs, DebugLoc DL = {})
+ : VPSingleDefRecipe(SC, Operands, DL), VPRecipeIRFlags(FMFs) {}
+
+ template <typename IterT>
+ VPSingleDefRecipeWithIRFlags(const unsigned char SC, IterT Operands,
+ VPRecipeIRFlags::DisjointFlagsTy DisjointFlags,
+ DebugLoc DL = {})
+ : VPSingleDefRecipe(SC, Operands, DL), VPRecipeIRFlags(DisjointFlags) {}
+
+ virtual VPRecipeIRFlags *getIRFlags() override {
+ return static_cast<VPRecipeIRFlags *>(this);
+ }
+
+protected:
+ template <typename IterT>
+ VPSingleDefRecipeWithIRFlags(const unsigned char SC, IterT Operands,
+ VPRecipeIRFlags::GEPFlagsTy GEPFlags,
+ DebugLoc DL = {})
+ : VPSingleDefRecipe(SC, Operands, DL), VPRecipeIRFlags(GEPFlags) {}
+};
+
/// Helper to access the operand that contains the unroll part for this recipe
/// after unrolling.
template <unsigned PartOpIdx> class VPUnrollPartAccessor {
@@ -1195,7 +1203,7 @@ template <unsigned PartOpIdx> class VPUnrollPartAccessor {
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
-class VPInstruction : public VPRecipeWithIRFlags,
+class VPInstruction : public VPSingleDefRecipeWithIRFlags,
public VPUnrollPartAccessor<1> {
friend class VPlanSlp;
@@ -1270,7 +1278,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
public:
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
const Twine &Name = "")
- : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
Opcode(Opcode), Name(Name.str()) {}
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
@@ -1281,22 +1289,27 @@ class VPInstruction : public VPRecipeWithIRFlags,
VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
- WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
- : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
+ VPRecipeIRFlags::WrapFlagsTy WrapFlags, DebugLoc DL = {},
+ const Twine &Name = "")
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPInstructionSC, Operands,
+ WrapFlags, DL),
Opcode(Opcode), Name(Name.str()) {}
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
- DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
+ VPRecipeIRFlags::DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
const Twine &Name = "")
- : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPInstructionSC, Operands,
+ DisjointFlag, DL),
Opcode(Opcode), Name(Name.str()) {
assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
}
- VPInstruction(VPValue *Ptr, VPValue *Offset, GEPFlagsTy Flags,
- DebugLoc DL = {}, const Twine &Name = "")
- : VPRecipeWithIRFlags(VPDef::VPInstructionSC,
- ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
+ VPInstruction(VPValue *Ptr, VPValue *Offset,
+ VPRecipeIRFlags::GEPFlagsTy Flags, DebugLoc DL = {},
+ const Twine &Name = "")
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPInstructionSC,
+ ArrayRef<VPValue *>({Ptr, Offset}), Flags,
+ DL),
Opcode(VPInstruction::PtrAdd), Name(Name.str()) {}
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
@@ -1307,7 +1320,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
VPInstruction *clone() override {
SmallVector<VPValue *, 2> Operands(operands());
auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
- New->transferFlags(*this);
+ *New->getIRFlags() = *getIRFlags();
return New;
}
@@ -1435,14 +1448,15 @@ class VPIRInstruction : public VPRecipeBase {
/// opcode and operands of the recipe. This recipe covers most of the
/// traditional vectorization cases where each recipe transforms into a
/// vectorized version of itself.
-class VPWidenRecipe : public VPRecipeWithIRFlags {
+class VPWidenRecipe : public VPSingleDefRecipeWithIRFlags {
unsigned Opcode;
protected:
template <typename IterT>
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
iterator_range<IterT> Operands)
- : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
+ : VPSingleDefRecipeWithIRFlags(VPDefOpcode, Operands, I),
+ Opcode(I.getOpcode()) {}
public:
template <typename IterT>
@@ -1453,7 +1467,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
VPWidenRecipe *clone() override {
auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
- R->transferFlags(*this);
+ *R->getIRFlags() = *getIRFlags();
return R;
}
@@ -1487,8 +1501,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
/// A recipe for widening operations with vector-predication intrinsics with
/// explicit vector length (EVL).
class VPWidenEVLRecipe : public VPWidenRecipe {
- using VPRecipeWithIRFlags::transferFlags;
-
public:
template <typename IterT>
VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
@@ -1497,7 +1509,7 @@ class VPWidenEVLRecipe : public VPWidenRecipe {
}
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
: VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
- transferFlags(W);
+ *getIRFlags() = *W.getIRFlags();
}
~VPWidenEVLRecipe() override = default;
@@ -1533,7 +1545,7 @@ class VPWidenEVLRecipe : public VPWidenRecipe {
};
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
-class VPWidenCastRecipe : public VPRecipeWithIRFlags {
+class VPWidenCastRecipe : public VPSingleDefRecipeWithIRFlags {
/// Cast instruction opcode.
Instruction::CastOps Opcode;
@@ -1543,14 +1555,14 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
public:
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
CastInst &UI)
- : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
- ResultTy(ResultTy) {
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI),
+ Opcode(Opcode), ResultTy(ResultTy) {
assert(UI.getOpcode() == Opcode &&
"opcode of underlying cast doesn't match");
}
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
- : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
ResultTy(ResultTy) {}
~VPWidenCastRecipe() override = default;
@@ -1631,7 +1643,7 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
};
/// A recipe for widening vector intrinsics.
-class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
+class VPWidenIntrinsicRecipe : public VPSingleDefRecipeWithIRFlags {
/// ID of the vector intrinsic to widen.
Intrinsic::ID VectorIntrinsicID;
@@ -1651,7 +1663,8 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
ArrayRef<VPValue *> CallArguments, Type *Ty,
DebugLoc DL = {})
- : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments,
+ CI),
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
MayReadFromMemory(CI.mayReadFromMemory()),
MayWriteToMemory(CI.mayWriteToMemory()),
@@ -1660,7 +1673,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
ArrayRef<VPValue *> CallArguments, Type *Ty,
DebugLoc DL = {})
- : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments),
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments),
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
LLVMContext &Ctx = Ty->getContext();
AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
@@ -1714,7 +1727,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
};
/// A recipe for widening Call instructions using library calls.
-class VPWidenCallRecipe : public VPRecipeWithIRFlags {
+class VPWidenCallRecipe : public VPSingleDefRecipeWithIRFlags {
/// Variant stores a pointer to the chosen function. There is a 1:1 mapping
/// between a given VF and the chosen vectorized variant, so there will be a
/// different VPlan for each VF with a valid variant.
@@ -1723,8 +1736,8 @@ class VPWidenCallRecipe : public VPRecipeWithIRFlags {
public:
VPWidenCallRecipe(Value *UV, Function *Variant,
ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
- : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
- *cast<Instruction>(UV)),
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
+ *cast<Instruction>(UV)),
Variant(Variant) {
assert(
isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
@@ -1849,7 +1862,7 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
};
/// A recipe for handling GEP instructions.
-class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
+class VPWidenGEPRecipe : public VPSingleDefRecipeWithIRFlags {
bool isPointerLoopInvariant() const {
return getOperand(0)->isDefinedOutsideLoopRegions();
}
@@ -1867,7 +1880,7 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
public:
template <typename IterT>
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
- : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
~VPWidenGEPRecipe() override = default;
@@ -1897,16 +1910,16 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
/// A recipe to compute the pointers for widened memory accesses of IndexTy
/// in reverse order.
-class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
+class VPReverseVectorPointerRecipe : public VPSingleDefRecipeWithIRFlags,
public VPUnrollPartAccessor<2> {
Type *IndexedTy;
public:
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
bool IsInBounds, DebugLoc DL)
- : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
- ArrayRef<VPValue *>({Ptr, VF}),
- GEPFlagsTy(IsInBounds), DL),
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
+ ArrayRef<VPValue *>({Ptr, VF}),
+ GEPFlagsTy(IsInBounds), DL),
IndexedTy(IndexedTy) {}
VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
@@ -1950,15 +1963,16 @@ class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
};
/// A recipe to compute the pointers for widened memory accesses of IndexTy.
-class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
+class VPVectorPointerRecipe : public VPSingleDefRecipeWithIRFlags,
public VPUnrollPartAccessor<1> {
Type *IndexedTy;
public:
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
DebugLoc DL)
- : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
- GEPFlagsTy(IsInBounds), DL),
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPVectorPointerSC,
+ ArrayRef<VPValue *>(Ptr),
+ GEPFlagsTy(IsInBounds), DL),
IndexedTy(IndexedTy) {}
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
@@ -2651,7 +2665,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform only one copy, per lane zero, will be generated.
-class VPReplicateRecipe : public VPRecipeWithIRFlags {
+class VPReplicateRecipe : public VPSingleDefRecipeWithIRFlags {
/// Indicator if only a single replica per lane is needed.
bool IsUniform;
@@ -2662,7 +2676,7 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
template <typename IterT>
VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
bool IsUniform, VPValue *Mask = nullptr)
- : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
IsUniform(IsUniform), IsPredicated(Mask) {
if (Mask)
addOperand(Mask);
@@ -2674,7 +2688,7 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
auto *Copy =
new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
isPredicated() ? getMask() : nullptr);
- Copy->transferFlags(*this);
+ *Copy->getIRFlags() = *getIRFlags();
return Copy;
}
@@ -3350,15 +3364,15 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
-class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
+class VPScalarIVStepsRecipe : public VPSingleDefRecipeWithIRFlags,
public VPUnrollPartAccessor<2> {
Instruction::BinaryOps InductionOpcode;
public:
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step,
Instruction::BinaryOps Opcode, FastMathFlags FMFs)
- : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
- ArrayRef<VPValue *>({IV, Step}), FMFs),
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
+ ArrayRef<VPValue *>({IV, Step}), FMFs),
InductionOpcode(Opcode) {}
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6254ea15191819..c2aaa1a7c95596 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -291,7 +291,7 @@ InstructionCost VPRecipeBase::computeCost(ElementCount VF,
llvm_unreachable("subclasses should implement computeCost");
}
-FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
+FastMathFlags VPRecipeIRFlags::getFastMathFlags() const {
assert(OpType == OperationType::FPMathOp &&
"recipe doesn't have fast math flags");
FastMathFlags Res;
@@ -327,8 +327,8 @@ unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(VPUser &U) const {
VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
VPValue *A, VPValue *B, DebugLoc DL,
const Twine &Name)
- : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
- Pred, DL),
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPInstructionSC,
+ ArrayRef<VPValue *>({A, B}), Pred, DL),
Opcode(Opcode), Name(Name.str()) {
assert(Opcode == Instruction::ICmp &&
"only ICmp predicates supported at the moment");
@@ -337,7 +337,7 @@ VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
VPInstruction::VPInstruction(unsigned Opcode,
std::initializer_list<VPValue *> Operands,
FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
- : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
+ : VPSingleDefRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
Opcode(Opcode), Name(Name.str()) {
// Make sure the VPInstruction is a floating-point operation.
assert(isFPMathOp() && "this op can't take fast-math flags");
@@ -807,7 +807,10 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
}
printFlags(O);
- printOperands(O, SlotTracker);
+ if (getNumOperands() > 0) {
+ O << " ";
+ printOperands(O, SlotTracker);
+ }
if (auto DL = getDebugLoc()) {
O << ", !dbg ";
@@ -1039,7 +1042,7 @@ void VPWidenIntrinsicRecipe::print(raw_ostream &O, const Twine &Indent,
O << "call";
printFlags(O);
- O << getIntrinsicName() << "(";
+ O << " " << getIntrinsicName() << "(";
interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
Op->printAsOperand(O, SlotTracker);
@@ -1207,8 +1210,7 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
{TTI::OK_AnyValue, TTI::OP_None}, SI);
}
-VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
- const FastMathFlags &FMF) {
+VPRecipeIRFlags::FastMathFlagsTy::FastMathFlagsTy(const FastMathFlags &FMF) {
AllowReassoc = FMF.allowReassoc();
NoNaNs = FMF.noNaNs();
NoInfs = FMF.noInfs();
@@ -1219,7 +1221,7 @@ VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
+void VPRecipeIRFlags::printFlags(raw_ostream &O) const {
switch (OpType) {
case OperationType::Cmp:
O << " " << CmpInst::getPredicateName(getPredicate());
@@ -1252,8 +1254,6 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
case OperationType::Other:
break;
}
- if (getNumOperands() > 0)
- O << " ";
}
#endif
@@ -1460,7 +1460,10 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
printAsOperand(O, SlotTracker);
O << " = " << Instruction::getOpcodeName(Opcode);
printFlags(O);
- printOperands(O, SlotTracker);
+ if (getNumOperands() > 0) {
+ O << " ";
+ printOperands(O, SlotTracker);
+ }
}
void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
@@ -1469,7 +1472,10 @@ void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
printAsOperand(O, SlotTracker);
O << " = vp." << Instruction::getOpcodeName(getOpcode());
printFlags(O);
- printOperands(O, SlotTracker);
+ if (getNumOperands() > 0) {
+ O << " ";
+ printOperands(O, SlotTracker);
+ }
}
#endif
@@ -1545,9 +1551,12 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-CAST ";
printAsOperand(O, SlotTracker);
- O << " = " << Instruction::getOpcodeName(Opcode) << " ";
+ O << " = " << Instruction::getOpcodeName(Opcode);
printFlags(O);
- printOperands(O, SlotTracker);
+ if (getNumOperands() > 0) {
+ O << " ";
+ printOperands(O, SlotTracker);
+ }
O << " to " << *getResultType();
}
#endif
@@ -1936,6 +1945,7 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
printAsOperand(O, SlotTracker);
O << " = getelementptr";
printFlags(O);
+ O << " ";
printOperands(O, SlotTracker);
}
#endif
@@ -2291,7 +2301,7 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
O << "call";
printFlags(O);
- O << "@" << CB->getCalledFunction()->getName() << "(";
+ O << " @" << CB->getCalledFunction()->getName() << "(";
interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
O, [&O, &SlotTracker](VPValue *Op) {
Op->printAsOperand(O, SlotTracker);
@@ -2300,7 +2310,10 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
} else {
O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
printFlags(O);
- printOperands(O, SlotTracker);
+ if (getNumOperands() > 0) {
+ O << " ";
+ printOperands(O, SlotTracker);
+ }
}
if (shouldPack())
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a4f0df17f5832f..b391febe869765 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -936,8 +936,9 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
continue;
for (VPUser *U : collectUsersRecursively(PhiR))
- if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(U)) {
- RecWithFlags->dropPoisonGeneratingFlags();
+ if (auto *R = dyn_cast<VPRecipeBase>(U)) {
+ if (auto *IRFlags = R->getIRFlags())
+ IRFlags->dropPoisonGeneratingFlags();
}
}
}
@@ -1182,8 +1183,8 @@ void VPlanTransforms::truncateToMinimalBitwidths(
// Any wrapping introduced by shrinking this operation shouldn't be
// considered undefined behavior. So, we can't unconditionally copy
// arithmetic wrapping flags to VPW.
- if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
- VPW->dropPoisonGeneratingFlags();
+ if (auto *Flags = R.getIRFlags())
+ Flags->dropPoisonGeneratingFlags();
using namespace llvm::VPlanPatternMatch;
if (OldResSizeInBits != NewResSizeInBits &&
@@ -1639,7 +1640,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
// This recipe contributes to the address computation of a widen
// load/store. If the underlying instruction has poison-generating flags,
// drop them directly.
- if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
+ if (auto *Flags = CurRec->getIRFlags()) {
VPValue *A, *B;
using namespace llvm::VPlanPatternMatch;
// Dropping disjoint from an OR may yield incorrect results, as some
@@ -1647,25 +1648,25 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
// for dependence analysis). Instead, replace it with an equivalent Add.
// This is possible as all users of the disjoint OR only access lanes
// where the operands are disjoint or poison otherwise.
- if (match(RecWithFlags, m_BinaryOr(m_VPValue(A), m_VPValue(B))) &&
- RecWithFlags->isDisjoint()) {
- VPBuilder Builder(RecWithFlags);
+ if (match(CurRec, m_BinaryOr(m_VPValue(A), m_VPValue(B))) &&
+ Flags->isDisjoint()) {
+ VPValue *OldValue = CurRec->getVPSingleValue();
+ VPBuilder Builder(CurRec);
VPInstruction *New = Builder.createOverflowingOp(
- Instruction::Add, {A, B}, {false, false},
- RecWithFlags->getDebugLoc());
- New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
- RecWithFlags->replaceAllUsesWith(New);
- RecWithFlags->eraseFromParent();
+ Instruction::Add, {A, B}, {false, false}, CurRec->getDebugLoc());
+ New->setUnderlyingValue(OldValue->getUnderlyingValue());
+ OldValue->replaceAllUsesWith(New);
+ CurRec->eraseFromParent();
CurRec = New;
} else
- RecWithFlags->dropPoisonGeneratingFlags();
+ Flags->dropPoisonGeneratingFlags();
} else {
Instruction *Instr = dyn_cast_or_null<Instruction>(
CurRec->getVPSingleValue()->getUnderlyingValue());
(void)Instr;
assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
"found instruction with poison generating flags not covered by "
- "VPRecipeWithIRFlags");
+ "without VPRecipeIRFlags");
}
// Add new definitions to the worklist.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll
index 6257d3325f9796..7585a005b1117f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll
@@ -69,7 +69,7 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]]
; CHECK-NEXT: [[VECP_IDX:vp.*]] = vector-pointer [[GEP_IDX]]
; CHECK-NEXT: WIDEN [[IDX:.*]] = load [[VECP_IDX]]
-; CHECK-NEXT: WIDEN-CAST [[EXT_IDX:.*]] = zext [[IDX]] to i64
+; CHECK-NEXT: WIDEN-CAST [[EXT_IDX:.*]] = zext [[IDX]] to i64
; CHECK-NEXT: WIDEN-GEP Inv[Var] [[GEP_BUCKET:.*]] = getelementptr inbounds ir<%buckets>, [[EXT_IDX]]
; CHECK-NEXT: WIDEN-HISTOGRAM buckets: [[GEP_BUCKET]], inc: ir<1>
; CHECK-NEXT: EMIT [[IV_NEXT]] = add nuw [[IV]], [[VFxUF]]
More information about the llvm-commits
mailing list