[llvm] 18ec330 - [VPlan] Manage InBounds via VPRecipeWithIRFlags for VectorPtrRecipe.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 7 06:00:06 PST 2024
Author: Florian Hahn
Date: 2024-01-07T13:58:05Z
New Revision: 18ec3304a9e4aed25d998180728faaf02236205d
URL: https://github.com/llvm/llvm-project/commit/18ec3304a9e4aed25d998180728faaf02236205d
DIFF: https://github.com/llvm/llvm-project/commit/18ec3304a9e4aed25d998180728faaf02236205d.diff
LOG: [VPlan] Manage InBounds via VPRecipeWithIRFlags for VectorPtrRecipe.
As suggested as a follow-up in
https://github.com/llvm/llvm-project/pull/72164, manage inbounds via
VPRecipeWithIRFlags.
Note that this allows inbounds to be preserved in a few more cases.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 10c068e3b5895c..8b6212aaa358ec 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8077,8 +8077,11 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1];
if (Consecutive) {
- auto *VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
- Reverse, I->getDebugLoc());
+ auto *GEP = dyn_cast<GetElementPtrInst>(
+ Ptr->getUnderlyingValue()->stripPointerCasts());
+ auto *VectorPtr = new VPVectorPointerRecipe(
+ Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
+ I->getDebugLoc());
Builder.getInsertBlock()->appendRecipe(VectorPtr);
Ptr = VectorPtr;
}
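
The construction site now derives the flag once, from the scalar pointer's
underlying IR, instead of re-deriving it during code generation. A minimal
sketch of that derivation, assuming a hypothetical free function
deriveInBounds that is not part of the commit:

  #include "llvm/IR/Instructions.h"

  // True iff the pointer, after looking through pointer casts, is defined
  // by a GEP carrying the inbounds flag.
  static bool deriveInBounds(const llvm::Value *Ptr) {
    auto *GEP =
        llvm::dyn_cast<llvm::GetElementPtrInst>(Ptr->stripPointerCasts());
    return GEP && GEP->isInBounds();
  }
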
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 9d279da758ec00..4b4f4911eb6415 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -842,6 +842,12 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
};
+protected:
+ struct GEPFlagsTy {
+ char IsInBounds : 1;
+ GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
+ };
+
private:
struct DisjointFlagsTy {
char IsDisjoint : 1;
@@ -849,9 +855,6 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
struct ExactFlagsTy {
char IsExact : 1;
};
- struct GEPFlagsTy {
- char IsInBounds : 1;
- };
struct NonNegFlagsTy {
char NonNeg : 1;
};
@@ -933,12 +936,21 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
: VPRecipeBase(SC, Operands, DL), OpType(OperationType::FPMathOp),
FMFs(FMFs) {}
+protected:
+ template <typename IterT>
+ VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
+ GEPFlagsTy GEPFlags, DebugLoc DL = {})
+ : VPRecipeBase(SC, Operands, DL), OpType(OperationType::GEPOp),
+ GEPFlags(GEPFlags) {}
+
+public:
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
- R->getVPDefID() == VPRecipeBase::VPReplicateSC;
+ R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
+ R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
}
/// Drop all poison-generating flags.
@@ -1361,15 +1373,16 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags, public VPValue {
/// A recipe to compute the pointers for widened memory accesses of IndexTy for
/// all parts. If IsReverse is true, compute pointers for accessing the input in
/// reverse order per part.
-class VPVectorPointerRecipe : public VPRecipeBase, public VPValue {
+class VPVectorPointerRecipe : public VPRecipeWithIRFlags, public VPValue {
Type *IndexedTy;
bool IsReverse;
public:
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
- DebugLoc DL)
- : VPRecipeBase(VPDef::VPVectorPointerSC, {Ptr}, DL), VPValue(this),
- IndexedTy(IndexedTy), IsReverse(IsReverse) {}
+ bool IsInBounds, DebugLoc DL)
+ : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
+ GEPFlagsTy(IsInBounds), DL),
+ VPValue(this), IndexedTy(IndexedTy), IsReverse(IsReverse) {}
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
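
Because VPVectorPointerRecipe now derives from VPRecipeWithIRFlags (note the
extra VPVectorPointerSC case in classof above), generic flag handling such as
dropping poison-generating flags applies to it as well. A simplified,
self-contained sketch of that shape, using stand-in types rather than the
real VPlan hierarchy:

  // Base class owning the flag storage; subclasses thread their flags
  // through a protected constructor, mirroring the GEPFlagsTy change.
  struct RecipeWithFlagsSketch {
  protected:
    struct GEPFlagsTy {
      char IsInBounds : 1;
      GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
    };
    GEPFlagsTy GEPFlags;
    RecipeWithFlagsSketch(GEPFlagsTy Flags) : GEPFlags(Flags) {}

  public:
    bool isInBounds() const { return GEPFlags.IsInBounds; }
    // A generic transform can clear poison-generating flags without
    // knowing the concrete recipe type.
    void dropPoisonGeneratingFlags() { GEPFlags.IsInBounds = false; }
  };

  struct VectorPointerSketch : RecipeWithFlagsSketch {
    VectorPointerSketch(bool IsInBounds)
        : RecipeWithFlagsSketch(GEPFlagsTy(IsInBounds)) {}
  };
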
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c9def41a3ceca7..349544f9e3904a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1308,9 +1308,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) {
? DL.getIndexType(IndexedTy->getPointerTo())
: Builder.getInt32Ty();
Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
- bool InBounds = false;
- if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
- InBounds = GEP->isInBounds();
+ bool InBounds = isInBounds();
if (IsReverse) {
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
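
At execute time the recipe simply reads the stored bit back and forwards it
to the GEP that materializes the per-part pointer. A hedged sketch of that
consumption, assuming a hypothetical helper emitVectorPointer and eliding
the reverse-order case handled above:

  #include "llvm/IR/IRBuilder.h"

  // Emit the widened pointer, honoring the recipe's stored inbounds flag.
  static llvm::Value *emitVectorPointer(llvm::IRBuilder<> &Builder,
                                        llvm::Type *IndexedTy,
                                        llvm::Value *Ptr,
                                        llvm::Value *Increment,
                                        bool InBounds) {
    return InBounds
               ? Builder.CreateInBoundsGEP(IndexedTy, Ptr, Increment, "vec.ptr")
               : Builder.CreateGEP(IndexedTy, Ptr, Increment, "vec.ptr");
  }
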
diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
index d7791a24816bd0..3c6cba1b00221a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
@@ -584,10 +584,11 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read
; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD2]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> <float 3.300000e+01, float 3.300000e+01, float 3.300000e+01, float 3.300000e+01>, <4 x float> [[TMP6]]
-; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 0
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
-; CHECK-NEXT: br i1 [[TMP8]], label %middle.block, label %vector.body
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
+; CHECK-NEXT: br i1 [[TMP9]], label %middle.block, label %vector.body
entry:
br label %loop.body
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
index b43fcc8548ce66..cf4dfc0b76ac98 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
@@ -150,29 +150,30 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope !4
-; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope !4
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope [[META4:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META4]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 20, i32 20, i32 20, i32 20>
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], <i32 20, i32 20, i32 20, i32 20>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4
-; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope !7
-; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope !7
+; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope [[META7:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope [[META7]]
; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[WIDE_LOAD8]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD9]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 4
-; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11
-; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META11:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META11]]
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP2]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD10]]
; CHECK-NEXT: [[PREDPHI:%.*]] = fadd <4 x float> [[TMP6]], [[TMP10]]
; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP3]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD11]]
; CHECK-NEXT: [[PREDPHI12:%.*]] = fadd <4 x float> [[TMP7]], [[TMP11]]
-; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11
-; CHECK-NEXT: store <4 x float> [[PREDPHI12]], ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 4
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META11]]
+; CHECK-NEXT: store <4 x float> [[PREDPHI12]], ptr [[TMP12]], align 4, !alias.scope [[META9]], !noalias [[META11]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
-; CHECK-NEXT: br i1 [[TMP12]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
+; CHECK-NEXT: br i1 [[TMP13]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: loop.body:
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[C_GEP:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV1]]