[llvm] [Loads] Check loop-varying pointer in isDereferenceableAndAlignedInLoop. (PR #120962)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 23 05:19:30 PST 2024
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/120962
If the load executes in a successor of the header, check if the loop-varying pointer is dereferenceable and aligned the branch in the header. This is stricter than necessary and we could instead look for any block in the loop that executes unconditionally and post-dominates the block with the access.
Also moves up the assumption check to make sure it is done for each pointer in the chain.
>From 559bc0cb9990b5e5fd3bafb2e8c3a0f7322483a2 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 22 Dec 2024 14:22:31 +0000
Subject: [PATCH] [Loads] Check loop-varying pointer in
isDereferenceableAndAlignedInLoop.
If the load executes in a successor of the header, check if the
loop-varying pointer is dereferenceable and aligned the branch in
the header. This is stricter than necessary and we could instead look
for any block in the loop that executes unconditionally and
post-dominates the block with the access.
Also moves up the assumption check to make sure it is done for each
pointer in the chain.
---
llvm/lib/Analysis/Loads.cpp | 59 ++++++++------
...able-info-from-assumption-constant-size.ll | 76 +++++--------------
2 files changed, 57 insertions(+), 78 deletions(-)
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 54b9521fda8fd2..d8833ce8626cfd 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -48,6 +48,29 @@ static bool isDereferenceableAndAlignedPointer(
if (!Visited.insert(V).second)
return false;
+ if (CtxI) {
+ /// Look through assumes to see if both dereferencability and alignment can
+ /// be provent by an assume
+ RetainedKnowledge AlignRK;
+ RetainedKnowledge DerefRK;
+ if (getKnowledgeForValue(
+ V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
+ [&](RetainedKnowledge RK, Instruction *Assume, auto) {
+ if (!isValidAssumeForContext(Assume, CtxI, DT))
+ return false;
+ if (RK.AttrKind == Attribute::Alignment)
+ AlignRK = std::max(AlignRK, RK);
+ if (RK.AttrKind == Attribute::Dereferenceable)
+ DerefRK = std::max(DerefRK, RK);
+ if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
+ DerefRK.ArgValue >= Size.getZExtValue())
+ return true; // We have found what we needed so we stop looking
+ return false; // Other assumes may have better information. so
+ // keep looking
+ }))
+ return true;
+ }
+
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
@@ -171,29 +194,6 @@ static bool isDereferenceableAndAlignedPointer(
Size, DL, CtxI, AC, DT, TLI,
Visited, MaxDepth);
- if (CtxI) {
- /// Look through assumes to see if both dereferencability and alignment can
- /// be provent by an assume
- RetainedKnowledge AlignRK;
- RetainedKnowledge DerefRK;
- if (getKnowledgeForValue(
- V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
- [&](RetainedKnowledge RK, Instruction *Assume, auto) {
- if (!isValidAssumeForContext(Assume, CtxI, DT))
- return false;
- if (RK.AttrKind == Attribute::Alignment)
- AlignRK = std::max(AlignRK, RK);
- if (RK.AttrKind == Attribute::Dereferenceable)
- DerefRK = std::max(DerefRK, RK);
- if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
- DerefRK.ArgValue >= Size.getZExtValue())
- return true; // We have found what we needed so we stop looking
- return false; // Other assumes may have better information. so
- // keep looking
- }))
- return true;
- }
-
// If we don't know, assume the worst.
return false;
}
@@ -291,6 +291,19 @@ bool llvm::isDereferenceableAndAlignedInLoop(
return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
HeaderFirstNonPHI, AC, &DT);
+ // If the load executes in a successor of the header, check if the
+ // loop-varying pointer is dereferenceable and aligned at the branch in the
+ // header. This is stricter than necessary and we could instead look for any
+ // block in the loop that executes unconditionally and post-dominates the
+ // block with the access.
+ if (LI->getParent() != L->getHeader() &&
+ L->getExitingBlock() == L->getLoopLatch() &&
+ isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
+ L->getHeader()->getTerminator(), AC,
+ &DT)) {
+ return true;
+ }
+
// Otherwise, check to see if we have a repeating access pattern where we can
// prove that all accesses are well aligned and dereferenceable.
auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
index 8baff7b33118bf..3b1f14929b3a5e 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
@@ -11,8 +11,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias %a, ptr
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
@@ -23,25 +23,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias %a, ptr
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
-; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
-; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]]
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 0
@@ -529,17 +512,17 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias %a, ptr no
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP17]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1
-; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP13]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_CONTINUE2]]:
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
@@ -626,24 +609,24 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias %a, ptr n
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP17]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP13]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_CONTINUE2]]:
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP28]], i64 4), "dereferenceable"(ptr [[TMP20]], i64 4) ]
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP16]], i64 4), "dereferenceable"(ptr [[TMP18]], i64 4) ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP29]], i64 4), "dereferenceable"(ptr [[TMP19]], i64 4) ]
@@ -719,8 +702,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias %a, ptr
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
@@ -731,25 +714,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias %a, ptr
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
-; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
-; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]]
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 0
More information about the llvm-commits
mailing list