[llvm] [Loads] Check loop-varying pointer in isDereferenceableAndAlignedInLoop. (PR #120962)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 23 05:20:03 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
If the load executes in a successor of the header, check if the loop-varying pointer is dereferenceable and aligned the branch in the header. This is stricter than necessary and we could instead look for any block in the loop that executes unconditionally and post-dominates the block with the access.
Also moves up the assumption check to make sure it is done for each pointer in the chain.
---
Full diff: https://github.com/llvm/llvm-project/pull/120962.diff
2 Files Affected:
- (modified) llvm/lib/Analysis/Loads.cpp (+36-23)
- (modified) llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll (+21-55)
``````````diff
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 54b9521fda8fd2..d8833ce8626cfd 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -48,6 +48,29 @@ static bool isDereferenceableAndAlignedPointer(
if (!Visited.insert(V).second)
return false;
+ if (CtxI) {
+ /// Look through assumes to see if both dereferencability and alignment can
+ /// be provent by an assume
+ RetainedKnowledge AlignRK;
+ RetainedKnowledge DerefRK;
+ if (getKnowledgeForValue(
+ V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
+ [&](RetainedKnowledge RK, Instruction *Assume, auto) {
+ if (!isValidAssumeForContext(Assume, CtxI, DT))
+ return false;
+ if (RK.AttrKind == Attribute::Alignment)
+ AlignRK = std::max(AlignRK, RK);
+ if (RK.AttrKind == Attribute::Dereferenceable)
+ DerefRK = std::max(DerefRK, RK);
+ if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
+ DerefRK.ArgValue >= Size.getZExtValue())
+ return true; // We have found what we needed so we stop looking
+ return false; // Other assumes may have better information. so
+ // keep looking
+ }))
+ return true;
+ }
+
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
@@ -171,29 +194,6 @@ static bool isDereferenceableAndAlignedPointer(
Size, DL, CtxI, AC, DT, TLI,
Visited, MaxDepth);
- if (CtxI) {
- /// Look through assumes to see if both dereferencability and alignment can
- /// be provent by an assume
- RetainedKnowledge AlignRK;
- RetainedKnowledge DerefRK;
- if (getKnowledgeForValue(
- V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
- [&](RetainedKnowledge RK, Instruction *Assume, auto) {
- if (!isValidAssumeForContext(Assume, CtxI, DT))
- return false;
- if (RK.AttrKind == Attribute::Alignment)
- AlignRK = std::max(AlignRK, RK);
- if (RK.AttrKind == Attribute::Dereferenceable)
- DerefRK = std::max(DerefRK, RK);
- if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
- DerefRK.ArgValue >= Size.getZExtValue())
- return true; // We have found what we needed so we stop looking
- return false; // Other assumes may have better information. so
- // keep looking
- }))
- return true;
- }
-
// If we don't know, assume the worst.
return false;
}
@@ -291,6 +291,19 @@ bool llvm::isDereferenceableAndAlignedInLoop(
return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
HeaderFirstNonPHI, AC, &DT);
+ // If the load executes in a successor of the header, check if the
+ // loop-varying pointer is dereferenceable and aligned at the branch in the
+ // header. This is stricter than necessary and we could instead look for any
+ // block in the loop that executes unconditionally and post-dominates the
+ // block with the access.
+ if (LI->getParent() != L->getHeader() &&
+ L->getExitingBlock() == L->getLoopLatch() &&
+ isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
+ L->getHeader()->getTerminator(), AC,
+ &DT)) {
+ return true;
+ }
+
// Otherwise, check to see if we have a repeating access pattern where we can
// prove that all accesses are well aligned and dereferenceable.
auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
index 8baff7b33118bf..3b1f14929b3a5e 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
@@ -11,8 +11,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias %a, ptr
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
@@ -23,25 +23,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias %a, ptr
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
-; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
-; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]]
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 0
@@ -529,17 +512,17 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias %a, ptr no
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP17]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1
-; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP13]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_CONTINUE2]]:
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
@@ -626,24 +609,24 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias %a, ptr n
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP17]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP13]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_CONTINUE2]]:
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP28]], i64 4), "dereferenceable"(ptr [[TMP20]], i64 4) ]
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP16]], i64 4), "dereferenceable"(ptr [[TMP18]], i64 4) ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP29]], i64 4), "dereferenceable"(ptr [[TMP19]], i64 4) ]
@@ -719,8 +702,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias %a, ptr
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
@@ -731,25 +714,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias %a, ptr
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
-; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
-; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]]
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 0
``````````
</details>
https://github.com/llvm/llvm-project/pull/120962
More information about the llvm-commits
mailing list