[llvm] [LAA] strip dead code, simplify logic (NFC) (PR #92119)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Wed May 15 04:59:56 PDT 2024
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/92119
>From 0833a2b86529c5e982b4c274eb08dcb522be02f7 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Wed, 15 May 2024 11:03:53 +0100
Subject: [PATCH 1/2] [LAA] increase test coverage in symbolic-stride
The test symbolic-stride.ll does not exercise all code paths in
getStrideFromPointer, particularly the one taken when the stride
expression is an SCEVIntegralCastExpr. Cover these code paths as well.
---
.../LoopAccessAnalysis/symbolic-stride.ll | 121 ++++++++++++++++++
1 file changed, 121 insertions(+)
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
index 6cc045d7a681b..3da0f543c5c1c 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -95,6 +95,127 @@ exit:
ret void
}
+define void @single_stride_castexpr(i32 %offset, ptr %src, ptr %dst, i1 %cond) {
+; CHECK-LABEL: 'single_stride_castexpr'
+; CHECK-NEXT: inner.loop:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
+; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv.3
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP1]]:
+; CHECK-NEXT: (Low: ((4 * %iv.1) + %dst) High: (804 + (4 * %iv.1) + %dst))
+; CHECK-NEXT: Member: {((4 * %iv.1) + %dst),+,4}<%inner.loop>
+; CHECK-NEXT: Group [[GRP2]]:
+; CHECK-NEXT: (Low: %src High: (804 + %src))
+; CHECK-NEXT: Member: {%src,+,4}<nuw><%inner.loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: Equal predicate: %offset == 1
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+; CHECK-NEXT: [PSE] %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2:
+; CHECK-NEXT: {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
+; CHECK-NEXT: --> {((4 * %iv.1) + %dst),+,4}<%inner.loop>
+; CHECK-NEXT: outer.header:
+; CHECK-NEXT: Report: loop is not the innermost loop
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %offset.ext = sext i32 %offset to i64
+ br label %outer.header
+
+outer.header:
+ %iv.1 = phi i64 [ 0, %entry ], [ %iv.2.next, %inner.loop ]
+ br i1 %cond, label %inner.loop, label %exit
+
+inner.loop:
+ %iv.2 = phi i64 [ %iv.1, %outer.header ], [ %iv.2.next, %inner.loop ]
+ %iv.3 = phi i32 [ 0, %outer.header ], [ %iv.3.next, %inner.loop ]
+ %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv.3
+ %load = load i32, ptr %gep.src, align 8
+ %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
+ store i32 %load, ptr %gep.dst, align 8
+ %iv.2.next = add i64 %iv.2, %offset.ext
+ %iv.3.next = add i32 %iv.3, 1
+ %ec = icmp eq i32 %iv.3, 200
+ br i1 %ec, label %outer.header, label %inner.loop
+
+exit:
+ ret void
+}
+
+define void @single_stride_castexpr_multiuse(i32 %offset, ptr %src, ptr %dst, i1 %cond) {
+; CHECK-LABEL: 'single_stride_castexpr_multiuse'
+; CHECK-NEXT: inner.loop:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP3:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
+; CHECK-NEXT: Against group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP3]]:
+; CHECK-NEXT: (Low: (((4 * %iv.1) + %dst) umin ((4 * %iv.1) + (4 * (sext i32 %offset to i64) * (200 + (-1 * (zext i32 %offset to i64))<nsw>)<nsw>) + %dst)) High: (4 + (((4 * %iv.1) + %dst) umax ((4 * %iv.1) + (4 * (sext i32 %offset to i64) * (200 + (-1 * (zext i32 %offset to i64))<nsw>)<nsw>) + %dst))))
+; CHECK-NEXT: Member: {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
+; CHECK-NEXT: Group [[GRP4]]:
+; CHECK-NEXT: (Low: ((4 * (zext i32 %offset to i64))<nuw><nsw> + %src) High: (804 + %src))
+; CHECK-NEXT: Member: {((4 * (zext i32 %offset to i64))<nuw><nsw> + %src),+,4}<%inner.loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+; CHECK-NEXT: outer.header:
+; CHECK-NEXT: Report: loop is not the innermost loop
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %offset.ext = sext i32 %offset to i64
+ %offset.zext = zext i32 %offset to i64
+ br label %outer.header
+
+outer.header:
+ %iv.1 = phi i64 [ 0, %entry ], [ %iv.2.next, %inner.loop ]
+ br i1 %cond, label %inner.loop, label %exit
+
+inner.loop:
+ %iv.2 = phi i64 [ %iv.1, %outer.header ], [ %iv.2.next, %inner.loop ]
+ %iv.3 = phi i64 [ %offset.zext, %outer.header ], [ %iv.3.next, %inner.loop ]
+ %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3
+ %load = load i32, ptr %gep.src, align 8
+ %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
+ store i32 %load, ptr %gep.dst, align 8
+ %iv.2.next = add i64 %iv.2, %offset.ext
+ %iv.3.next = add i64 %iv.3, 1
+ %ec = icmp eq i64 %iv.3, 200
+ br i1 %ec, label %outer.header, label %inner.loop
+
+exit:
+ ret void
+}
+
; A loop with two symbolic strides.
define void @two_strides(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride.1, i64 %stride.2) {
; CHECK-LABEL: 'two_strides'
>From b5ae4372ab90d92c6a3fa08b0c512bcb21fd0048 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Tue, 14 May 2024 13:19:34 +0100
Subject: [PATCH 2/2] [LAA] strip unnecessary getUniqueCastUse
733b8b2 ([LAA] Simplify identification of speculatable strides [nfc])
refactored getStrideFromPointer() to compute directly on SCEVs, and
return an SCEV expression instead of a Value. However, it left behind a
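call to getUniqueCastUse(), which is completely unnecessary. Remove
this call and simplify the surrounding program logic. This yields a
positive test update: the stride in single_stride_castexpr_multiuse is
now speculated (%offset == 1) even though %offset has more than one
cast use.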
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 36 ++++---------------
.../LoopAccessAnalysis/symbolic-stride.ll | 15 +++++---
2 files changed, 18 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index d071e53324408..2574da76e747b 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2656,7 +2656,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
SymbolicStrides, UncomputablePtr, false);
if (!CanDoRTIfNeeded) {
auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
- recordAnalysis("CantIdentifyArrayBounds", I)
+ recordAnalysis("CantIdentifyArrayBounds", I)
<< "cannot identify array bounds";
LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
<< "the array bounds.\n");
@@ -2873,21 +2873,6 @@ static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
return GEP->getOperand(InductionOperand);
}
-/// If a value has only one user that is a CastInst, return it.
-static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
- Value *UniqueCast = nullptr;
- for (User *U : Ptr->users()) {
- CastInst *CI = dyn_cast<CastInst>(U);
- if (CI && CI->getType() == Ty) {
- if (!UniqueCast)
- UniqueCast = CI;
- else
- return nullptr;
- }
- }
- return UniqueCast;
-}
-
/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
@@ -2950,21 +2935,14 @@ static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *L
return nullptr;
// Look for the loop invariant symbolic value.
- const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
- if (!U) {
- const auto *C = dyn_cast<SCEVIntegralCastExpr>(V);
- if (!C)
- return nullptr;
- U = dyn_cast<SCEVUnknown>(C->getOperand());
- if (!U)
- return nullptr;
+ if (isa<SCEVUnknown>(V))
+ return V;
- // Match legacy behavior - this is not needed for correctness
- if (!getUniqueCastUse(U->getValue(), Lp, V->getType()))
- return nullptr;
- }
+ if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
+ if (isa<SCEVUnknown>(C->getOperand()))
+ return V;
- return V;
+ return nullptr;
}
void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
index 3da0f543c5c1c..8641f0a618b87 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -170,16 +170,23 @@ define void @single_stride_castexpr_multiuse(i32 %offset, ptr %src, ptr %dst, i1
; CHECK-NEXT: %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group [[GRP3]]:
-; CHECK-NEXT: (Low: (((4 * %iv.1) + %dst) umin ((4 * %iv.1) + (4 * (sext i32 %offset to i64) * (200 + (-1 * (zext i32 %offset to i64))<nsw>)<nsw>) + %dst)) High: (4 + (((4 * %iv.1) + %dst) umax ((4 * %iv.1) + (4 * (sext i32 %offset to i64) * (200 + (-1 * (zext i32 %offset to i64))<nsw>)<nsw>) + %dst))))
-; CHECK-NEXT: Member: {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
+; CHECK-NEXT: (Low: ((4 * %iv.1) + %dst) High: (804 + (4 * %iv.1) + (-4 * (zext i32 %offset to i64))<nsw> + %dst))
+; CHECK-NEXT: Member: {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT: Group [[GRP4]]:
-; CHECK-NEXT: (Low: ((4 * (zext i32 %offset to i64))<nuw><nsw> + %src) High: (804 + %src))
-; CHECK-NEXT: Member: {((4 * (zext i32 %offset to i64))<nuw><nsw> + %src),+,4}<%inner.loop>
+; CHECK-NEXT: (Low: (4 + %src) High: (808 + (-4 * (zext i32 %offset to i64))<nsw> + %src))
+; CHECK-NEXT: Member: {(4 + %src),+,4}<%inner.loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: Equal predicate: %offset == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
+; CHECK-NEXT: [PSE] %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3:
+; CHECK-NEXT: {((4 * (zext i32 %offset to i64))<nuw><nsw> + %src),+,4}<%inner.loop>
+; CHECK-NEXT: --> {(4 + %src),+,4}<%inner.loop>
+; CHECK-NEXT: [PSE] %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2:
+; CHECK-NEXT: {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
+; CHECK-NEXT: --> {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT: outer.header:
; CHECK-NEXT: Report: loop is not the innermost loop
; CHECK-NEXT: Dependences:
More information about the llvm-commits
mailing list