[llvm] 50dfc9e - [LoopLoadElimination] Add support for stride equal to -1
Igor Kirillov via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 1 05:11:37 PDT 2023
Author: Igor Kirillov
Date: 2023-06-01T12:10:53Z
New Revision: 50dfc9e35d72bf783ebc514ad1e48bd4d0767c5d
URL: https://github.com/llvm/llvm-project/commit/50dfc9e35d72bf783ebc514ad1e48bd4d0767c5d
DIFF: https://github.com/llvm/llvm-project/commit/50dfc9e35d72bf783ebc514ad1e48bd4d0767c5d.diff
LOG: [LoopLoadElimination] Add support for stride equal to -1
This patch allows descending loops to gain all the benefits provided by
the LoopLoadElimination pass.
Differential Revision: https://reviews.llvm.org/D151448
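
As a minimal illustration (a sketch mirroring the new test added below; the
function name is hypothetical), this is the kind of descending loop that can
now benefit from store-to-load forwarding:

// Iteration i stores A[i-1]; the next iteration (i-1) loads that same
// element, so the load can be satisfied by forwarding the stored value
// instead of re-reading memory.
void descend(int *__restrict A, const int *__restrict B, unsigned N) {
  for (unsigned i = N; i > 0; i--)
    A[i - 1] = A[i] + B[i];
}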
Added:
Modified:
llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
llvm/test/Transforms/LoopLoadElim/backward.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index e32b97e438641..179ccde8d0355 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -88,8 +88,9 @@ struct StoreToLoadForwardingCandidate {
StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
: Load(Load), Store(Store) {}
- /// Return true if the dependence from the store to the load has a
- /// distance of one. E.g. A[i+1] = A[i]
+ /// Return true if the dependence from the store to the load has an
+ /// absolute distance of one.
+ /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
Loop *L) const {
Value *LoadPtr = Load->getPointerOperand();
@@ -103,11 +104,19 @@ struct StoreToLoadForwardingCandidate {
DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
"Should be a known dependence");
- // Currently we only support accesses with unit stride. FIXME: we should be
- // able to handle non unit stirde as well as long as the stride is equal to
- // the dependence distance.
- if (getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0) != 1 ||
- getPtrStride(PSE, LoadType, StorePtr, L).value_or(0) != 1)
+ int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
+ int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
+ if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
+ return false;
+
+ // TODO: This check for stride values other than 1 and -1 can be eliminated.
+ // However, doing so may cause the LoopAccessAnalysis to overcompensate,
+ // generating numerous non-wrap runtime checks that may undermine the
+ // benefits of load elimination. To safely implement support for non-unit
+ // strides, we would need to ensure either that the processed case does not
+ // require these additional checks, or improve the LAA to handle them more
+ // efficiently, or potentially both.
+ if (std::abs(StrideLoad) != 1)
return false;
unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
@@ -120,7 +129,7 @@ struct StoreToLoadForwardingCandidate {
auto *Dist = cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
const APInt &Val = Dist->getAPInt();
- return Val == TypeByteSize;
+ return Val == TypeByteSize * StrideLoad;
}
Value *getLoadPtr() const { return Load->getPointerOperand(); }
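
As a worked example of the updated distance check, using the descending i32
loop from the test below: the load reads A[i] and the store writes A[i-1], so

  Dist = StorePtr - LoadPtr = -4 bytes, and TypeByteSize * StrideLoad = 4 * (-1) = -4,

so the candidate is now accepted, whereas previously both the unit-stride
requirement and the "Val == TypeByteSize" comparison rejected it.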
diff --git a/llvm/test/Transforms/LoopLoadElim/backward.ll b/llvm/test/Transforms/LoopLoadElim/backward.ll
index 01939dff5f0da..e55d25d4ca44d 100644
--- a/llvm/test/Transforms/LoopLoadElim/backward.ll
+++ b/llvm/test/Transforms/LoopLoadElim/backward.ll
@@ -30,3 +30,34 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
+
+; Same but loop is descending.
+;
+; for (unsigned i = N; i > 0; i--)
+; A[i-1] = A[i] + B[i];
+define void @g(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, i64 %N) {
+entry:
+; CHECK: %0 = shl i64 %N, 2
+; CHECK: %scevgep = getelementptr i8, ptr %A, i64 %0
+; CHECK: %load_initial = load i32, ptr %scevgep, align 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK: %store_forwarded = phi i32 [ %load_initial, %entry ], [ %add, %for.body ]
+ %i.09 = phi i64 [ %sub, %for.body ], [ %N, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.09
+ %load = load i32, ptr %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, ptr %B, i64 %i.09
+ %load_1 = load i32, ptr %arrayidx1, align 4
+; CHECK: %add = add i32 %load_1, %store_forwarded
+ %add = add i32 %load_1, %load
+ %sub = add i64 %i.09, -1
+ %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %sub
+ store i32 %add, ptr %arrayidx2, align 4
+ %cmp.not = icmp eq i64 %sub, 0
+ br i1 %cmp.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+