[PATCH] D151448: [LoopLoadElimination] Add support for stride equal to -1
Igor Kirillov via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu May 25 07:24:27 PDT 2023
igor.kirillov created this revision.
Herald added a subscriber: hiraditya.
Herald added a project: All.
igor.kirillov requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
This patch extends the LoopLoadElimination pass to handle descending loops
(pointer stride of -1), so they benefit from the same store-to-load forwarding as ascending loops.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D151448
Files:
llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
llvm/test/Transforms/LoopLoadElim/backward.ll
Index: llvm/test/Transforms/LoopLoadElim/backward.ll
===================================================================
--- llvm/test/Transforms/LoopLoadElim/backward.ll
+++ llvm/test/Transforms/LoopLoadElim/backward.ll
@@ -30,3 +30,33 @@
for.end: ; preds = %for.body
ret void
}
+
+; Same but loop is descending.
+;
+; for (unsigned i = N; i > 0; i--)
+; A[i-1] = A[i] + B[i];
+define void @g(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, i64 %N) {
+entry:
+; CHECK: %0 = shl i64 %N, 2
+; CHECK: %scevgep = getelementptr i8, ptr %A, i64 %0
+; CHECK: %load_initial = load i32, ptr %scevgep, align 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK: %store_forwarded = phi i32 [ %load_initial, %entry ], [ %add, %for.body ]
+ %i.09 = phi i64 [ %sub, %for.body ], [ %N, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.09
+ %load = load i32, ptr %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, ptr %B, i64 %i.09
+ %load_1 = load i32, ptr %arrayidx1, align 4
+; CHECK: %add = add i32 %load_1, %store_forwarded
+ %add = add i32 %load_1, %load
+ %sub = add i64 %i.09, -1
+ %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %sub
+ store i32 %add, ptr %arrayidx2, align 4
+ %cmp.not = icmp eq i64 %sub, 0
+ br i1 %cmp.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
Index: llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -89,7 +89,8 @@
: Load(Load), Store(Store) {}
/// Return true if the dependence from the store to the load has a
- /// distance of one. E.g. A[i+1] = A[i]
+ /// distance of one.
+ /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
Loop *L) const {
Value *LoadPtr = Load->getPointerOperand();
@@ -103,11 +104,19 @@
DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
"Should be a known dependence");
- // Currently we only support accesses with unit stride. FIXME: we should be
- // able to handle non unit stirde as well as long as the stride is equal to
- // the dependence distance.
- if (getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0) != 1 ||
- getPtrStride(PSE, LoadType, StorePtr, L).value_or(0) != 1)
+ int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
+ int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
+ if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
+ return false;
+
+ // TODO: This check for stride values other than 1 and -1 can be eliminated.
+ // However, doing so may cause the LoopAccessAnalysis to overcompensate,
+ // generating numerous non-wrap runtime checks that may undermine the
+ // benefits of load elimination. To safely implement support for non-unit
+ // strides, we would need to ensure either that the processed case does not
+ // require these additional checks, or improve the LAA to handle them more
+ // efficiently, or potentially both.
+ if (std::abs(StrideLoad) != 1)
return false;
unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
@@ -120,7 +129,7 @@
auto *Dist = cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
const APInt &Val = Dist->getAPInt();
- return Val == TypeByteSize;
+ return Val == TypeByteSize * StrideLoad;
}
Value *getLoadPtr() const { return Load->getPointerOperand(); }
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D151448.525596.patch
Type: text/x-patch
Size: 3863 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230525/2f759872/attachment.bin>
More information about the llvm-commits
mailing list