[PATCH] D151448: [LoopLoadElimination] Add support for stride equal to -1

Igor Kirillov via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu May 25 07:24:27 PDT 2023


igor.kirillov created this revision.
Herald added a subscriber: hiraditya.
Herald added a project: All.
igor.kirillov requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

This patch extends the LoopLoadElimination pass to handle descending loops
(accesses with a stride of -1), so that such loops gain the same load
elimination benefits as ascending loops.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D151448

Files:
  llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
  llvm/test/Transforms/LoopLoadElim/backward.ll


Index: llvm/test/Transforms/LoopLoadElim/backward.ll
===================================================================
--- llvm/test/Transforms/LoopLoadElim/backward.ll
+++ llvm/test/Transforms/LoopLoadElim/backward.ll
@@ -30,3 +30,33 @@
 for.end:                                          ; preds = %for.body
   ret void
 }
+
+; Same but loop is descending.
+;
+;   for (unsigned i = N; i > 0; i--)
+;     A[i-1] = A[i] + B[i];
+define void @g(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, i64 %N) {
+entry:
+; CHECK: %0 = shl i64 %N, 2
+; CHECK: %scevgep = getelementptr i8, ptr %A, i64 %0
+; CHECK: %load_initial = load i32, ptr %scevgep, align 4
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+; CHECK: %store_forwarded = phi i32 [ %load_initial, %entry ], [ %add, %for.body ]
+  %i.09 = phi i64 [ %sub, %for.body ], [ %N, %entry ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.09
+  %load = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %B, i64 %i.09
+  %load_1 = load i32, ptr %arrayidx1, align 4
+; CHECK: %add = add i32 %load_1, %store_forwarded
+  %add = add i32 %load_1, %load
+  %sub = add i64 %i.09, -1
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %sub
+  store i32 %add, ptr %arrayidx2, align 4
+  %cmp.not = icmp eq i64 %sub, 0
+  br i1 %cmp.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
Index: llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -89,7 +89,8 @@
       : Load(Load), Store(Store) {}
 
   /// Return true if the dependence from the store to the load has a
-  /// distance of one.  E.g. A[i+1] = A[i]
+  /// distance of one.
+  /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
   bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
                                  Loop *L) const {
     Value *LoadPtr = Load->getPointerOperand();
@@ -103,11 +104,19 @@
                DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
            "Should be a known dependence");
 
-    // Currently we only support accesses with unit stride.  FIXME: we should be
-    // able to handle non unit stirde as well as long as the stride is equal to
-    // the dependence distance.
-    if (getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0) != 1 ||
-        getPtrStride(PSE, LoadType, StorePtr, L).value_or(0) != 1)
+    int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
+    int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
+    if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
+      return false;
+
+    // TODO: This check for stride values other than 1 and -1 can be eliminated.
+    // However, doing so may cause the LoopAccessAnalysis to overcompensate,
+    // generating numerous non-wrap runtime checks that may undermine the
+    // benefits of load elimination. To safely implement support for non-unit
+    // strides, we would need to ensure either that the processed case does not
+    // require these additional checks, or improve the LAA to handle them more
+    // efficiently, or potentially both.
+    if (std::abs(StrideLoad) != 1)
       return false;
 
     unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
@@ -120,7 +129,7 @@
     auto *Dist = cast<SCEVConstant>(
         PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
     const APInt &Val = Dist->getAPInt();
-    return Val == TypeByteSize;
+    return Val == TypeByteSize * StrideLoad;
   }
 
   Value *getLoadPtr() const { return Load->getPointerOperand(); }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D151448.525596.patch
Type: text/x-patch
Size: 3863 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230525/2f759872/attachment.bin>


More information about the llvm-commits mailing list