[llvm] 50dfc9e - [LoopLoadElimination] Add support for stride equal to -1

Igor Kirillov via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 1 05:11:37 PDT 2023


Author: Igor Kirillov
Date: 2023-06-01T12:10:53Z
New Revision: 50dfc9e35d72bf783ebc514ad1e48bd4d0767c5d

URL: https://github.com/llvm/llvm-project/commit/50dfc9e35d72bf783ebc514ad1e48bd4d0767c5d
DIFF: https://github.com/llvm/llvm-project/commit/50dfc9e35d72bf783ebc514ad1e48bd4d0767c5d.diff

LOG: [LoopLoadElimination] Add support for stride equal to -1

This patch extends all the benefits provided by the
LoopLoadElimination pass to descending loops.

Differential Revision: https://reviews.llvm.org/D151448

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
    llvm/test/Transforms/LoopLoadElim/backward.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index e32b97e438641..179ccde8d0355 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -88,8 +88,9 @@ struct StoreToLoadForwardingCandidate {
   StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
       : Load(Load), Store(Store) {}
 
-  /// Return true if the dependence from the store to the load has a
-  /// distance of one.  E.g. A[i+1] = A[i]
+  /// Return true if the dependence from the store to the load has an
+  /// absolute distance of one.
+  /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
   bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
                                  Loop *L) const {
     Value *LoadPtr = Load->getPointerOperand();
@@ -103,11 +104,19 @@ struct StoreToLoadForwardingCandidate {
                DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
            "Should be a known dependence");
 
-    // Currently we only support accesses with unit stride.  FIXME: we should be
-    // able to handle non unit stirde as well as long as the stride is equal to
-    // the dependence distance.
-    if (getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0) != 1 ||
-        getPtrStride(PSE, LoadType, StorePtr, L).value_or(0) != 1)
+    int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
+    int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
+    if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
+      return false;
+
+    // TODO: This check for stride values other than 1 and -1 can be eliminated.
+    // However, doing so may cause the LoopAccessAnalysis to overcompensate,
+    // generating numerous non-wrap runtime checks that may undermine the
+    // benefits of load elimination. To safely implement support for non-unit
+    // strides, we would need to ensure either that the processed case does not
+    // require these additional checks, or improve the LAA to handle them more
+    // efficiently, or potentially both.
+    if (std::abs(StrideLoad) != 1)
       return false;
 
     unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
@@ -120,7 +129,7 @@ struct StoreToLoadForwardingCandidate {
     auto *Dist = cast<SCEVConstant>(
         PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
     const APInt &Val = Dist->getAPInt();
-    return Val == TypeByteSize;
+    return Val == TypeByteSize * StrideLoad;
   }
 
   Value *getLoadPtr() const { return Load->getPointerOperand(); }

diff  --git a/llvm/test/Transforms/LoopLoadElim/backward.ll b/llvm/test/Transforms/LoopLoadElim/backward.ll
index 01939dff5f0da..e55d25d4ca44d 100644
--- a/llvm/test/Transforms/LoopLoadElim/backward.ll
+++ b/llvm/test/Transforms/LoopLoadElim/backward.ll
@@ -30,3 +30,34 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret void
 }
+
+; Same but loop is descending.
+;
+;   for (unsigned i = N; i > 0; i--)
+;     A[i-1] = A[i] + B[i];
+define void @g(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, i64 %N) {
+entry:
+; CHECK: %0 = shl i64 %N, 2
+; CHECK: %scevgep = getelementptr i8, ptr %A, i64 %0
+; CHECK: %load_initial = load i32, ptr %scevgep, align 4
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+; CHECK: %store_forwarded = phi i32 [ %load_initial, %entry ], [ %add, %for.body ]
+  %i.09 = phi i64 [ %sub, %for.body ], [ %N, %entry ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.09
+  %load = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %B, i64 %i.09
+  %load_1 = load i32, ptr %arrayidx1, align 4
+; CHECK: %add = add i32 %load_1, %store_forwarded
+  %add = add i32 %load_1, %load
+  %sub = add i64 %i.09, -1
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %sub
+  store i32 %add, ptr %arrayidx2, align 4
+  %cmp.not = icmp eq i64 %sub, 0
+  br i1 %cmp.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+


        


More information about the llvm-commits mailing list