[llvm] ecb9d8e - [LICM] Hoist LOAD without sinking the STORE

Djordje Todorovic via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 1 04:28:37 PST 2021


Author: Djordje Todorovic
Date: 2021-12-01T04:27:50-08:00
New Revision: ecb9d8e4e3c4623c2edcd2c50727103927d31508

URL: https://github.com/llvm/llvm-project/commit/ecb9d8e4e3c4623c2edcd2c50727103927d31508
DIFF: https://github.com/llvm/llvm-project/commit/ecb9d8e4e3c4623c2edcd2c50727103927d31508.diff

LOG: [LICM] Hoist LOAD without sinking the STORE

When doing load/store promotion within LICM, if we
cannot prove that it is safe to sink the store we won't
hoist the load, even though we can prove the load could
be dereferenced and moved outside the loop. This patch
implements the load promotion by moving it in the loop
preheader by inserting proper PHI in the loop. The store
is kept as is in the loop. By doing this, we avoid doing
the load from a memory location in each iteration.

Please consider this small example:

loop {
  var = *ptr;
  if (var) break;
  *ptr= var + 1;
}
After this patch, it will be:

var0 = *ptr;
loop {
  var1 = phi (var0, var2);
  if (var1) break;
  var2 = var1 + 1;
  *ptr = var2;
}
This addresses some problems from [0].

[0] https://bugs.llvm.org/show_bug.cgi?id=51193

Differential revision: https://reviews.llvm.org/D113289

Added: 
    llvm/test/Transforms/LICM/hoist-load-without-store.ll

Modified: 
    llvm/include/llvm/Transforms/Utils/SSAUpdater.h
    llvm/lib/Transforms/Scalar/LICM.cpp
    llvm/lib/Transforms/Utils/SSAUpdater.cpp
    llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
    llvm/test/Transforms/LICM/promote-capture.ll
    llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
    llvm/test/Transforms/LICM/scalar-promote.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
index 22b2295cc9d70..c233e3dc168ed 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -169,6 +169,10 @@ class LoadAndStorePromoter {
 
   /// Called to update debug info associated with the instruction.
   virtual void updateDebugInfo(Instruction *I) const {}
+
+  /// Return false if a sub-class wants to keep one of the loads/stores
+  /// after the SSA construction.
+  virtual bool shouldDelete(Instruction *I) const { return true; }
 };
 
 } // end namespace llvm

diff  --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 0d52448efb2b2..6f97f3e93123a 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1860,6 +1860,7 @@ class LoopPromoter : public LoadAndStorePromoter {
   bool UnorderedAtomic;
   AAMDNodes AATags;
   ICFLoopSafetyInfo &SafetyInfo;
+  bool CanInsertStoresInExitBlocks;
 
   // We're about to add a use of V in a loop exit block.  Insert an LCSSA phi
   // (if legal) if doing so would add an out-of-loop use to an instruction
@@ -1886,12 +1887,13 @@ class LoopPromoter : public LoadAndStorePromoter {
                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
                MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
                Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
-               ICFLoopSafetyInfo &SafetyInfo)
+               ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
       : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
         LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
         PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
         Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
-        SafetyInfo(SafetyInfo) {}
+        SafetyInfo(SafetyInfo),
+        CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}
 
   bool isInstInList(Instruction *I,
                     const SmallVectorImpl<Instruction *> &) const override {
@@ -1903,7 +1905,7 @@ class LoopPromoter : public LoadAndStorePromoter {
     return PointerMustAliases.count(Ptr);
   }
 
-  void doExtraRewritesBeforeFinalDeletion() override {
+  void insertStoresInLoopExitBlocks() {
     // Insert stores after in the loop exit blocks.  Each exit block gets a
     // store of the live-out values that feed them.  Since we've already told
     // the SSA updater about the defs in the loop and the preheader
@@ -1937,10 +1939,21 @@ class LoopPromoter : public LoadAndStorePromoter {
     }
   }
 
+  void doExtraRewritesBeforeFinalDeletion() override {
+    if (CanInsertStoresInExitBlocks)
+      insertStoresInLoopExitBlocks();
+  }
+
   void instructionDeleted(Instruction *I) const override {
     SafetyInfo.removeInstruction(I);
     MSSAU->removeMemoryAccess(I);
   }
+
+  bool shouldDelete(Instruction *I) const override {
+    if (isa<StoreInst>(I))
+      return CanInsertStoresInExitBlocks;
+    return true;
+  }
 };
 
 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
@@ -2039,6 +2052,7 @@ bool llvm::promoteLoopAccessesToScalars(
 
   bool DereferenceableInPH = false;
   bool SafeToInsertStore = false;
+  bool FoundLoadToPromote = false;
 
   SmallVector<Instruction *, 64> LoopUses;
 
@@ -2086,6 +2100,7 @@ bool llvm::promoteLoopAccessesToScalars(
 
         SawUnorderedAtomic |= Load->isAtomic();
         SawNotAtomic |= !Load->isAtomic();
+        FoundLoadToPromote = true;
 
         Align InstAlignment = Load->getAlign();
 
@@ -2197,13 +2212,20 @@ bool llvm::promoteLoopAccessesToScalars(
     }
   }
 
-  // If we've still failed to prove we can sink the store, give up.
-  if (!SafeToInsertStore)
+  // If we've still failed to prove we can sink the store, hoist the load
+  // only, if possible.
+  if (!SafeToInsertStore && !FoundLoadToPromote)
+    // If we cannot hoist the load either, give up.
     return false;
 
-  // Otherwise, this is safe to promote, lets do it!
-  LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
-                    << '\n');
+  // Lets do the promotion!
+  if (SafeToInsertStore)
+    LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
+                      << '\n');
+  else
+    LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
+                      << '\n');
+
   ORE->emit([&]() {
     return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
                               LoopUses[0])
@@ -2222,7 +2244,8 @@ bool llvm::promoteLoopAccessesToScalars(
   SSAUpdater SSA(&NewPHIs);
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
                         InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
-                        Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
+                        Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
+                        SafeToInsertStore);
 
   // Set up the preheader to have a definition of the value.  It is the live-out
   // value from the preheader that uses in the loop will use.

diff  --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 5893ce15b129c..7d99921766587 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -446,6 +446,9 @@ void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
   // Now that everything is rewritten, delete the old instructions from the
   // function.  They should all be dead now.
   for (Instruction *User : Insts) {
+    if (!shouldDelete(User))
+      continue;
+
     // If this is a load that still has uses, then the load must have been added
     // as a live value in the SSAUpdate data structure for a block (e.g. because
     // the loaded value was stored later).  In this case, we need to recursively

diff  --git a/llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll b/llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
index 48882eca44cc3..e5a75cca8ee7e 100644
--- a/llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
+++ b/llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
@@ -5,12 +5,12 @@ target triple = "x86_64-unknown-linux-gnu"
 ; RUN: opt -O2 -S < %s | FileCheck %s
 
 ; CHECK-LABEL: main
-; CHECK: if.end
-; CHECK: store
 ; CHECK: memset
 ; CHECK: if.then
 ; CHECK: store
-; CHECK: memset
+; CHECK: if.end
+; CHECK: store
+; CHECK: store
 
 @d = common global i32 0, align 4
 @b = common global i32 0, align 4

diff  --git a/llvm/test/Transforms/LICM/hoist-load-without-store.ll b/llvm/test/Transforms/LICM/hoist-load-without-store.ll
new file mode 100644
index 0000000000000..275a531727371
--- /dev/null
+++ b/llvm/test/Transforms/LICM/hoist-load-without-store.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -S < %s | FileCheck %s
+
+;; C reproducer:
+;; void f(int *ptr, int n) {
+;;   for (int i = 0; i < n; ++i) {
+;;     int x = *ptr;
+;;     if (x)
+;;       break;
+;;
+;;     *ptr = x + 1;
+;;   }
+;; }
+
+define dso_local void @f(i32* nocapture %ptr, i32 %n) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP7:%.*]] = icmp slt i32 0, [[N:%.*]]
+; CHECK-NEXT:    br i1 [[CMP7]], label [[FOR_BODY_LR_PH:%.*]], label [[CLEANUP1:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[PTR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ 1, [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[FOR_BODY_CLEANUP1_CRIT_EDGE:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    store i32 1, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP1_CRIT_EDGE:%.*]]
+; CHECK:       for.body.cleanup1_crit_edge:
+; CHECK-NEXT:    br label [[CLEANUP1]]
+; CHECK:       for.cond.cleanup1_crit_edge:
+; CHECK-NEXT:    br label [[CLEANUP1]]
+; CHECK:       cleanup1:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp7 = icmp slt i32 0, %n
+  br i1 %cmp7, label %for.body.lr.ph, label %cleanup1
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %if.end
+  %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
+  %0 = load i32, i32* %ptr, align 4
+  %tobool.not = icmp eq i32 %0, 0
+  br i1 %tobool.not, label %if.end, label %for.body.cleanup1_crit_edge
+
+if.end:                                           ; preds = %for.body
+  store i32 1, i32* %ptr, align 4
+  %inc = add nuw nsw i32 %i.08, 1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.cond.cleanup1_crit_edge
+
+for.body.cleanup1_crit_edge:                      ; preds = %for.body
+  br label %cleanup1
+
+for.cond.cleanup1_crit_edge:                      ; preds = %if.end
+  br label %cleanup1
+
+cleanup1:                                         ; preds = %for.cond.cleanup1_crit_edge, %for.body.cleanup1_crit_edge, %entry
+  ret void
+}

diff  --git a/llvm/test/Transforms/LICM/promote-capture.ll b/llvm/test/Transforms/LICM/promote-capture.ll
index 1a2603d1c9866..945036e6e1753 100644
--- a/llvm/test/Transforms/LICM/promote-capture.ll
+++ b/llvm/test/Transforms/LICM/promote-capture.ll
@@ -111,17 +111,19 @@ define void @test_captured_before_loop(i32 %len) {
 ; CHECK-NEXT:    [[COUNT:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 0, i32* [[COUNT]], align 4
 ; CHECK-NEXT:    call void @capture(i32* [[COUNT]])
+; CHECK-NEXT:    [[COUNT_PROMOTED:%.*]] = load i32, i32* [[COUNT]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[C_INC2:%.*]] = phi i32 [ [[COUNT_PROMOTED]], [[ENTRY:%.*]] ], [ [[C_INC1:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[I_NEXT:%.*]], [[LATCH]] ]
 ; CHECK-NEXT:    [[COND:%.*]] = call i1 @cond(i32 [[I]])
 ; CHECK-NEXT:    br i1 [[COND]], label [[IF:%.*]], label [[LATCH]]
 ; CHECK:       if:
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[COUNT]], align 4
-; CHECK-NEXT:    [[C_INC:%.*]] = add i32 [[C]], 1
+; CHECK-NEXT:    [[C_INC:%.*]] = add i32 [[C_INC2]], 1
 ; CHECK-NEXT:    store i32 [[C_INC]], i32* [[COUNT]], align 4
 ; CHECK-NEXT:    br label [[LATCH]]
 ; CHECK:       latch:
+; CHECK-NEXT:    [[C_INC1]] = phi i32 [ [[C_INC]], [[IF]] ], [ [[C_INC2]], [[LOOP]] ]
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[LEN:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]

diff  --git a/llvm/test/Transforms/LICM/scalar-promote-memmodel.ll b/llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
index c3bae731fb6bc..33076b39e908b 100644
--- a/llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
@@ -11,19 +11,21 @@ define void @bar(i32 %n, i32 %b) nounwind uwtable ssp {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT:    [[G_PROMOTED:%.*]] = load i32, i32* @g, align 4
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
-; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC5:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[INC2:%.*]] = phi i32 [ [[G_PROMOTED]], [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC5:%.*]], [[FOR_INC]] ]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[FOR_INC]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* @g, align 4
-; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[INC2]], 1
 ; CHECK-NEXT:    store i32 [[INC]], i32* @g, align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC1]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC2]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[INC5]] = add nsw i32 [[I_0]], 1
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.end:

diff  --git a/llvm/test/Transforms/LICM/scalar-promote.ll b/llvm/test/Transforms/LICM/scalar-promote.ll
index 290e990f85133..c064edb8cd93f 100644
--- a/llvm/test/Transforms/LICM/scalar-promote.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote.ll
@@ -315,17 +315,19 @@ define i32 @test7bad() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    call void @capture(i32* [[LOCAL]])
+; CHECK-NEXT:    [[LOCAL_PROMOTED:%.*]] = load i32, i32* [[LOCAL]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ]
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[LOCAL]], align 4
-; CHECK-NEXT:    [[X2:%.*]] = call i32 @opaque(i32 [[X]])
+; CHECK-NEXT:    [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ]
+; CHECK-NEXT:    [[X2:%.*]] = call i32 @opaque(i32 [[X22]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X2]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]]
 ; CHECK:       if:
 ; CHECK-NEXT:    store i32 [[X2]], i32* [[LOCAL]], align 4
 ; CHECK-NEXT:    br label [[ELSE]]
 ; CHECK:       else:
+; CHECK-NEXT:    [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ]
 ; CHECK-NEXT:    [[NEXT]] = add i32 [[J]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]


        


More information about the llvm-commits mailing list