[llvm] 2cdc6f2 - Reland "[LICM] Hoist LOAD without sinking the STORE"

Djordje Todorovic via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 2 03:54:23 PST 2021


Author: Djordje Todorovic
Date: 2021-12-02T03:53:50-08:00
New Revision: 2cdc6f2ca62e83fec445114fbbe6276e9ab2a7d0

URL: https://github.com/llvm/llvm-project/commit/2cdc6f2ca62e83fec445114fbbe6276e9ab2a7d0
DIFF: https://github.com/llvm/llvm-project/commit/2cdc6f2ca62e83fec445114fbbe6276e9ab2a7d0.diff

LOG: Reland "[LICM] Hoist LOAD without sinking the STORE"

When doing load/store promotion within LICM, if we
cannot prove that it is safe to sink the store we won't
hoist the load, even though we can prove the load could
be dereferenced and moved outside the loop. This patch
implements the load promotion by moving it in the loop
preheader by inserting proper PHI in the loop. The store
is kept as is in the loop. By doing this, we avoid doing
the load from a memory location in each iteration.

Please consider this small example:

loop {
  var = *ptr;
  if (var) break;
  *ptr= var + 1;
}
After this patch, it will be:

var0 = *ptr;
loop {
  var1 = phi (var0, var2);
  if (var1) break;
  var2 = var1 + 1;
  *ptr = var2;
}
This addresses some problems from [0].

[0] https://bugs.llvm.org/show_bug.cgi?id=51193

Differential revision: https://reviews.llvm.org/D113289

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/SSAUpdater.h
    llvm/lib/Transforms/Scalar/LICM.cpp
    llvm/lib/Transforms/Utils/SSAUpdater.cpp
    llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
    llvm/test/Transforms/LICM/hoist-load-without-store.ll
    llvm/test/Transforms/LICM/promote-capture.ll
    llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
    llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll
    llvm/test/Transforms/LICM/scalar-promote.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
index 22b2295cc9d7..c233e3dc168e 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -169,6 +169,10 @@ class LoadAndStorePromoter {
 
   /// Called to update debug info associated with the instruction.
   virtual void updateDebugInfo(Instruction *I) const {}
+
+  /// Return false if a sub-class wants to keep one of the loads/stores
+  /// after the SSA construction.
+  virtual bool shouldDelete(Instruction *I) const { return true; }
 };
 
 } // end namespace llvm

diff  --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 0d52448efb2b..6f97f3e93123 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1860,6 +1860,7 @@ class LoopPromoter : public LoadAndStorePromoter {
   bool UnorderedAtomic;
   AAMDNodes AATags;
   ICFLoopSafetyInfo &SafetyInfo;
+  bool CanInsertStoresInExitBlocks;
 
   // We're about to add a use of V in a loop exit block.  Insert an LCSSA phi
   // (if legal) if doing so would add an out-of-loop use to an instruction
@@ -1886,12 +1887,13 @@ class LoopPromoter : public LoadAndStorePromoter {
                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
                MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
                Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
-               ICFLoopSafetyInfo &SafetyInfo)
+               ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
       : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
         LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
         PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
         Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
-        SafetyInfo(SafetyInfo) {}
+        SafetyInfo(SafetyInfo),
+        CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}
 
   bool isInstInList(Instruction *I,
                     const SmallVectorImpl<Instruction *> &) const override {
@@ -1903,7 +1905,7 @@ class LoopPromoter : public LoadAndStorePromoter {
     return PointerMustAliases.count(Ptr);
   }
 
-  void doExtraRewritesBeforeFinalDeletion() override {
+  void insertStoresInLoopExitBlocks() {
     // Insert stores after in the loop exit blocks.  Each exit block gets a
     // store of the live-out values that feed them.  Since we've already told
     // the SSA updater about the defs in the loop and the preheader
@@ -1937,10 +1939,21 @@ class LoopPromoter : public LoadAndStorePromoter {
     }
   }
 
+  void doExtraRewritesBeforeFinalDeletion() override {
+    if (CanInsertStoresInExitBlocks)
+      insertStoresInLoopExitBlocks();
+  }
+
   void instructionDeleted(Instruction *I) const override {
     SafetyInfo.removeInstruction(I);
     MSSAU->removeMemoryAccess(I);
   }
+
+  bool shouldDelete(Instruction *I) const override {
+    if (isa<StoreInst>(I))
+      return CanInsertStoresInExitBlocks;
+    return true;
+  }
 };
 
 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
@@ -2039,6 +2052,7 @@ bool llvm::promoteLoopAccessesToScalars(
 
   bool DereferenceableInPH = false;
   bool SafeToInsertStore = false;
+  bool FoundLoadToPromote = false;
 
   SmallVector<Instruction *, 64> LoopUses;
 
@@ -2086,6 +2100,7 @@ bool llvm::promoteLoopAccessesToScalars(
 
         SawUnorderedAtomic |= Load->isAtomic();
         SawNotAtomic |= !Load->isAtomic();
+        FoundLoadToPromote = true;
 
         Align InstAlignment = Load->getAlign();
 
@@ -2197,13 +2212,20 @@ bool llvm::promoteLoopAccessesToScalars(
     }
   }
 
-  // If we've still failed to prove we can sink the store, give up.
-  if (!SafeToInsertStore)
+  // If we've still failed to prove we can sink the store, hoist the load
+  // only, if possible.
+  if (!SafeToInsertStore && !FoundLoadToPromote)
+    // If we cannot hoist the load either, give up.
     return false;
 
-  // Otherwise, this is safe to promote, lets do it!
-  LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
-                    << '\n');
+  // Lets do the promotion!
+  if (SafeToInsertStore)
+    LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
+                      << '\n');
+  else
+    LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
+                      << '\n');
+
   ORE->emit([&]() {
     return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
                               LoopUses[0])
@@ -2222,7 +2244,8 @@ bool llvm::promoteLoopAccessesToScalars(
   SSAUpdater SSA(&NewPHIs);
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
                         InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
-                        Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
+                        Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
+                        SafeToInsertStore);
 
   // Set up the preheader to have a definition of the value.  It is the live-out
   // value from the preheader that uses in the loop will use.

diff  --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 5893ce15b129..7d9992176658 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -446,6 +446,9 @@ void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
   // Now that everything is rewritten, delete the old instructions from the
   // function.  They should all be dead now.
   for (Instruction *User : Insts) {
+    if (!shouldDelete(User))
+      continue;
+
     // If this is a load that still has uses, then the load must have been added
     // as a live value in the SSAUpdate data structure for a block (e.g. because
     // the loaded value was stored later).  In this case, we need to recursively

diff  --git a/llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll b/llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
index 48882eca44cc..e5a75cca8ee7 100644
--- a/llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
+++ b/llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
@@ -5,12 +5,12 @@ target triple = "x86_64-unknown-linux-gnu"
 ; RUN: opt -O2 -S < %s | FileCheck %s
 
 ; CHECK-LABEL: main
-; CHECK: if.end
-; CHECK: store
 ; CHECK: memset
 ; CHECK: if.then
 ; CHECK: store
-; CHECK: memset
+; CHECK: if.end
+; CHECK: store
+; CHECK: store
 
 @d = common global i32 0, align 4
 @b = common global i32 0, align 4

diff  --git a/llvm/test/Transforms/LICM/hoist-load-without-store.ll b/llvm/test/Transforms/LICM/hoist-load-without-store.ll
index b464f6b7328d..275a53172737 100644
--- a/llvm/test/Transforms/LICM/hoist-load-without-store.ll
+++ b/llvm/test/Transforms/LICM/hoist-load-without-store.ll
@@ -18,10 +18,11 @@ define dso_local void @f(i32* nocapture %ptr, i32 %n) {
 ; CHECK-NEXT:    [[CMP7:%.*]] = icmp slt i32 0, [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP7]], label [[FOR_BODY_LR_PH:%.*]], label [[CLEANUP1:%.*]]
 ; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[PTR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ 1, [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ]
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[FOR_BODY_CLEANUP1_CRIT_EDGE:%.*]]
 ; CHECK:       if.end:

diff  --git a/llvm/test/Transforms/LICM/promote-capture.ll b/llvm/test/Transforms/LICM/promote-capture.ll
index 1a2603d1c986..945036e6e175 100644
--- a/llvm/test/Transforms/LICM/promote-capture.ll
+++ b/llvm/test/Transforms/LICM/promote-capture.ll
@@ -111,17 +111,19 @@ define void @test_captured_before_loop(i32 %len) {
 ; CHECK-NEXT:    [[COUNT:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 0, i32* [[COUNT]], align 4
 ; CHECK-NEXT:    call void @capture(i32* [[COUNT]])
+; CHECK-NEXT:    [[COUNT_PROMOTED:%.*]] = load i32, i32* [[COUNT]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[C_INC2:%.*]] = phi i32 [ [[COUNT_PROMOTED]], [[ENTRY:%.*]] ], [ [[C_INC1:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[I_NEXT:%.*]], [[LATCH]] ]
 ; CHECK-NEXT:    [[COND:%.*]] = call i1 @cond(i32 [[I]])
 ; CHECK-NEXT:    br i1 [[COND]], label [[IF:%.*]], label [[LATCH]]
 ; CHECK:       if:
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[COUNT]], align 4
-; CHECK-NEXT:    [[C_INC:%.*]] = add i32 [[C]], 1
+; CHECK-NEXT:    [[C_INC:%.*]] = add i32 [[C_INC2]], 1
 ; CHECK-NEXT:    store i32 [[C_INC]], i32* [[COUNT]], align 4
 ; CHECK-NEXT:    br label [[LATCH]]
 ; CHECK:       latch:
+; CHECK-NEXT:    [[C_INC1]] = phi i32 [ [[C_INC]], [[IF]] ], [ [[C_INC2]], [[LOOP]] ]
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[LEN:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]

diff  --git a/llvm/test/Transforms/LICM/scalar-promote-memmodel.ll b/llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
index c3bae731fb6b..33076b39e908 100644
--- a/llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
@@ -11,19 +11,21 @@ define void @bar(i32 %n, i32 %b) nounwind uwtable ssp {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT:    [[G_PROMOTED:%.*]] = load i32, i32* @g, align 4
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
-; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC5:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[INC2:%.*]] = phi i32 [ [[G_PROMOTED]], [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC5:%.*]], [[FOR_INC]] ]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[FOR_INC]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* @g, align 4
-; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[INC2]], 1
 ; CHECK-NEXT:    store i32 [[INC]], i32* @g, align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC1]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC2]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[INC5]] = add nsw i32 [[I_0]], 1
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.end:

diff  --git a/llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll b/llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll
index da4bae936dc1..b239b6fb0296 100644
--- a/llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll
@@ -314,17 +314,19 @@ define i32 @test7bad() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    call void @capture(ptr [[LOCAL]])
+; CHECK-NEXT:    [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ]
-; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[LOCAL]], align 4
-; CHECK-NEXT:    [[X2:%.*]] = call i32 @opaque(i32 [[X]])
+; CHECK-NEXT:    [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ]
+; CHECK-NEXT:    [[X2:%.*]] = call i32 @opaque(i32 [[X22]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X2]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]]
 ; CHECK:       if:
 ; CHECK-NEXT:    store i32 [[X2]], ptr [[LOCAL]], align 4
 ; CHECK-NEXT:    br label [[ELSE]]
 ; CHECK:       else:
+; CHECK-NEXT:    [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ]
 ; CHECK-NEXT:    [[NEXT]] = add i32 [[J]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]

diff  --git a/llvm/test/Transforms/LICM/scalar-promote.ll b/llvm/test/Transforms/LICM/scalar-promote.ll
index 290e990f8513..c064edb8cd93 100644
--- a/llvm/test/Transforms/LICM/scalar-promote.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote.ll
@@ -315,17 +315,19 @@ define i32 @test7bad() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    call void @capture(i32* [[LOCAL]])
+; CHECK-NEXT:    [[LOCAL_PROMOTED:%.*]] = load i32, i32* [[LOCAL]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ]
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[LOCAL]], align 4
-; CHECK-NEXT:    [[X2:%.*]] = call i32 @opaque(i32 [[X]])
+; CHECK-NEXT:    [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ]
+; CHECK-NEXT:    [[X2:%.*]] = call i32 @opaque(i32 [[X22]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X2]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]]
 ; CHECK:       if:
 ; CHECK-NEXT:    store i32 [[X2]], i32* [[LOCAL]], align 4
 ; CHECK-NEXT:    br label [[ELSE]]
 ; CHECK:       else:
+; CHECK-NEXT:    [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ]
 ; CHECK-NEXT:    [[NEXT]] = add i32 [[J]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]


        


More information about the llvm-commits mailing list