[llvm] dd3eea6 - [LICM] Support sinking in LNICM

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 12 08:56:40 PDT 2021


Author: maekawatoshiki
Date: 2021-08-13T00:56:26+09:00
New Revision: dd3eea65662a675b78ac2319fd675a22345f5ca3

URL: https://github.com/llvm/llvm-project/commit/dd3eea65662a675b78ac2319fd675a22345f5ca3
DIFF: https://github.com/llvm/llvm-project/commit/dd3eea65662a675b78ac2319fd675a22345f5ca3.diff

LOG: [LICM] Support sinking in LNICM

Currently, LNICM pass does not support sinking instructions out of loop nest.
This patch enables LNICM to sink down as many instructions to the exit block of outermost loop as possible.

Reviewed By: Whitney

Differential Revision: https://reviews.llvm.org/D107219

Added: 
    llvm/test/Transforms/LICM/lnicm-sink.ll

Modified: 
    llvm/include/llvm/Transforms/Utils/LoopUtils.h
    llvm/lib/Transforms/Scalar/LICM.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 247b911b7c8f9..9428e2d10e4e1 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -147,11 +147,23 @@ class SinkAndHoistLICMFlags {
 /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
 /// instructions of the loop and loop safety information as
 /// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
+/// \p CurLoop is a loop to do sinking on. \p OutermostLoop is used only when
+/// this function is called by \p sinkRegionForLoopNest.
 bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                 BlockFrequencyInfo *, TargetLibraryInfo *,
-                TargetTransformInfo *, Loop *, AliasSetTracker *,
+                TargetTransformInfo *, Loop *CurLoop, AliasSetTracker *,
                 MemorySSAUpdater *, ICFLoopSafetyInfo *,
-                SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
+                SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *,
+                Loop *OutermostLoop = nullptr);
+
+/// Call sinkRegion on loops contained within the specified loop
+/// in order from innermost to outermost.
+bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
+                           DominatorTree *, BlockFrequencyInfo *,
+                           TargetLibraryInfo *, TargetTransformInfo *, Loop *,
+                           AliasSetTracker *, MemorySSAUpdater *,
+                           ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
+                           OptimizationRemarkEmitter *);
 
 /// Walk the specified region of the CFG (defined by all blocks
 /// dominated by the specified block, and that are in the current loop) in depth

diff  --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 30058df3ded50..33c5abf0fe302 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -151,7 +151,8 @@ cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
                                   const LoopSafetyInfo *SafetyInfo,
-                                  TargetTransformInfo *TTI, bool &FreeInLoop);
+                                  TargetTransformInfo *TTI, bool &FreeInLoop,
+                                  bool LoopNestMode);
 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
                   MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
@@ -436,8 +437,13 @@ bool LoopInvariantCodeMotion::runOnLoop(
   // instructions, we perform another pass to hoist them out of the loop.
   if (L->hasDedicatedExits())
     Changed |=
-        sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
-                   CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE);
+        LoopNestMode
+            ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT,
+                                    BFI, TLI, TTI, L, CurAST.get(), MSSAU.get(),
+                                    &SafetyInfo, *Flags.get(), ORE)
+            : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI,
+                         L, CurAST.get(), MSSAU.get(), &SafetyInfo,
+                         *Flags.get(), ORE);
   Flags->setIsSink(false);
   if (Preheader)
     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
@@ -555,7 +561,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
                       Loop *CurLoop, AliasSetTracker *CurAST,
                       MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
                       SinkAndHoistLICMFlags &Flags,
-                      OptimizationRemarkEmitter *ORE) {
+                      OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
 
   // Verify inputs.
   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
@@ -598,8 +604,10 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
       // operands of the instruction are loop invariant.
       //
       bool FreeInLoop = false;
+      bool LoopNestMode = OutermostLoop != nullptr;
       if (!I.mayHaveSideEffects() &&
-          isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
+          isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
+                                SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
           canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
                              ORE)) {
         if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
@@ -618,6 +626,26 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
   return Changed;
 }
 
+bool llvm::sinkRegionForLoopNest(
+    DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
+    BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+    Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+    ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags,
+    OptimizationRemarkEmitter *ORE) {
+
+  bool Changed = false;
+  SmallPriorityWorklist<Loop *, 4> Worklist;
+  Worklist.insert(CurLoop);
+  appendLoopsToWorklist(*CurLoop, Worklist);
+  while (!Worklist.empty()) {
+    Loop *L = Worklist.pop_back_val();
+    Changed |=
+        sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
+                   CurAST, MSSAU, SafetyInfo, Flags, ORE, CurLoop);
+  }
+  return Changed;
+}
+
 namespace {
 // This is a helper class for hoistRegion to make it able to hoist control flow
 // in order to be able to hoist phis. The way this works is that we initially
@@ -1443,7 +1471,8 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
 /// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
                                   const LoopSafetyInfo *SafetyInfo,
-                                  TargetTransformInfo *TTI, bool &FreeInLoop) {
+                                  TargetTransformInfo *TTI, bool &FreeInLoop,
+                                  bool LoopNestMode) {
   const auto &BlockColors = SafetyInfo->getBlockColors();
   bool IsFree = isFreeInLoop(I, CurLoop, TTI);
   for (const User *U : I.users()) {
@@ -1460,6 +1489,15 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
         if (!BlockColors.empty() &&
             BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
           return false;
+
+      if (LoopNestMode) {
+        while (isa<PHINode>(UI) && UI->hasOneUser() &&
+               UI->getNumOperands() == 1) {
+          if (!CurLoop->contains(UI))
+            break;
+          UI = cast<Instruction>(U->user_back());
+        }
+      }
     }
 
     if (CurLoop->contains(UI)) {

diff  --git a/llvm/test/Transforms/LICM/lnicm-sink.ll b/llvm/test/Transforms/LICM/lnicm-sink.ll
new file mode 100644
index 0000000000000..0dc47e5f1a4a0
--- /dev/null
+++ b/llvm/test/Transforms/LICM/lnicm-sink.ll
@@ -0,0 +1,68 @@
+; RUN: opt -passes='loop(licm)' -S %s | FileCheck %s --check-prefixes CHECK,LICM
+; RUN: opt -passes='loop(lnicm)' -S %s | FileCheck %s --check-prefixes CHECK,LNICM
+
+; This test represents the following function:
+;
+; double sin(double);
+; int abs(int);
+; double test(double x, int y[10]) {
+;   double t = 0; int s = 0;
+;   for (int i = 0; i < 10; i++) {
+;     for (int j = 0; j < 10; j++) {
+;       t = sin(x);
+;       s = abs(i);
+;     }
+;     y[i] = s;
+;   }
+;   return t;
+; }
+;
+; We only want to sink the call of sin out of the loop nest.
+; LICM also sinks the call of abs out of j-loop, but LNICM doesn't do so
+; to try to make a perfect loop nest. (though y[i] = s; still prevents the 
+; loop nest from being a perfect loop nest in this test case)
+
+define dso_local double @test(double %x, i32* noalias %y) {
+entry:
+  br label %for.body
+
+for.body:
+  %i.02 = phi i32 [ 0, %entry ], [ %inc6, %for.end ]
+  br label %for.body3
+
+; CHECK: for.body3:
+; LNICM: call i32 @abs(i32 %i.02)
+; LICM-NOT: call i32 @abs(i32 %i.02)
+for.body3:
+  %j.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
+  %call = call double @sin(double %x)
+  %call4 = call i32 @abs(i32 %i.02)
+  %inc = add nsw i32 %j.01, 1
+  %cmp2 = icmp slt i32 %inc, 10
+  br i1 %cmp2, label %for.body3, label %for.end
+
+; CHECK: for.end:
+; LICM: call i32 @abs(i32 %i.02)
+; LNICM-NOT: call i32 @abs(i32 %i.02)
+for.end:
+  %s.1.lcssa = phi i32 [ %call4, %for.body3 ]
+  %t.1.lcssa = phi double [ %call, %for.body3 ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
+  store i32 %s.1.lcssa, i32* %arrayidx, align 4
+  %inc6 = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc6, 10
+  br i1 %cmp, label %for.body, label %for.end7
+
+; CHECK: for.end7:
+; CHECK: call double @sin(double %x)
+for.end7:
+  %t.0.lcssa = phi double [ %t.1.lcssa, %for.end ]
+  ret double %t.0.lcssa
+}
+
+declare dso_local double @sin(double) #0
+
+declare dso_local i32 @abs(i32) #0
+
+attributes #0 = { nounwind readnone willreturn }


        


More information about the llvm-commits mailing list