[llvm] 36bdc3d - [LoopFusion] Move instructions from FC0.Latch to FC1.Latch.

Whitney Tsang via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 17 14:10:39 PST 2019


Author: Whitney Tsang
Date: 2019-12-17T22:10:23Z
New Revision: 36bdc3dc35a509450d52a79225b1509f587236e9

URL: https://github.com/llvm/llvm-project/commit/36bdc3dc35a509450d52a79225b1509f587236e9
DIFF: https://github.com/llvm/llvm-project/commit/36bdc3dc35a509450d52a79225b1509f587236e9.diff

LOG: [LoopFusion] Move instructions from FC0.Latch to FC1.Latch.

Summary: This PR moves instructions from FC0.Latch, bottom up, to the
beginning of FC1.Latch, as long as each move is proven safe.

To illustrate why this is beneficial, let's consider the following
example:
Before Fusion:
header1:
  br header2
header2:
  br header2, latch1
latch1:
  br header1, preheader3
preheader3:
  br header3
header3:
  br header4
header4:
  br header4, latch3
latch3:
  br header3, exit3

After Fusion (before this PR):
header1:
  br header2
header2:
  br header2, latch1
latch1:
  br header3
header3:
  br header4
header4:
  br header4, latch3
latch3:
  br header1, exit3

Note that preheader3 is removed during fusion even before this PR.
Notice that we cannot fuse loop2 with loop4, because block latch1 lies
between them.
This PR moves instructions from latch1 to the beginning of latch3, and
removes block latch1. LoopFusion is now able to fuse loop nests recursively.

After Fusion (after this PR):
header1:
  br header2
header2:
  br header3
header3:
  br header4
header4:
  br header2, latch3
latch3:
  br header1, exit3

Reviewer: kbarton, jdoerfert, Meinersbur, dmgreen, fhahn, hfinkel,
bmahjour, etiotto
Reviewed By: kbarton, Meinersbur
Subscribers: hiraditya, llvm-commits
Tag: LLVM
Differential Revision: https://reviews.llvm.org/D71165

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
    llvm/lib/Transforms/Scalar/LoopFuse.cpp
    llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
    llvm/test/Transforms/LoopFusion/four_loops.ll
    llvm/test/Transforms/LoopFusion/guarded.ll
    llvm/test/Transforms/LoopFusion/loop_nest.ll
    llvm/test/Transforms/LoopFusion/simple.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
index 0d2ea8c6ff7f..32eb7cc2ab04 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
@@ -45,6 +45,12 @@ bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
                         const DominatorTree &DT, const PostDominatorTree &PDT,
                         DependenceInfo &DI);
 
+/// Move instructions from \p FromBB bottom up to the beginning of \p ToBB
+/// when proven safe.
+void moveInstsBottomUp(BasicBlock &FromBB, BasicBlock &ToBB,
+                       const DominatorTree &DT, const PostDominatorTree &PDT,
+                       DependenceInfo &DI);
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_CODEMOVERUTILS_H

diff  --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index e5ea9149dce8..a7f4242853fc 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -1114,6 +1114,29 @@ struct LoopFuser {
     return FC.ExitBlock->size() == 1;
   }
 
+  /// Simplify the condition of the latch branch of \p FC to true, when both of
+  /// its successors are the same.
+  void simplifyLatchBranch(const FusionCandidate &FC) const {
+    BranchInst *FCLatchBranch = dyn_cast<BranchInst>(FC.Latch->getTerminator());
+    if (FCLatchBranch) {
+      assert(FCLatchBranch->isConditional() &&
+             FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) &&
+             "Expecting the two successors of FCLatchBranch to be the same");
+      FCLatchBranch->setCondition(
+          llvm::ConstantInt::getTrue(FCLatchBranch->getCondition()->getType()));
+    }
+  }
+
+  /// Move instructions from FC0.Latch to FC1.Latch. If FC0.Latch has an unique
+  /// successor, then merge FC0.Latch with its unique successor.
+  void mergeLatch(const FusionCandidate &FC0, const FusionCandidate &FC1) {
+    moveInstsBottomUp(*FC0.Latch, *FC1.Latch, DT, PDT, DI);
+    if (BasicBlock *Succ = FC0.Latch->getUniqueSuccessor()) {
+      MergeBlockIntoPredecessor(Succ, &DTU, &LI);
+      DTU.flush();
+    }
+  }
+
   /// Fuse two fusion candidates, creating a new fused loop.
   ///
   /// This method contains the mechanics of fusing two loops, represented by \p
@@ -1247,6 +1270,10 @@ struct LoopFuser {
     FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
     FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
 
+    // Change the condition of FC0 latch branch to true, as both successors of
+    // the branch are the same.
+    simplifyLatchBranch(FC0);
+
     // If FC0.Latch and FC0.ExitingBlock are the same then we have already
     // performed the updates above.
     if (FC0.Latch != FC0.ExitingBlock)
@@ -1269,9 +1296,15 @@ struct LoopFuser {
 
     // Is there a way to keep SE up-to-date so we don't need to forget the loops
     // and rebuild the information in subsequent passes of fusion?
+    // Note: Need to forget the loops before merging the loop latches, as
+    // mergeLatch may remove the only block in FC1.
     SE.forgetLoop(FC1.L);
     SE.forgetLoop(FC0.L);
 
+    // Move instructions from FC0.Latch to FC1.Latch.
+    // Note: mergeLatch requires an updated DT.
+    mergeLatch(FC0, FC1);
+
     // Merge the loops.
     SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
                                         FC1.L->block_end());
@@ -1491,6 +1524,10 @@ struct LoopFuser {
     FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
     FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
 
+    // Change the condition of FC0 latch branch to true, as both successors of
+    // the branch are the same.
+    simplifyLatchBranch(FC0);
+
     // If FC0.Latch and FC0.ExitingBlock are the same then we have already
     // performed the updates above.
     if (FC0.Latch != FC0.ExitingBlock)
@@ -1522,9 +1559,15 @@ struct LoopFuser {
 
     // Is there a way to keep SE up-to-date so we don't need to forget the loops
     // and rebuild the information in subsequent passes of fusion?
+    // Note: Need to forget the loops before merging the loop latches, as
+    // mergeLatch may remove the only block in FC1.
     SE.forgetLoop(FC1.L);
     SE.forgetLoop(FC0.L);
 
+    // Move instructions from FC0.Latch to FC1.Latch.
+    // Note: mergeLatch requires an updated DT.
+    mergeLatch(FC0, FC1);
+
     // Merge the loops.
     SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
                                         FC1.L->block_end());

diff  --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
index 7a3a8adfea83..93395ac761ab 100644
--- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -117,9 +117,9 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
   if (MoveForward) {
     // When I is being moved forward, we need to make sure the InsertPoint
     // dominates every users. Or else, a user may be using an undefined I.
-    for (const Value *User : I.users())
-      if (auto *UserInst = dyn_cast<Instruction>(User))
-        if (!DT.dominates(&InsertPoint, UserInst))
+    for (const Use &U : I.uses())
+      if (auto *UserInst = dyn_cast<Instruction>(U.getUser()))
+        if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U))
           return false;
   } else {
     // When I is being moved backward, we need to make sure all its opernads
@@ -173,3 +173,17 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
 
   return true;
 }
+
+void llvm::moveInstsBottomUp(BasicBlock &FromBB, BasicBlock &ToBB,
+                             const DominatorTree &DT,
+                             const PostDominatorTree &PDT, DependenceInfo &DI) {
+  for (auto It = ++FromBB.rbegin(); It != FromBB.rend();) {
+    Instruction *MovePos = ToBB.getFirstNonPHIOrDbg();
+    Instruction &I = *It;
+    // Increment the iterator before modifying FromBB.
+    ++It;
+
+    if (isSafeToMoveBefore(I, *MovePos, DT, PDT, DI))
+      I.moveBefore(MovePos);
+  }
+}

diff  --git a/llvm/test/Transforms/LoopFusion/four_loops.ll b/llvm/test/Transforms/LoopFusion/four_loops.ll
index 771e92813f6b..8f3822b8a942 100644
--- a/llvm/test/Transforms/LoopFusion/four_loops.ll
+++ b/llvm/test/Transforms/LoopFusion/four_loops.ll
@@ -9,20 +9,14 @@
 ; CHECK-NEXT: bb:
 ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]+]]
 ; CHECK: [[LOOP1HEADER]]
-; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]]
-; CHECK: [[LOOP1LATCH]]
-; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]+]], label %[[LOOP2BODY]]
+; CHECK: br label %[[LOOP2BODY:bb[0-9]+]]
 ; CHECK: [[LOOP2BODY]]
-; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
-; CHECK: [[LOOP2LATCH]]
-; CHECK: br i1 %{{.*}}, label %[[LOOP3BODY:bb[0-9]+]], label %[[LOOP3BODY]]
+; CHECK: br label %[[LOOP3BODY:bb[0-9]+]]
 ; CHECK: [[LOOP3BODY]]
-; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]]
-; CHECK: [[LOOP3LATCH]]
-; CHECK: br i1 %{{.*}}, label %[[LOOP4BODY:bb[0-9]+]], label %[[LOOP4BODY]]
+; CHECK: br label %[[LOOP4BODY:bb[0-9]+]]
 ; CHECK: [[LOOP4BODY]]
-; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]]
-; CHECK: [[LOOP4LATCH]]
+; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]]
+; CHECK: [[LOOP1LATCH]]
 ; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOPEXIT:bb[0-9]+]]
 ; CHECK: ret void
 define void @dep_free() {

diff  --git a/llvm/test/Transforms/LoopFusion/guarded.ll b/llvm/test/Transforms/LoopFusion/guarded.ll
index 1a131178d25e..9242e0a518e9 100644
--- a/llvm/test/Transforms/LoopFusion/guarded.ll
+++ b/llvm/test/Transforms/LoopFusion/guarded.ll
@@ -8,8 +8,6 @@
 ; CHECK: [[LOOP1PREHEADER]]
 ; CHECK-NEXT: br label %[[LOOP1BODY:bb[0-9]*]]
 ; CHECK: [[LOOP1BODY]]
-; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2BODY]]
-; CHECK: [[LOOP2BODY]]
 ; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY]], label %[[LOOP2EXIT:bb[0-9]+]]
 ; CHECK: [[LOOP2EXIT]]
 ; CHECK: br label %[[LOOP1SUCC]]

diff  --git a/llvm/test/Transforms/LoopFusion/loop_nest.ll b/llvm/test/Transforms/LoopFusion/loop_nest.ll
index 8445bedce3fc..44a0ac8093da 100644
--- a/llvm/test/Transforms/LoopFusion/loop_nest.ll
+++ b/llvm/test/Transforms/LoopFusion/loop_nest.ll
@@ -25,19 +25,16 @@
 ; CHECK: [[LOOP1HEADER]]
 ; CHECK: br label %[[LOOP3HEADER:bb[0-9]+]]
 ; CHECK: [[LOOP3HEADER]]
-; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]]
-; CHECK: [[LOOP3LATCH]]
-; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]]
-; CHECK: [[LOOP1LATCH]]
-; CHECK: br i1 %{{.*}}, label %[[LOOP2PREHEADER:bb[0-9]+]], label %[[LOOP2PREHEADER]]
-; CHECK: [[LOOP2PREHEADER]]
+; CHECK: br label %[[LOOP2HEADER:bb[0-9]+]]
+; CHECK: [[LOOP2HEADER]]
 ; CHECK: br label %[[LOOP4HEADER:bb[0-9]+]]
 ; CHECK: [[LOOP4HEADER]]
-; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]] 
-; CHECK: [[LOOP4LATCH]]
-; CHECK: br i1 %{{.*}}, label %[[LOOP4HEADER]], label %[[LOOP2LATCH:bb[0-9]+]]
-; CHECK: [[LOOP2LATCH]]
-; CHECK:  br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]]
+; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]]
+; CHECK: [[LOOP1LATCH]]
+; CHECK-NEXT: %inc.outer.fc0 = add nuw nsw i64 %indvars.iv105, 1
+; CHECK-NEXT: %add.outer.fc0 = add nuw nsw i32 %.06, 1
+; CHECK-NEXT: %cmp.outer.fc0 = icmp ne i64 %inc.outer.fc0, 100
+; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]]
 ; CHECK: ret void
 
 ; TODO: The current version of loop fusion does not allow the inner loops to be
@@ -48,8 +45,8 @@ bb:
   br label %bb16
 
 bb16:                                   ; preds = %bb, %bb27
-  %.06 = phi i32 [ 0, %bb ], [ %tmp28, %bb27 ]
-  %indvars.iv105 = phi i64 [ 0, %bb ], [ %indvars.iv.next11, %bb27 ]
+  %.06 = phi i32 [ 0, %bb ], [ %add.outer.fc0, %bb27 ]
+  %indvars.iv105 = phi i64 [ 0, %bb ], [ %inc.outer.fc0, %bb27 ]
   br label %bb18
 
 bb30:                                   ; preds = %bb27
@@ -73,10 +70,10 @@ bb25:                                             ; preds = %bb18
   br i1 %exitcond9, label %bb18, label %bb27
 
 bb27:                                             ; preds = %bb25
-  %indvars.iv.next11 = add nuw nsw i64 %indvars.iv105, 1
-  %tmp28 = add nuw nsw i32 %.06, 1
-  %exitcond12 = icmp ne i64 %indvars.iv.next11, 100
-  br i1 %exitcond12, label %bb16, label %bb30
+  %inc.outer.fc0 = add nuw nsw i64 %indvars.iv105, 1
+  %add.outer.fc0 = add nuw nsw i32 %.06, 1
+  %cmp.outer.fc0 = icmp ne i64 %inc.outer.fc0, 100
+  br i1 %cmp.outer.fc0, label %bb16, label %bb30
 
 bb33:                                   ; preds = %bb30, %bb45
   %.023 = phi i32 [ 0, %bb30 ], [ %tmp46, %bb45 ]

diff  --git a/llvm/test/Transforms/LoopFusion/simple.ll b/llvm/test/Transforms/LoopFusion/simple.ll
index dc7d8d089eab..aeb626126d0b 100644
--- a/llvm/test/Transforms/LoopFusion/simple.ll
+++ b/llvm/test/Transforms/LoopFusion/simple.ll
@@ -6,9 +6,7 @@
 ; CHECK-NEXT: bb:
 ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]]
 ; CHECK: [[LOOP1HEADER]]
-; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
-; CHECK: [[LOOP1LATCH]]
-; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]]
+; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]]
 ; CHECK: [[LOOP2HEADER]]
 ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
 ; CHECK: [[LOOP2LATCH]]
@@ -72,9 +70,7 @@ bb29:                                             ; preds = %bb18
 ; CHECK: [[LOOP1PREHEADER]]
 ; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]]
 ; CHECK: [[LOOP1HEADER]]
-; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
-; CHECK: [[LOOP1LATCH]]
-; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]]
+; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]]
 ; CHECK: [[LOOP2HEADER]]
 ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
 ; CHECK: [[LOOP2LATCH]]
@@ -129,9 +125,7 @@ bb27:                                             ; preds = %bb17
 ; CHECK-NEXT: bb:
 ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]]
 ; CHECK: [[LOOP1HEADER]]
-; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
-; CHECK: [[LOOP1LATCH]]
-; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]]
+; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]]
 ; CHECK: [[LOOP2HEADER]]
 ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
 ; CHECK: [[LOOP2LATCH]]
@@ -179,8 +173,6 @@ bb19:                                             ; preds = %bb18
 ; CHECK: [[LOOP1PREHEADER]]
 ; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]]
 ; CHECK: [[LOOP1HEADER]]
-; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]]
-; CHECK: [[LOOP2HEADER]]
 ; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[EXITBLOCK]]
 ; CHECK: ret void
 define void @raw_only_parametric(i32* noalias %arg, i32 %arg4) {
@@ -217,9 +209,7 @@ bb23:                                             ; preds = %bb17, %bb
 ; CHECK-NEXT: bb:
 ; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]]
 ; CHECK: [[LOOP1HEADER]]
-; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
-; CHECK: [[LOOP1LATCH]]
-; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]]
+; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]]
 ; CHECK: [[LOOP2HEADER]]
 ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
 ; CHECK: [[LOOP2LATCH]]
@@ -266,3 +256,53 @@ bb25:                                             ; preds = %bb19
 bb26:                                             ; preds = %bb25
   ret void
 }
+
+; Test that instructions in loop 1 latch are moved to the beginning of loop 2
+; latch iff it is proven safe. %inc.first and %cmp.first are moved, but
+; `store i32 0, i32* %Ai.first` is not.
+
+; CHECK: void @flow_dep
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br label %for.first
+; CHECK-LABEL: for.first:
+; CHECK: store i32 0, i32* %Ai.first
+; CHECK: %Ai.second =
+; CHECK: br label %for.second.latch
+; CHECK-LABEL: for.second.latch:
+; CHECK-NEXT: %inc.first = add nsw i64 %i.first, 1
+; CHECK-NEXT: %cmp.first = icmp slt i64 %inc.first, 100
+; CHECK: br i1 %cmp.second, label %for.first, label %for.end
+; CHECK-LABEL: for.end:
+; CHECK-NEXT: ret void
+
+define void @flow_dep(i32* noalias %A, i32* noalias %B) {
+entry:
+  br label %for.first
+
+for.first:
+  %i.first = phi i64 [ 0, %entry ], [ %inc.first, %for.first ]
+  %Ai.first = getelementptr inbounds i32, i32* %A, i64 %i.first
+  store i32 0, i32* %Ai.first, align 4
+  %inc.first = add nsw i64 %i.first, 1
+  %cmp.first = icmp slt i64 %inc.first, 100
+  br i1 %cmp.first, label %for.first, label %for.second.preheader
+
+for.second.preheader:
+  br label %for.second
+
+for.second:
+  %i.second = phi i64 [ %inc.second, %for.second.latch ], [ 0, %for.second.preheader ]
+  %Ai.second = getelementptr inbounds i32, i32* %A, i64 %i.second
+  %0 = load i32, i32* %Ai.second, align 4
+  %Bi = getelementptr inbounds i32, i32* %B, i64 %i.second
+  store i32 %0, i32* %Bi, align 4
+  br label %for.second.latch
+
+for.second.latch:
+  %inc.second = add nsw i64 %i.second, 1
+  %cmp.second = icmp slt i64 %inc.second, 100
+  br i1 %cmp.second, label %for.second, label %for.end
+
+for.end:
+  ret void
+}


        


More information about the llvm-commits mailing list