[llvm] da58e68 - [LoopFusion] Move instructions from FC1.Preheader to FC0.Preheader when

Whitney Tsang via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 29 07:06:22 PST 2020


Author: Whitney Tsang
Date: 2020-01-29T15:06:11Z
New Revision: da58e68fdf1ba9efd833f8b46216a6642a139178

URL: https://github.com/llvm/llvm-project/commit/da58e68fdf1ba9efd833f8b46216a6642a139178
DIFF: https://github.com/llvm/llvm-project/commit/da58e68fdf1ba9efd833f8b46216a6642a139178.diff

LOG: [LoopFusion] Move instructions from FC1.Preheader to FC0.Preheader when
proven safe.

Summary:
Currently LoopFusion give up when the second loop nest preheader is
not empty. For example:

for (int i = 0; i < 100; ++i) {}
x+=1;
for (int i = 0; i < 100; ++i) {}
The above example should be safe to fuse.
This PR moves instructions in FC1 preheader (e.g. x+=1; ) to
FC0 preheader, which then LoopFusion is able to fuse them.
Reviewer: kbarton, Meinersbur, jdoerfert, dmgreen, fhahn, hfinkel,
bmahjour, etiotto
Reviewed By: jdoerfert
Subscribers: hiraditya, llvm-commits
Tag: LLVM
Differential Revision: https://reviews.llvm.org/D71821

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
    llvm/lib/Transforms/Scalar/LoopFuse.cpp
    llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
    llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
    llvm/test/Transforms/LoopFusion/guarded.ll
    llvm/test/Transforms/LoopFusion/simple.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
index d1ee37708458..3f5c0aed2abf 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
@@ -41,6 +41,12 @@ bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
                         DominatorTree &DT, const PostDominatorTree &PDT,
                         DependenceInfo &DI);
 
+/// Return true if all instructions (except the terminator) in \p BB can be
+/// safely moved before \p InsertPoint.
+bool isSafeToMoveBefore(BasicBlock &BB, Instruction &InsertPoint,
+                        DominatorTree &DT, const PostDominatorTree &PDT,
+                        DependenceInfo &DI);
+
 /// Move instructions, in an order-preserving manner, from \p FromBB to the
 /// beginning of \p ToBB when proven safe.
 void moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
@@ -48,6 +54,12 @@ void moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
                                     const PostDominatorTree &PDT,
                                     DependenceInfo &DI);
 
+/// Move instructions, in an order-preserving manner, from \p FromBB to the end
+/// of \p ToBB when proven safe.
+void moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB,
+                              DominatorTree &DT, const PostDominatorTree &PDT,
+                              DependenceInfo &DI);
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_CODEMOVERUTILS_H

diff  --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 430d4bc23de9..24597e228870 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -86,7 +86,9 @@ STATISTIC(UnknownTripCount, "Loop has unknown trip count");
 STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
 STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
 STATISTIC(NonAdjacent, "Loops are not adjacent");
-STATISTIC(NonEmptyPreheader, "Loop has a non-empty preheader");
+STATISTIC(
+    NonEmptyPreheader,
+    "Loop has a non-empty preheader with instructions that cannot be moved");
 STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
 STATISTIC(NonIdenticalGuards, "Candidates have 
diff erent guards");
 STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block");
@@ -738,19 +740,21 @@ struct LoopFuser {
             continue;
           }
 
-          // The following three checks look for empty blocks in FC0 and FC1. If
-          // any of these blocks are non-empty, we do not fuse. This is done
-          // because we currently do not have the safety checks to determine if
-          // it is safe to move the blocks past other blocks in the loop. Once
-          // these checks are added, these conditions can be relaxed.
-          if (!isEmptyPreheader(*FC1)) {
-            LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
-                                 "preheader. Not fusing.\n");
+          if (!isSafeToMoveBefore(*FC1->Preheader,
+                                  *FC0->Preheader->getTerminator(), DT, PDT,
+                                  DI)) {
+            LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
+                                 "instructions in preheader. Not fusing.\n");
             reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
                                                        NonEmptyPreheader);
             continue;
           }
 
+          // The following two checks look for empty blocks in FC0 and FC1. If
+          // any of these blocks are non-empty, we do not fuse. This is done
+          // because we currently do not have the safety checks to determine if
+          // it is safe to move the blocks past other blocks in the loop. Once
+          // these checks are added, these conditions can be relaxed.
           if (FC0->GuardBranch && !isEmptyExitBlock(*FC0)) {
             LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty exit "
                                  "block. Not fusing.\n");
@@ -1166,6 +1170,10 @@ struct LoopFuser {
     LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();
                dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););
 
+    // Move instructions from the preheader of FC1 to the end of the preheader
+    // of FC0.
+    moveInstructionsToTheEnd(*FC1.Preheader, *FC0.Preheader, DT, PDT, DI);
+
     // Fusing guarded loops is handled slightly 
diff erently than non-guarded
     // loops and has been broken out into a separate method instead of trying to
     // intersperse the logic within a single method.

diff  --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
index 0f7f1e5b9579..2a637601417d 100644
--- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -380,6 +380,17 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
   return true;
 }
 
+bool llvm::isSafeToMoveBefore(BasicBlock &BB, Instruction &InsertPoint,
+                              DominatorTree &DT, const PostDominatorTree &PDT,
+                              DependenceInfo &DI) {
+  return llvm::all_of(BB, [&](Instruction &I) {
+    if (BB.getTerminator() == &I)
+      return true;
+
+    return isSafeToMoveBefore(I, InsertPoint, DT, PDT, DI);
+  });
+}
+
 void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
                                           DominatorTree &DT,
                                           const PostDominatorTree &PDT,
@@ -394,3 +405,15 @@ void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
       I.moveBefore(MovePos);
   }
 }
+
+void llvm::moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB,
+                                    DominatorTree &DT,
+                                    const PostDominatorTree &PDT,
+                                    DependenceInfo &DI) {
+  Instruction *MovePos = ToBB.getTerminator();
+  while (FromBB.size() > 1) {
+    Instruction &I = FromBB.front();
+    if (isSafeToMoveBefore(I, *MovePos, DT, PDT, DI))
+      I.moveBefore(MovePos);
+  }
+}

diff  --git a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
index 0926e035396c..fe88f3d458b5 100644
--- a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
+++ b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
@@ -104,7 +104,7 @@ for.end15:                                        ; preds = %for.cond.cleanup5
   ret void
 }
 
-; CHECK: remark: diagnostics_missed.c:38:3: [negative_dependence]: entry and for.end: Loop has a non-empty preheader
+; CHECK: remark: diagnostics_missed.c:38:3: [negative_dependence]: entry and for.end: Dependencies prevent fusion
 define void @negative_dependence(i32* noalias %A) !dbg !51 {
 entry:
   br label %for.body
@@ -185,6 +185,36 @@ for.end12:                                        ; preds = %for.inc10
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare void @llvm.dbg.value(metadata, metadata, metadata) #0
 
+; CHECK: remark: diagnostics_missed.c:62:3: [unsafe_preheader]: for.first.preheader and for.second.preheader: Loop has a non-empty preheader with instructions that cannot be moved
+define void @unsafe_preheader(i32* noalias %A, i32* noalias %B) {
+for.first.preheader:
+  br label %for.first, !dbg !80
+
+for.first:
+  %i.02 = phi i64 [ 0, %for.first.preheader ], [ %inc, %for.first ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.02
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nsw i64 %i.02, 1
+  %cmp = icmp slt i64 %inc, 100
+  br i1 %cmp, label %for.first, label %for.second.preheader
+
+for.second.preheader:
+  call void @bar()
+  br label %for.second
+
+for.second:
+  %j.01 = phi i64 [ 0, %for.second.preheader ], [ %inc6, %for.second ]
+  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.01
+  store i32 0, i32* %arrayidx4, align 4
+  %inc6 = add nsw i64 %j.01, 1
+  %cmp2 = icmp slt i64 %inc6, 100
+  br i1 %cmp2, label %for.second, label %for.end
+
+for.end:
+  ret void
+}
+declare void @bar()
+
 attributes #0 = { nounwind readnone speculatable willreturn }
 
 !llvm.dbg.cu = !{!2}
@@ -267,3 +297,7 @@ attributes #0 = { nounwind readnone speculatable willreturn }
 !74 = !DILocation(line: 51, column: 3, scope: !70)
 !75 = !DILocation(line: 52, column: 15, scope: !70)
 !76 = !DILocation(line: 57, column: 3, scope: !63)
+!77 = distinct !DISubprogram(name: "unsafe_preheader", scope: !3, file: !3, line: 60, type: !15, scopeLine: 60, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !78)
+!78 = !{}
+!79 = distinct !DILexicalBlock(scope: !77, file: !3, line: 3, column: 5)
+!80 = !DILocation(line: 62, column: 3, scope: !79)

diff  --git a/llvm/test/Transforms/LoopFusion/guarded.ll b/llvm/test/Transforms/LoopFusion/guarded.ll
index 9242e0a518e9..9d64a39daab9 100644
--- a/llvm/test/Transforms/LoopFusion/guarded.ll
+++ b/llvm/test/Transforms/LoopFusion/guarded.ll
@@ -63,3 +63,59 @@ bb15:                               ; preds = %bb9
 bb12:                                        ; preds = %bb15, %bb14
   ret void
 }
+
+; Test that `%add` is moved in for.first.preheader, and the two loops for.first
+; and for.second are fused.
+
+; CHECK: void @moveinsts_preheader
+; CHECK-LABEL: for.first.guard:
+; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
+; CHECK-LABEL: for.first.preheader:
+; CHECK-NEXT:  %add = add nsw i32 %x, 1
+; CHECK-NEXT:  br label %for.first
+; CHECK-LABEL: for.first:
+; CHECK:   br i1 %cmp.j, label %for.first, label %for.second.exit
+; CHECK-LABEL: for.second.exit:
+; CHECK-NEXT:   br label %for.end
+; CHECK-LABEL: for.end:
+; CHECK-NEXT:   ret void
+define void @moveinsts_preheader(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
+for.first.guard:
+  %cmp.guard = icmp slt i64 0, %N
+  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
+
+for.first.preheader:
+  br label %for.first
+
+for.first:
+  %i = phi i64 [ %inc.i, %for.first ], [ 0, %for.first.preheader ]
+  %Ai = getelementptr inbounds i32, i32* %A, i64 %i
+  store i32 0, i32* %Ai, align 4
+  %inc.i = add nsw i64 %i, 1
+  %cmp.i = icmp slt i64 %inc.i, %N
+  br i1 %cmp.i, label %for.first, label %for.first.exit
+
+for.first.exit:
+  br label %for.second.guard
+
+for.second.guard:
+  br i1 %cmp.guard, label %for.second.preheader, label %for.end
+
+for.second.preheader:
+  %add = add nsw i32 %x, 1
+  br label %for.second
+
+for.second:
+  %j = phi i64 [ %inc.j, %for.second ], [ 0, %for.second.preheader ]
+  %Bj = getelementptr inbounds i32, i32* %B, i64 %j
+  store i32 0, i32* %Bj, align 4
+  %inc.j = add nsw i64 %j, 1
+  %cmp.j = icmp slt i64 %inc.j, %N
+  br i1 %cmp.j, label %for.second, label %for.second.exit
+
+for.second.exit:
+  br label %for.end
+
+for.end:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopFusion/simple.ll b/llvm/test/Transforms/LoopFusion/simple.ll
index aeb626126d0b..290986947dfb 100644
--- a/llvm/test/Transforms/LoopFusion/simple.ll
+++ b/llvm/test/Transforms/LoopFusion/simple.ll
@@ -306,3 +306,88 @@ for.second.latch:
 for.end:
   ret void
 }
+
+; Test that `%add` is moved in basic block entry, and the two loops for.first
+; and for.second are fused.
+
+; CHECK: i32 @moveinsts_preheader
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %add = add nsw i32 %x, 1
+; CHECK-NEXT: br label %for.first
+; CHECK-LABEL: for.first:
+; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
+; CHECK-LABEL: for.second.exit:
+; CHECK-NEXT: ret i32 %add
+
+define i32 @moveinsts_preheader(i32* %A, i32 %x) {
+entry:
+  br label %for.first
+
+for.first:
+  %i = phi i64 [ 0, %entry ], [ %inc.i, %for.first ]
+  %Ai = getelementptr inbounds i32, i32* %A, i64 %i
+  store i32 0, i32* %Ai, align 4
+  %inc.i = add nsw i64 %i, 1
+  %cmp.i = icmp slt i64 %inc.i, 100
+  br i1 %cmp.i, label %for.first, label %for.first.exit
+
+for.first.exit:
+  %add = add nsw i32 %x, 1
+  br label %for.second
+
+for.second:
+  %j = phi i64 [ 0, %for.first.exit ], [ %inc.j, %for.second ]
+  %Aj = getelementptr inbounds i32, i32* %A, i64 %j
+  store i32 2, i32* %Aj, align 4
+  %inc.j = add nsw i64 %j, 1
+  %cmp.j = icmp slt i64 %inc.j, 100
+  br i1 %cmp.j, label %for.second, label %for.second.exit
+
+for.second.exit:
+  ret i32 %add
+}
+
+; Test that `%add` cannot be moved to basic block entry, as it uses %i, which
+; defined after basic block entry. And the two loops for.first and for.second
+; are not fused.
+
+; CHECK: i64 @unsafe_preheader
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br label %for.first
+; CHECK-LABEL: for.first:
+; CHECK: br i1 %cmp.i, label %for.first, label %for.first.exit
+; CHECK-LABEL: for.first.exit:
+; CHECK-NEXT: %add = add nsw i64 %x, %i
+; CHECK-NEXT: br label %for.second
+; CHECK-LABEL: for.second:
+; CHECK: br i1 %cmp.j, label %for.second, label %for.second.exit
+; CHECK-LABEL: for.second.exit:
+; CHECK-NEXT: ret i64 %add
+
+define i64 @unsafe_preheader(i32* %A, i64 %x) {
+entry:
+  br label %for.first
+
+for.first:
+  %i = phi i64 [ 0, %entry ], [ %inc.i, %for.first ]
+  %Ai = getelementptr inbounds i32, i32* %A, i64 %i
+  store i32 0, i32* %Ai, align 4
+  %inc.i = add nsw i64 %i, 1
+  %cmp.i = icmp slt i64 %inc.i, 100
+  br i1 %cmp.i, label %for.first, label %for.first.exit
+
+for.first.exit:
+  %add = add nsw i64 %x, %i
+  br label %for.second
+
+for.second:
+  %j = phi i64 [ 0, %for.first.exit ], [ %inc.j, %for.second ]
+  %Aj = getelementptr inbounds i32, i32* %A, i64 %j
+  store i32 2, i32* %Aj, align 4
+  %inc.j = add nsw i64 %j, 1
+  %cmp.j = icmp slt i64 %inc.j, 100
+  br i1 %cmp.j, label %for.second, label %for.second.exit
+
+for.second.exit:
+  ret i64 %add
+}


        


More information about the llvm-commits mailing list