[llvm] ae05b9d - Sink/hoist memory instructions between loop fusion candidates

Aaron Ballman via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 7 04:59:25 PDT 2022


Author: Aaron Kogon
Date: 2022-09-07T07:42:00-04:00
New Revision: ae05b9dc305fc9d56d16fe102e528905da1187f8

URL: https://github.com/llvm/llvm-project/commit/ae05b9dc305fc9d56d16fe102e528905da1187f8
DIFF: https://github.com/llvm/llvm-project/commit/ae05b9dc305fc9d56d16fe102e528905da1187f8.diff

LOG: Sink/hoist memory instructions between loop fusion candidates

Currently, instructions in the preheader of the second of two fusion
candidates are sunk and hoisted whenever possible, to try to allow the
loops to fuse. Memory instructions are skipped, and are never sunk or
hoisted. This change adds memory instructions for sinking/hoisting
consideration.

This change uses DependenceAnalysis to check if a mem inst in the
preheader of FC1 depends on an instruction in FC0's header, across
which it will be hoisted, or FC1's header, across which it will be
sunk. We reject cases where the dependency is a data hazard.

Differential Revision: https://reviews.llvm.org/D131606

Added: 
    llvm/test/Transforms/LoopFusion/hoist_load.ll
    llvm/test/Transforms/LoopFusion/hoist_store.ll
    llvm/test/Transforms/LoopFusion/no_sink_hoist_atomic.ll
    llvm/test/Transforms/LoopFusion/no_sink_hoist_load.ll
    llvm/test/Transforms/LoopFusion/no_sink_hoist_volatile.ll
    llvm/test/Transforms/LoopFusion/sink_load.ll
    llvm/test/Transforms/LoopFusion/sink_store.ll

Modified: 
    llvm/lib/Transforms/Scalar/LoopFuse.cpp
    llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll
    llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index d937300014ba..deaab0b43676 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -703,7 +703,6 @@ struct LoopFuser {
   std::pair<bool, Optional<unsigned>>
   haveIdenticalTripCounts(const FusionCandidate &FC0,
                           const FusionCandidate &FC1) const {
-
     const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
     if (isa<SCEVCouldNotCompute>(TripCount0)) {
       UncomputableTripCount++;
@@ -1040,6 +1039,112 @@ struct LoopFuser {
     return Fused;
   }
 
+  // Returns true if \p I can be hoisted to the end of the preheader of
+  // \p FC0: every instruction operand must dominate FC0's preheader successor
+  // or already be in \p SafeToHoist, and a memory instruction must have no
+  // hazardous dependence on \p NotHoisting, FC0.MemReads or FC0.MemWrites.
+  // TODO: Move functionality into CodeMoverUtils
+  bool canHoistInst(Instruction &I,
+                    const SmallVector<Instruction *, 4> &SafeToHoist,
+                    const SmallVector<Instruction *, 4> &NotHoisting,
+                    const FusionCandidate &FC0) const {
+    const BasicBlock *FC0PreheaderTarget = FC0.Preheader->getSingleSuccessor();
+    assert(FC0PreheaderTarget &&
+           "Expected single successor for loop preheader.");
+
+    for (Use &Op : I.operands()) {
+      if (auto *OpInst = dyn_cast<Instruction>(Op)) {
+        bool OpHoisted = is_contained(SafeToHoist, OpInst);
+        // An operand we have already decided to hoist does not dominate FC0
+        // *yet*, but will after we hoist it, so it is treated as available.
+        if (!(OpHoisted || DT.dominates(OpInst, FC0PreheaderTarget))) {
+          return false;
+        }
+      }
+    }
+
+    // Non-memory instructions need no dependence checks; hoisting is safe.
+    if (!I.mayReadOrWriteMemory())
+      return true;
+
+    LLVM_DEBUG(dbgs() << "Checking if this mem inst can be hoisted.\n");
+    for (Instruction *NotHoistedInst : NotHoisting) {
+      if (auto D = DI.depends(&I, NotHoistedInst, true)) {
+        // Bail out on any data hazard: read-before-write (anti),
+        // write-before-read (flow) or write-before-write (output).
+        if (D->isFlow() || D->isAnti() || D->isOutput()) {
+          LLVM_DEBUG(dbgs() << "Inst depends on an instruction in FC1's "
+                               "preheader that is not being hoisted.\n");
+          return false;
+        }
+      }
+    }
+
+    for (Instruction *ReadInst : FC0.MemReads) {
+      if (auto D = DI.depends(ReadInst, &I, true)) {
+        // Bail out on a read-before-write (anti) dependency.
+        if (D->isAnti()) {
+          LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC0.\n");
+          return false;
+        }
+      }
+    }
+
+    for (Instruction *WriteInst : FC0.MemWrites) {
+      if (auto D = DI.depends(WriteInst, &I, true)) {
+        // Bail out on write-before-read (flow) or write-before-write (output).
+        if (D->isFlow() || D->isOutput()) {
+          LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC0.\n");
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  // Returns true if \p I can be sunk to the top of the exit block of \p FC1,
+  // i.e. it has no user inside FC1.L and no memory hazard with FC1's accesses.
+  // TODO: Move functionality into CodeMoverUtils
+  bool canSinkInst(Instruction &I, const FusionCandidate &FC1) const {
+    for (User *U : I.users()) {
+      if (auto *UI{dyn_cast<Instruction>(U)}) {
+        // Cannot sink if a user is inside the loop.
+        // If FC1 has phi users of this value, we cannot sink it into FC1.
+        if (FC1.L->contains(UI)) {
+          // Cannot hoist or sink this instruction. No hoisting/sinking
+          // should take place and the loops should not fuse.
+          return false;
+        }
+      }
+    }
+
+    // Non-memory instructions need no dependence checks; sinking is safe.
+    if (!I.mayReadOrWriteMemory())
+      return true;
+
+    for (Instruction *ReadInst : FC1.MemReads) {
+      if (auto D = DI.depends(&I, ReadInst, true)) {
+        // Bail out on a write-before-read (flow) dependency.
+        if (D->isFlow()) {
+          LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC1.\n");
+          return false;
+        }
+      }
+    }
+
+    for (Instruction *WriteInst : FC1.MemWrites) {
+      if (auto D = DI.depends(&I, WriteInst, true)) {
+        // Bail out on write-before-write (output) or read-before-write (anti).
+        if (D->isOutput() || D->isAnti()) {
+          LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC1.\n");
+          return false;
+        }
+      }
+    }
+
+    return true;
+  }
+
   /// Collect instructions in the \p FC1 Preheader that can be hoisted
   /// to the \p FC0 Preheader or sunk into the \p FC1 Body
   bool collectMovablePreheaderInsts(
@@ -1047,6 +1152,10 @@ struct LoopFuser {
       SmallVector<Instruction *, 4> &SafeToHoist,
       SmallVector<Instruction *, 4> &SafeToSink) const {
     BasicBlock *FC1Preheader = FC1.Preheader;
+    // Save the instructions that are not being hoisted, so we know not to hoist
+    // mem insts that they dominate.
+    SmallVector<Instruction *, 4> NotHoisting;
+
     for (Instruction &I : *FC1Preheader) {
       // Can't move a branch
       if (&I == FC1Preheader->getTerminator())
@@ -1055,52 +1164,33 @@ struct LoopFuser {
       // TODO: The case of mayReadFromMemory we can handle but requires
       // additional work with a dependence analysis so for now we give
       // up on memory reads.
-      if (I.mayHaveSideEffects() || I.mayReadFromMemory()) {
-        LLVM_DEBUG(dbgs() << "Inst: " << I << " may have side-effects.\n");
+      if (I.mayThrow() || !I.willReturn()) {
+        LLVM_DEBUG(dbgs() << "Inst: " << I << " may throw or won't return.\n");
         return false;
       }
 
       LLVM_DEBUG(dbgs() << "Checking Inst: " << I << "\n");
 
-      // First check if can be hoisted
-      // If the operands of this instruction dominate the FC0 Preheader
-      // target block, then it is safe to move them to the end of the FC0
-      const BasicBlock *FC0PreheaderTarget =
-          FC0.Preheader->getSingleSuccessor();
-      assert(FC0PreheaderTarget &&
-             "Expected single successor for loop preheader.");
-      bool CanHoistInst = true;
-      for (Use &Op : I.operands()) {
-        if (auto *OpInst = dyn_cast<Instruction>(Op)) {
-          bool OpHoisted = is_contained(SafeToHoist, OpInst);
-          // Check if we have already decided to hoist this operand. In this
-          // case, it does not dominate FC0 *yet*, but will after we hoist it.
-          if (!(OpHoisted || DT.dominates(OpInst, FC0PreheaderTarget))) {
-            CanHoistInst = false;
-            break;
-          }
-        }
+      if (I.isAtomic() || I.isVolatile()) {
+        LLVM_DEBUG(
+            dbgs() << "\tInstruction is volatile or atomic. Cannot move it.\n");
+        return false;
       }
-      if (CanHoistInst) {
+
+      if (canHoistInst(I, SafeToHoist, NotHoisting, FC0)) {
         SafeToHoist.push_back(&I);
         LLVM_DEBUG(dbgs() << "\tSafe to hoist.\n");
       } else {
         LLVM_DEBUG(dbgs() << "\tCould not hoist. Trying to sink...\n");
+        NotHoisting.push_back(&I);
 
-        for (User *U : I.users()) {
-          if (auto *UI{dyn_cast<Instruction>(U)}) {
-            // Cannot sink if user in loop
-            // If FC1 has phi users of this value, we cannot sink it into FC1.
-            if (FC1.L->contains(UI)) {
-              // Cannot hoist or sink this instruction. No hoisting/sinking
-              // should take place, loops should not fuse
-              LLVM_DEBUG(dbgs() << "\tCould not sink.\n");
-              return false;
-            }
-          }
+        if (canSinkInst(I, FC1)) {
+          SafeToSink.push_back(&I);
+          LLVM_DEBUG(dbgs() << "\tSafe to sink.\n");
+        } else {
+          LLVM_DEBUG(dbgs() << "\tCould not sink.\n");
+          return false;
         }
-        SafeToSink.push_back(&I);
-        LLVM_DEBUG(dbgs() << "\tSafe to sink.\n");
       }
     }
     LLVM_DEBUG(
@@ -1331,7 +1421,6 @@ struct LoopFuser {
                           const FusionCandidate &FC1,
                           SmallVector<Instruction *, 4> &HoistInsts,
                           SmallVector<Instruction *, 4> &SinkInsts) const {
-
     // All preheader instructions except the branch must be hoisted or sunk
     assert(HoistInsts.size() + SinkInsts.size() == FC1.Preheader->size() - 1 &&
            "Attempting to sink and hoist preheader instructions, but not all "

diff  --git a/llvm/test/Transforms/LoopFusion/hoist_load.ll b/llvm/test/Transforms/LoopFusion/hoist_load.ll
new file mode 100644
index 000000000000..f04f36ff1933
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/hoist_load.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Safe to hoist.
+
+@A = common global [100 x i32] zeroinitializer, align 16
+define void @hoist_preheader(i32 %N) {
+; CHECK-LABEL: @hoist_preheader(
+; CHECK-NEXT:  pre1:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    br label [[BODY1:%.*]]
+; CHECK:       body1:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ]
+; CHECK-NEXT:    [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add i32 1, [[I]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]]
+; CHECK-NEXT:    [[I_NEXT2]] = add i32 1, [[I2]]
+; CHECK-NEXT:    [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]]
+; CHECK-NEXT:    br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+pre1:
+  %ptr = alloca i32
+  br label %body1
+
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  br i1 %cond, label %body1, label %pre2
+
+pre2:
+  %b = load i32, i32 * %ptr
+  br label %body2
+
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  br i1 %cond2, label %body2, label %exit
+
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopFusion/hoist_store.ll b/llvm/test/Transforms/LoopFusion/hoist_store.ll
new file mode 100644
index 000000000000..5a99d6a6f6a6
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/hoist_store.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Safe to hoist.
+
+@A = common global [100 x i32] zeroinitializer, align 16
+define void @hoist_preheader(i32 %N) {
+; CHECK-LABEL: @hoist_preheader(
+; CHECK-NEXT:  pre1:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 3, i32* [[PTR]], align 4
+; CHECK-NEXT:    br label [[BODY1:%.*]]
+; CHECK:       body1:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ]
+; CHECK-NEXT:    [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add i32 1, [[I]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]]
+; CHECK-NEXT:    [[I_NEXT2]] = add i32 1, [[I2]]
+; CHECK-NEXT:    [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]]
+; CHECK-NEXT:    br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+pre1:
+  %ptr = alloca i32
+  br label %body1
+
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  br i1 %cond, label %body1, label %pre2
+
+pre2:
+  store i32 3, i32* %ptr
+  br label %body2
+
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  br i1 %cond2, label %body2, label %exit
+
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_atomic.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_atomic.ll
new file mode 100644
index 000000000000..6f7aa27673a6
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_atomic.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Could not hoist/sink all instructions
+
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT:   %ptr = alloca i32
+; CHECK-NEXT:  br label %body1
+pre1:
+  %ptr = alloca i32
+  br label %body1
+
+; CHECK:body1: 
+; CHECK-NOT: store atomic i32 3, i32* %ptr seq_cst, align 4
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK-NEXT: store atomic i32 3, i32* %ptr seq_cst, align 4
+pre2:
+  store atomic i32 3, i32* %ptr seq_cst, align 4
+  br label %body2
+
+; CHECK: body2:
+; CHECK-NOT:  store atomic i32 3, i32* %ptr seq_cst, align 4
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_load.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_load.ll
new file mode 100644
index 000000000000..774fc444183b
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_load.ll
@@ -0,0 +1,42 @@
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Could not hoist/sink all instructions
+
+@A = common global [100 x i32] zeroinitializer, align 16
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT:  %ptr = alloca i32, align 4
+; CHECK-NEXT:  br label %body1
+pre1:
+  %ptr = alloca i32, align 4
+  br label %body1
+
+; CHECK:body1: 
+; CHECK-NOT:  %stay =
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  store i32 3, i32* %ptr
+  br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK-NEXT: %stay = load i32, i32* %ptr
+pre2:
+  %stay = load i32, i32* %ptr
+  br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: %stay =
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  store i32 3, i32* %ptr
+  br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+; CHECK-NOT: %stay =
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll
index ee77c50d6e1e..abf5593de77a 100644
--- a/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll
+++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll
@@ -1,6 +1,5 @@
 ; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
 ; REQUIRES: asserts
-; CHECK: have side-effects.
 ; CHECK: Could not hoist/sink all instructions
 
 @A = common global [100 x i32] zeroinitializer, align 16
@@ -9,15 +8,16 @@ define void @sink_preheader(i32 %N) {
 ; CHECK-NEXT:  %ptr = alloca i32, align 4
 ; CHECK-NEXT:  br label %body1
 pre1:
-  %ptr = alloca i32
+  %ptr = alloca i32, align 4
   br label %body1
 
 ; CHECK:body1: 
-; CHECK-NOT:  %stay =
+; CHECK-NOT:  store i32 3, i32* %ptr
 body1:  ; preds = %pre1, %body1
   %i = phi i32 [%i_next, %body1], [0, %pre1]
   %i_next = add i32 1, %i
   %cond = icmp ne i32 %i, %N
+  %load1 = load i32, i32* %ptr
   br i1 %cond, label %body1, label %pre2
 
 ; CHECK:pre2:
@@ -27,15 +27,16 @@ pre2:
   br label %body2
 
 ; CHECK: body2:
-; CHECK-NOT: %stay =
+; CHECK-NOT: store i32 3, i32* %ptr
 body2:  ; preds = %pre2, %body2
   %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
   %i_next2 = add i32 1, %i2
   %cond2 = icmp ne i32 %i2, %N
+  %load2 = load i32, i32* %ptr
   br i1 %cond2, label %body2, label %exit
 
 ; CHECK: exit:
-; CHECK-NOT: %stay =
+; CHECK-NOT: store i32 3, i32* %ptr
 exit:
   ret void
 }

diff  --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll
index b89043a5cfdc..17f2a4765ff4 100644
--- a/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll
+++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll
@@ -1,6 +1,5 @@
 ; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
 ; REQUIRES: asserts
-; CHECK: may have side-effects
 ; CHECK: Could not hoist/sink all instructions
 
 declare void @unknown_func()
@@ -12,7 +11,7 @@ pre1:
   br label %body1
 
 ; CHECK:body1: 
-; CHECK-NOT:  %stay =
+; CHECK-NOT: call void @unknown_func()
 body1:  ; preds = %pre1, %body1
   %i = phi i32 [%i_next, %body1], [0, %pre1]
   %i_next = add i32 1, %i
@@ -26,7 +25,7 @@ pre2:
   br label %body2
 
 ; CHECK: body2:
-; CHECK-NOT: %stay =
+; CHECK-NOT: call void @unknown_func()
 body2:  ; preds = %pre2, %body2
   %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
   %i_next2 = add i32 1, %i2
@@ -34,7 +33,6 @@ body2:  ; preds = %pre2, %body2
   br i1 %cond2, label %body2, label %exit
 
 ; CHECK: exit:
-; CHECK-NOT: %stay =
 exit:
   ret void
 }

diff  --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_volatile.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_volatile.ll
new file mode 100644
index 000000000000..4981d45ac6af
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_volatile.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Could not hoist/sink all instructions
+
+declare void @unknown_func()
+
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT:  %ptr = alloca i32
+; CHECK-NEXT:  br label %body1
+pre1:
+  %ptr = alloca i32
+  br label %body1
+
+; CHECK:body1: 
+; CHECK-NOT: store volatile i32 3, i32* %ptr
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK-NEXT: store volatile i32 3, i32* %ptr
+pre2:
+  store volatile i32 3, i32* %ptr
+  br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: store volatile i32 3, i32* %ptr
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopFusion/sink_load.ll b/llvm/test/Transforms/LoopFusion/sink_load.ll
new file mode 100644
index 000000000000..6cabedb31559
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/sink_load.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Safe to sink.
+
+@A = common global [100 x i32] zeroinitializer, align 16
+define void @sink_preheader(i32 %N) {
+; CHECK-LABEL: @sink_preheader(
+; CHECK-NEXT:  pre1:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label [[BODY1:%.*]]
+; CHECK:       body1:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ]
+; CHECK-NEXT:    [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add i32 1, [[I]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]]
+; CHECK-NEXT:    store i32 3, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[I_NEXT2]] = add i32 1, [[I2]]
+; CHECK-NEXT:    [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]]
+; CHECK-NEXT:    br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+pre1:
+  %ptr = alloca i32
+  br label %body1
+
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  store i32 3, i32* %ptr
+  br i1 %cond, label %body1, label %pre2
+
+pre2:
+  %b = load i32, i32 * %ptr
+  br label %body2
+
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  br i1 %cond2, label %body2, label %exit
+
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopFusion/sink_store.ll b/llvm/test/Transforms/LoopFusion/sink_store.ll
new file mode 100644
index 000000000000..bcd3e6d89f37
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/sink_store.ll
@@ -0,0 +1,46 @@
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+@A = common global [100 x i32] zeroinitializer, align 16
+define void @sink_preheader(i32 %N) {
+; CHECK-LABEL: @sink_preheader(
+; CHECK-NEXT:  pre1:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label [[BODY1:%.*]]
+; CHECK:       body1:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ]
+; CHECK-NEXT:    [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add i32 1, [[I]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[I_NEXT2]] = add i32 1, [[I2]]
+; CHECK-NEXT:    [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]]
+; CHECK-NEXT:    br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    store i32 3, i32* [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+pre1:
+  %ptr = alloca i32
+  br label %body1
+
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  %b = load i32, i32 * %ptr
+  br i1 %cond, label %body1, label %pre2
+
+pre2:
+  store i32 3, i32* %ptr
+  br label %body2
+
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  br i1 %cond2, label %body2, label %exit
+
+exit:
+  ret void
+}


        


More information about the llvm-commits mailing list