[llvm] 629d880 - [LoopUnrollAndJam] Visit phi operand dependencies in post-order

Joshua Cao via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 5 00:11:42 PST 2023


Author: Joshua Cao
Date: 2023-01-05T00:05:49-08:00
New Revision: 629d880dc527cd7a8d692cd24af196db4ea8646b

URL: https://github.com/llvm/llvm-project/commit/629d880dc527cd7a8d692cd24af196db4ea8646b
DIFF: https://github.com/llvm/llvm-project/commit/629d880dc527cd7a8d692cd24af196db4ea8646b.diff

LOG: [LoopUnrollAndJam] Visit phi operand dependencies in post-order

Fixes https://github.com/llvm/llvm-project/issues/58565

The previous implementation visits operands in pre-order, but this does
not guarantee an instruction is visited before its uses. This can cause
instructions to be copied in the incorrect order. For example:

```
a = ...
b = add a, 1
c = add a, b
d = add b, a
```

Pre-order visits does not guarantee the order in which `a` and `b` are
visited. LoopUnrollAndJam may incorrectly insert `b` before `a`.

This patch implements post-order visits. By visiting dependencies first,
we guarantee that an instruction's dependencies are visited first.

Differential Revision: https://reviews.llvm.org/D140255

Added: 
    llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll

Modified: 
    llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index 416aa2105193f..b125e952ec941 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -137,25 +137,28 @@ static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
 template <typename T>
 static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
                                      BasicBlockSet &AftBlocks, T Visit) {
-  SmallVector<Instruction *, 8> Worklist;
   SmallPtrSet<Instruction *, 8> VisitedInstr;
-  for (auto &Phi : Header->phis()) {
-    Value *V = Phi.getIncomingValueForBlock(Latch);
-    if (Instruction *I = dyn_cast<Instruction>(V))
-      Worklist.push_back(I);
-  }
 
-  while (!Worklist.empty()) {
-    Instruction *I = Worklist.pop_back_val();
-    if (!Visit(I))
-      return false;
+  std::function<bool(Instruction * I)> ProcessInstr = [&](Instruction *I) {
+    if (VisitedInstr.count(I))
+      return true;
+
     VisitedInstr.insert(I);
 
     if (AftBlocks.count(I->getParent()))
       for (auto &U : I->operands())
         if (Instruction *II = dyn_cast<Instruction>(U))
-          if (!VisitedInstr.count(II))
-            Worklist.push_back(II);
+          if (!ProcessInstr(II))
+            return false;
+
+    return Visit(I);
+  };
+
+  for (auto &Phi : Header->phis()) {
+    Value *V = Phi.getIncomingValueForBlock(Latch);
+    if (Instruction *I = dyn_cast<Instruction>(V))
+      if (!ProcessInstr(I))
+        return false;
   }
 
   return true;
@@ -168,20 +171,12 @@ static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
                                               BasicBlockSet &AftBlocks) {
   // We need to ensure we move the instructions in the correct order,
   // starting with the earliest required instruction and moving forward.
-  std::vector<Instruction *> Visited;
   processHeaderPhiOperands(Header, Latch, AftBlocks,
-                           [&Visited, &AftBlocks](Instruction *I) {
+                           [&AftBlocks, &InsertLoc](Instruction *I) {
                              if (AftBlocks.count(I->getParent()))
-                               Visited.push_back(I);
+                               I->moveBefore(InsertLoc);
                              return true;
                            });
-
-  // Move all instructions in program order to before the InsertLoc
-  BasicBlock *InsertLocBB = InsertLoc->getParent();
-  for (Instruction *I : reverse(Visited)) {
-    if (I->getParent() != InsertLocBB)
-      I->moveBefore(InsertLoc);
-  }
 }
 
 /*

diff  --git a/llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll b/llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll
new file mode 100644
index 0000000000000..9444d59f343d0
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br i1 false, label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA:%.*]], label [[BB_NEW:%.*]]
+; CHECK:       bb.new:
+; CHECK-NEXT:    br label [[BB5_PREHEADER:%.*]]
+; CHECK:       bb5.preheader:
+; CHECK-NEXT:    [[I10:%.*]] = phi i16 [ 0, [[BB_NEW]] ], [ [[I42_3:%.*]], [[BB38:%.*]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i16 [ 0, [[BB_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[BB38]] ]
+; CHECK-NEXT:    [[I4017:%.*]] = zext i16 [[I10]] to i64
+; CHECK-NEXT:    [[I13_I:%.*]] = add nuw nsw i64 [[I4017]], 1
+; CHECK-NEXT:    [[I42:%.*]] = trunc i64 [[I13_I]] to i16
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i16 [[NITER]], 1
+; CHECK-NEXT:    [[I4017_1:%.*]] = zext i16 [[I42]] to i64
+; CHECK-NEXT:    [[I13_I_1:%.*]] = add nuw nsw i64 [[I4017_1]], 1
+; CHECK-NEXT:    [[I42_1:%.*]] = trunc i64 [[I13_I_1]] to i16
+; CHECK-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i16 [[NITER_NEXT]], 1
+; CHECK-NEXT:    [[I4017_2:%.*]] = zext i16 [[I42_1]] to i64
+; CHECK-NEXT:    [[I13_I_2:%.*]] = add nuw nsw i64 [[I4017_2]], 1
+; CHECK-NEXT:    [[I42_2:%.*]] = trunc i64 [[I13_I_2]] to i16
+; CHECK-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i16 [[NITER_NEXT_1]], 1
+; CHECK-NEXT:    [[I4017_3:%.*]] = zext i16 [[I42_2]] to i64
+; CHECK-NEXT:    [[I13_I_3:%.*]] = add nuw nsw i64 [[I4017_3]], 1
+; CHECK-NEXT:    [[I42_3]] = trunc i64 [[I13_I_3]] to i16
+; CHECK-NEXT:    [[NITER_NEXT_3]] = add nuw i16 [[NITER_NEXT_2]], 1
+; CHECK-NEXT:    br label [[BB10_PREHEADER:%.*]]
+; CHECK:       bb10.preheader:
+; CHECK-NEXT:    br i1 true, label [[BB38]], label [[BB10_PREHEADER]]
+; CHECK:       bb38:
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i16 [[NITER_NEXT_3]], -28
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA_LOOPEXIT:%.*]], label [[BB5_PREHEADER]], !llvm.loop [[LOOP1:![0-9]+]]
+; CHECK:       bb1.bb43_crit_edge.unr-lcssa.loopexit:
+; CHECK-NEXT:    [[I10_UNR_PH:%.*]] = phi i16 [ [[I42_3]], [[BB38]] ]
+; CHECK-NEXT:    br label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA]]
+; CHECK:       bb1.bb43_crit_edge.unr-lcssa:
+; CHECK-NEXT:    [[I10_UNR:%.*]] = phi i16 [ 0, [[BB:%.*]] ], [ [[I10_UNR_PH]], [[BB1_BB43_CRIT_EDGE_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    br i1 true, label [[BB5_PREHEADER_EPIL_PREHEADER:%.*]], label [[BB1_BB43_CRIT_EDGE:%.*]]
+; CHECK:       bb5.preheader.epil.preheader:
+; CHECK-NEXT:    br label [[BB5_PREHEADER_EPIL:%.*]]
+; CHECK:       bb5.preheader.epil:
+; CHECK-NEXT:    [[I10_EPIL:%.*]] = phi i16 [ [[I10_UNR]], [[BB5_PREHEADER_EPIL_PREHEADER]] ], [ [[I42_EPIL:%.*]], [[BB38_EPIL:%.*]] ]
+; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i16 [ 0, [[BB5_PREHEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[BB38_EPIL]] ]
+; CHECK-NEXT:    br label [[BB10_PREHEADER_EPIL:%.*]]
+; CHECK:       bb10.preheader.epil:
+; CHECK-NEXT:    br i1 true, label [[BB38_EPIL]], label [[BB10_PREHEADER_EPIL]]
+; CHECK:       bb38.epil:
+; CHECK-NEXT:    [[I4017_EPIL:%.*]] = zext i16 [[I10_EPIL]] to i64
+; CHECK-NEXT:    [[I13_I_EPIL:%.*]] = add i64 [[I4017_EPIL]], 1
+; CHECK-NEXT:    [[I14_I_EPIL:%.*]] = select i1 true, i64 [[I13_I_EPIL]], i64 [[I4017_EPIL]]
+; CHECK-NEXT:    [[I42_EPIL]] = trunc i64 [[I14_I_EPIL]] to i16
+; CHECK-NEXT:    [[I3_NOT_EPIL:%.*]] = icmp eq i16 [[I10_EPIL]], -27
+; CHECK-NEXT:    [[EPIL_ITER_NEXT]] = add i16 [[EPIL_ITER]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i16 [[EPIL_ITER_NEXT]], 2
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[BB5_PREHEADER_EPIL]], label [[BB1_BB43_CRIT_EDGE_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       bb1.bb43_crit_edge.epilog-lcssa:
+; CHECK-NEXT:    br label [[BB1_BB43_CRIT_EDGE]]
+; CHECK:       bb1.bb43_crit_edge:
+; CHECK-NEXT:    ret void
+;
+bb:
+  br label %bb5.preheader
+
+bb5.preheader:                                    ; preds = %bb38, %bb
+  %i10 = phi i16 [ 0, %bb ], [ %i42, %bb38 ]
+  br label %bb10.preheader
+
+bb10.preheader:                                   ; preds = %bb10.preheader, %bb5.preheader
+  br i1 true, label %bb38, label %bb10.preheader
+
+bb38:                                             ; preds = %bb10.preheader
+  %i4017 = zext i16 %i10 to i64
+  %i13.i = add i64 %i4017, 1
+  %i14.i = select i1 true, i64 %i13.i, i64 %i4017
+  %i42 = trunc i64 %i14.i to i16
+  %i3.not = icmp eq i16 %i10, -27
+  br i1 %i3.not, label %bb1.bb43_crit_edge, label %bb5.preheader
+
+bb1.bb43_crit_edge:                               ; preds = %bb38
+  ret void
+}


        


More information about the llvm-commits mailing list