[llvm] 629d880 - [LoopUnrollAndJam] Visit phi operand dependencies in post-order
Joshua Cao via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 5 00:11:42 PST 2023
Author: Joshua Cao
Date: 2023-01-05T00:05:49-08:00
New Revision: 629d880dc527cd7a8d692cd24af196db4ea8646b
URL: https://github.com/llvm/llvm-project/commit/629d880dc527cd7a8d692cd24af196db4ea8646b
DIFF: https://github.com/llvm/llvm-project/commit/629d880dc527cd7a8d692cd24af196db4ea8646b.diff
LOG: [LoopUnrollAndJam] Visit phi operand dependencies in post-order
Fixes https://github.com/llvm/llvm-project/issues/58565
The previous implementation visits operands in pre-order, but this does
not guarantee an instruction is visited before its uses. This can cause
instructions to be copied in the incorrect order. For example:
```
a = ...
b = add a, 1
c = add a, b
d = add b, a
```
Pre-order visits does not guarantee the order in which `a` and `b` are
visited. LoopUnrollAndJam may incorrectly insert `b` before `a`.
This patch implements post-order visits. By visiting dependencies first,
we guarantee that an instruction's dependencies are visited first.
Differential Revision: https://reviews.llvm.org/D140255
Added:
llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll
Modified:
llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index 416aa2105193f..b125e952ec941 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -137,25 +137,28 @@ static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
template <typename T>
static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
BasicBlockSet &AftBlocks, T Visit) {
- SmallVector<Instruction *, 8> Worklist;
SmallPtrSet<Instruction *, 8> VisitedInstr;
- for (auto &Phi : Header->phis()) {
- Value *V = Phi.getIncomingValueForBlock(Latch);
- if (Instruction *I = dyn_cast<Instruction>(V))
- Worklist.push_back(I);
- }
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
- if (!Visit(I))
- return false;
+ std::function<bool(Instruction * I)> ProcessInstr = [&](Instruction *I) {
+ if (VisitedInstr.count(I))
+ return true;
+
VisitedInstr.insert(I);
if (AftBlocks.count(I->getParent()))
for (auto &U : I->operands())
if (Instruction *II = dyn_cast<Instruction>(U))
- if (!VisitedInstr.count(II))
- Worklist.push_back(II);
+ if (!ProcessInstr(II))
+ return false;
+
+ return Visit(I);
+ };
+
+ for (auto &Phi : Header->phis()) {
+ Value *V = Phi.getIncomingValueForBlock(Latch);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (!ProcessInstr(I))
+ return false;
}
return true;
@@ -168,20 +171,12 @@ static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
BasicBlockSet &AftBlocks) {
// We need to ensure we move the instructions in the correct order,
// starting with the earliest required instruction and moving forward.
- std::vector<Instruction *> Visited;
processHeaderPhiOperands(Header, Latch, AftBlocks,
- [&Visited, &AftBlocks](Instruction *I) {
+ [&AftBlocks, &InsertLoc](Instruction *I) {
if (AftBlocks.count(I->getParent()))
- Visited.push_back(I);
+ I->moveBefore(InsertLoc);
return true;
});
-
- // Move all instructions in program order to before the InsertLoc
- BasicBlock *InsertLocBB = InsertLoc->getParent();
- for (Instruction *I : reverse(Visited)) {
- if (I->getParent() != InsertLocBB)
- I->moveBefore(InsertLoc);
- }
}
/*
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll b/llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll
new file mode 100644
index 0000000000000..9444d59f343d0
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br i1 false, label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA:%.*]], label [[BB_NEW:%.*]]
+; CHECK: bb.new:
+; CHECK-NEXT: br label [[BB5_PREHEADER:%.*]]
+; CHECK: bb5.preheader:
+; CHECK-NEXT: [[I10:%.*]] = phi i16 [ 0, [[BB_NEW]] ], [ [[I42_3:%.*]], [[BB38:%.*]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i16 [ 0, [[BB_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[BB38]] ]
+; CHECK-NEXT: [[I4017:%.*]] = zext i16 [[I10]] to i64
+; CHECK-NEXT: [[I13_I:%.*]] = add nuw nsw i64 [[I4017]], 1
+; CHECK-NEXT: [[I42:%.*]] = trunc i64 [[I13_I]] to i16
+; CHECK-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i16 [[NITER]], 1
+; CHECK-NEXT: [[I4017_1:%.*]] = zext i16 [[I42]] to i64
+; CHECK-NEXT: [[I13_I_1:%.*]] = add nuw nsw i64 [[I4017_1]], 1
+; CHECK-NEXT: [[I42_1:%.*]] = trunc i64 [[I13_I_1]] to i16
+; CHECK-NEXT: [[NITER_NEXT_1:%.*]] = add nuw nsw i16 [[NITER_NEXT]], 1
+; CHECK-NEXT: [[I4017_2:%.*]] = zext i16 [[I42_1]] to i64
+; CHECK-NEXT: [[I13_I_2:%.*]] = add nuw nsw i64 [[I4017_2]], 1
+; CHECK-NEXT: [[I42_2:%.*]] = trunc i64 [[I13_I_2]] to i16
+; CHECK-NEXT: [[NITER_NEXT_2:%.*]] = add nuw nsw i16 [[NITER_NEXT_1]], 1
+; CHECK-NEXT: [[I4017_3:%.*]] = zext i16 [[I42_2]] to i64
+; CHECK-NEXT: [[I13_I_3:%.*]] = add nuw nsw i64 [[I4017_3]], 1
+; CHECK-NEXT: [[I42_3]] = trunc i64 [[I13_I_3]] to i16
+; CHECK-NEXT: [[NITER_NEXT_3]] = add nuw i16 [[NITER_NEXT_2]], 1
+; CHECK-NEXT: br label [[BB10_PREHEADER:%.*]]
+; CHECK: bb10.preheader:
+; CHECK-NEXT: br i1 true, label [[BB38]], label [[BB10_PREHEADER]]
+; CHECK: bb38:
+; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i16 [[NITER_NEXT_3]], -28
+; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA_LOOPEXIT:%.*]], label [[BB5_PREHEADER]], !llvm.loop [[LOOP1:![0-9]+]]
+; CHECK: bb1.bb43_crit_edge.unr-lcssa.loopexit:
+; CHECK-NEXT: [[I10_UNR_PH:%.*]] = phi i16 [ [[I42_3]], [[BB38]] ]
+; CHECK-NEXT: br label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA]]
+; CHECK: bb1.bb43_crit_edge.unr-lcssa:
+; CHECK-NEXT: [[I10_UNR:%.*]] = phi i16 [ 0, [[BB:%.*]] ], [ [[I10_UNR_PH]], [[BB1_BB43_CRIT_EDGE_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: br i1 true, label [[BB5_PREHEADER_EPIL_PREHEADER:%.*]], label [[BB1_BB43_CRIT_EDGE:%.*]]
+; CHECK: bb5.preheader.epil.preheader:
+; CHECK-NEXT: br label [[BB5_PREHEADER_EPIL:%.*]]
+; CHECK: bb5.preheader.epil:
+; CHECK-NEXT: [[I10_EPIL:%.*]] = phi i16 [ [[I10_UNR]], [[BB5_PREHEADER_EPIL_PREHEADER]] ], [ [[I42_EPIL:%.*]], [[BB38_EPIL:%.*]] ]
+; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i16 [ 0, [[BB5_PREHEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[BB38_EPIL]] ]
+; CHECK-NEXT: br label [[BB10_PREHEADER_EPIL:%.*]]
+; CHECK: bb10.preheader.epil:
+; CHECK-NEXT: br i1 true, label [[BB38_EPIL]], label [[BB10_PREHEADER_EPIL]]
+; CHECK: bb38.epil:
+; CHECK-NEXT: [[I4017_EPIL:%.*]] = zext i16 [[I10_EPIL]] to i64
+; CHECK-NEXT: [[I13_I_EPIL:%.*]] = add i64 [[I4017_EPIL]], 1
+; CHECK-NEXT: [[I14_I_EPIL:%.*]] = select i1 true, i64 [[I13_I_EPIL]], i64 [[I4017_EPIL]]
+; CHECK-NEXT: [[I42_EPIL]] = trunc i64 [[I14_I_EPIL]] to i16
+; CHECK-NEXT: [[I3_NOT_EPIL:%.*]] = icmp eq i16 [[I10_EPIL]], -27
+; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i16 [[EPIL_ITER]], 1
+; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i16 [[EPIL_ITER_NEXT]], 2
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[BB5_PREHEADER_EPIL]], label [[BB1_BB43_CRIT_EDGE_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: bb1.bb43_crit_edge.epilog-lcssa:
+; CHECK-NEXT: br label [[BB1_BB43_CRIT_EDGE]]
+; CHECK: bb1.bb43_crit_edge:
+; CHECK-NEXT: ret void
+;
+bb:
+ br label %bb5.preheader
+
+bb5.preheader: ; preds = %bb38, %bb
+ %i10 = phi i16 [ 0, %bb ], [ %i42, %bb38 ]
+ br label %bb10.preheader
+
+bb10.preheader: ; preds = %bb10.preheader, %bb5.preheader
+ br i1 true, label %bb38, label %bb10.preheader
+
+bb38: ; preds = %bb10.preheader
+ %i4017 = zext i16 %i10 to i64
+ %i13.i = add i64 %i4017, 1
+ %i14.i = select i1 true, i64 %i13.i, i64 %i4017
+ %i42 = trunc i64 %i14.i to i16
+ %i3.not = icmp eq i16 %i10, -27
+ br i1 %i3.not, label %bb1.bb43_crit_edge, label %bb5.preheader
+
+bb1.bb43_crit_edge: ; preds = %bb38
+ ret void
+}
More information about the llvm-commits
mailing list