[llvm] [LoopPeel] LCSSA form is destroyed by LoopPeel, preserve it (PR #78696)
Vedant Paranjape via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 1 11:15:22 PST 2024
vedantparanjape-amd wrote:
```
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 7cfeb019af97..937a31e0a532 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1269,11 +1269,37 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
<< " iterations";
});
+ dbgs() << "========================================\n";
+ L->dump();
+ dbgs() << "========================================\n";
+ L->getParentLoop()->dump();
+ dbgs() << "========================================\n";
+ dbgs() << "Islcssa before PL: " << L->isRecursivelyLCSSAForm(DT, *LI) << "\n";
+ dbgs() << "Islcssa (parent) before PL: " << L->getParentLoop()->isRecursivelyLCSSAForm(DT, *LI) << "\n";
+ dbgs() << "========================================\n";
+
ValueToValueMapTy VMap;
if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA, VMap)) {
+ L->dump();
+ dbgs() << "========================================\n";
+ L->getParentLoop()->dump();
+ dbgs() << "========================================\n";
+ dbgs() << "Islcssa after PL: " << L->isRecursivelyLCSSAForm(DT, *LI) << "\n";
+ dbgs() << "Islcssa (parent) after PL: " << L->getParentLoop()->isRecursivelyLCSSAForm(DT, *LI) << "\n";
+ dbgs() << "========================================\n";
+
simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI);
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
+
+ L->dump();
+ dbgs() << "========================================\n";
+ L->getParentLoop()->dump();
+ dbgs() << "========================================\n";
+ dbgs() << "Islcssa after SLAU: " << L->isRecursivelyLCSSAForm(DT, *LI) << "\n";
+ dbgs() << "Islcssa (parent) after SLAU: " << L->getParentLoop()->isRecursivelyLCSSAForm(DT, *LI) << "\n";
+ dbgs() << "========================================\n";
+
if (PP.PeelProfiledIterations)
L->setLoopAlreadyUnrolled();
return LoopUnrollResult::PartiallyUnrolled;
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index ee6f7b35750a..17f7a5afe13b 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -218,11 +218,33 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
const TargetTransformInfo *TTI) {
using namespace llvm::PatternMatch;
+ dbgs() << "+++ bb before +++\n";
+ L->getHeader()->getParent()->dump();
+ dbgs() << "+++ bb before +++\n";
+ L->dump();
+ dbgs() << "========================================\n";
+ L->getParentLoop()->dump();
+ dbgs() << "========================================\n";
+ dbgs() << "Islcssa (start) inside SLAU: " << L->isRecursivelyLCSSAForm(*DT, *LI) << "\n";
+ dbgs() << "Islcssa (start) (parent) after SLAU: " << L->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI) << "\n";
+ dbgs() << "========================================\n";
+
// Simplify any new induction variables in the partially unrolled loop.
if (SE && SimplifyIVs) {
SmallVector<WeakTrackingVH, 16> DeadInsts;
simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts);
+ dbgs() << "+++ bb after +++\n";
+ L->getHeader()->getParent()->dump();
+ dbgs() << "+++ bb after +++\n";
+ L->dump();
+ dbgs() << "========================================\n";
+ L->getParentLoop()->dump();
+ dbgs() << "========================================\n";
+ dbgs() << "Islcssa (middle-in) inside SLAU: " << L->isRecursivelyLCSSAForm(*DT, *LI) << "\n";
+ dbgs() << "Islcssa (middle-in) (parent) after SLAU: " << L->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI) << "\n";
+ dbgs() << "========================================\n";
+
// Aggressively clean up dead instructions that simplifyLoopIVs already
// identified. Any remaining should be cleaned up below.
while (!DeadInsts.empty()) {
```
Applying this patch shows that simplifyLoopAfterUnroll destroys the LCSSA form of the outermost loop. I have attached the output produced with this patch below:
```
+++ bb before +++
define void @wombat() gc "statepoint-example" !prof !0 {
bb:
br label %bb1
bb1.loopexit: ; preds = %bb12
br label %bb1
bb1: ; preds = %bb1.loopexit, %bb
%phi = phi i32 [ 1, %bb ], [ 0, %bb1.loopexit ]
br label %bb2.peel.begin
bb2.peel.begin: ; preds = %bb1
br label %bb2.peel
bb2.peel: ; preds = %bb2.peel.begin
br label %bb4.peel
bb4.peel: ; preds = %bb2.peel
%trunc.peel = trunc i64 0 to i32
br i1 true, label %bb9.peel, label %bb7.loopexit2
bb9.peel: ; preds = %bb4.peel
%add.peel = add i32 1, %phi
br i1 true, label %bb9.1.peel, label %bb7.loopexit2
bb9.1.peel: ; preds = %bb9.peel
%add.1.peel = add i32 1, %phi
%sext.1.peel = sext i32 %add.1.peel to i64
br i1 false, label %bb12.preheader, label %bb2.peel.next
bb2.peel.next: ; preds = %bb9.1.peel
br label %bb2.peel.next1
bb2.peel.next1: ; preds = %bb2.peel.next
br label %bb1.peel.newph
bb1.peel.newph: ; preds = %bb2.peel.next1
br label %bb2
bb2: ; preds = %bb9.1, %bb1.peel.newph
%phi3 = phi i64 [ %sext.1.peel, %bb1.peel.newph ], [ %sext.1, %bb9.1 ]
br label %bb4
bb4: ; preds = %bb2
%trunc = trunc i64 %phi3 to i32
br i1 true, label %bb9, label %bb7.loopexit
bb7.loopexit: ; preds = %bb4, %bb9
%phi8.ph = phi i32 [ %add, %bb9 ], [ %trunc, %bb4 ]
br label %bb7
bb7.loopexit2: ; preds = %bb4.peel, %bb9.peel
%phi8.ph3 = phi i32 [ %add.peel, %bb9.peel ], [ %trunc.peel, %bb4.peel ]
br label %bb7
bb7: ; preds = %bb7.loopexit2, %bb7.loopexit
%phi8 = phi i32 [ %phi8.ph, %bb7.loopexit ], [ %phi8.ph3, %bb7.loopexit2 ]
ret void
bb9: ; preds = %bb4
%add = add i32 1, %phi
br i1 true, label %bb9.1, label %bb7.loopexit
bb9.1: ; preds = %bb9
%add.1 = add i32 1, %phi
%sext.1 = sext i32 %add.1 to i64
br i1 false, label %bb12.preheader.loopexit, label %bb2, !llvm.loop !1
bb12.preheader.loopexit: ; preds = %bb9.1
br label %bb12.preheader
bb12.preheader: ; preds = %bb12.preheader.loopexit, %bb9.1.peel
br label %bb12
bb12: ; preds = %bb12.preheader, %bb12
br i1 false, label %bb1.loopexit, label %bb12, !prof !3
}
+++ bb before +++
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
========================================
Loop at depth 1 containing: %bb1<header>,%bb2,%bb4<exiting>,%bb9<exiting>,%bb12,%bb12.preheader,%bb1.loopexit<latch>,%bb9.1,%bb2.peel.begin,%bb2.peel.next,%bb1.peel.newph,%bb2.peel,%bb4.peel<exiting>,%bb9.peel<exiting>,%bb9.1.peel,%bb2.peel.next1,%bb12.preheader.loopexit
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
Loop at depth 2 containing: %bb12<header><latch><exiting>
========================================
Islcssa (start) inside SLAU: 1
Islcssa (start) (parent) after SLAU: 1
========================================
+++ bb after +++
define void @wombat() gc "statepoint-example" !prof !0 {
bb:
br label %bb1
bb1.loopexit: ; preds = %bb12
br label %bb1
bb1: ; preds = %bb1.loopexit, %bb
%phi = phi i32 [ 1, %bb ], [ 0, %bb1.loopexit ]
br label %bb2.peel.begin
bb2.peel.begin: ; preds = %bb1
br label %bb2.peel
bb2.peel: ; preds = %bb2.peel.begin
br label %bb4.peel
bb4.peel: ; preds = %bb2.peel
%trunc.peel = trunc i64 0 to i32
br i1 true, label %bb9.peel, label %bb7.loopexit2
bb9.peel: ; preds = %bb4.peel
%add.peel = add i32 1, %phi
br i1 true, label %bb9.1.peel, label %bb7.loopexit2
bb9.1.peel: ; preds = %bb9.peel
%add.1.peel = add i32 1, %phi
%sext.1.peel = sext i32 %add.1.peel to i64
br i1 false, label %bb12.preheader, label %bb2.peel.next
bb2.peel.next: ; preds = %bb9.1.peel
br label %bb2.peel.next1
bb2.peel.next1: ; preds = %bb2.peel.next
br label %bb1.peel.newph
bb1.peel.newph: ; preds = %bb2.peel.next1
%0 = add nuw nsw i32 %phi, 1
br label %bb2
bb2: ; preds = %bb9.1, %bb1.peel.newph
%phi3 = phi i64 [ %sext.1.peel, %bb1.peel.newph ], [ %sext.1, %bb9.1 ]
br label %bb4
bb4: ; preds = %bb2
%trunc = trunc i64 %phi3 to i32
br i1 true, label %bb9, label %bb7.loopexit
bb7.loopexit: ; preds = %bb4, %bb9
%phi8.ph = phi i32 [ %add, %bb9 ], [ %trunc, %bb4 ]
br label %bb7
bb7.loopexit2: ; preds = %bb4.peel, %bb9.peel
%phi8.ph3 = phi i32 [ %add.peel, %bb9.peel ], [ %trunc.peel, %bb4.peel ]
br label %bb7
bb7: ; preds = %bb7.loopexit2, %bb7.loopexit
%phi8 = phi i32 [ %0, %bb7.loopexit ], [ %phi8.ph3, %bb7.loopexit2 ]
ret void
bb9: ; preds = %bb4
%add = add i32 1, %phi
br i1 true, label %bb9.1, label %bb7.loopexit
bb9.1: ; preds = %bb9
%add.1 = add i32 1, %phi
%sext.1 = sext i32 %add.1 to i64
br i1 false, label %bb12.preheader.loopexit, label %bb2, !llvm.loop !1
bb12.preheader.loopexit: ; preds = %bb9.1
br label %bb12.preheader
bb12.preheader: ; preds = %bb12.preheader.loopexit, %bb9.1.peel
br label %bb12
bb12: ; preds = %bb12.preheader, %bb12
br i1 false, label %bb1.loopexit, label %bb12, !prof !3
}
+++ bb after +++
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
========================================
Loop at depth 1 containing: %bb1<header>,%bb2,%bb4<exiting>,%bb9<exiting>,%bb12,%bb12.preheader,%bb1.loopexit<latch>,%bb9.1,%bb2.peel.begin,%bb2.peel.next,%bb1.peel.newph,%bb2.peel,%bb4.peel<exiting>,%bb9.peel<exiting>,%bb9.1.peel,%bb2.peel.next1,%bb12.preheader.loopexit
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
Loop at depth 2 containing: %bb12<header><latch><exiting>
========================================
Islcssa (middle-in) inside SLAU: 1
Islcssa (middle-in) (parent) after SLAU: 0
========================================
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
========================================
Loop at depth 1 containing: %bb1<header>,%bb2,%bb4<exiting>,%bb9<exiting>,%bb12,%bb12.preheader,%bb1.loopexit<latch>,%bb9.1,%bb2.peel.begin,%bb2.peel.next,%bb1.peel.newph,%bb2.peel,%bb4.peel<exiting>,%bb9.peel<exiting>,%bb9.1.peel,%bb2.peel.next1,%bb12.preheader.loopexit
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
Loop at depth 2 containing: %bb12<header><latch><exiting>
========================================
Islcssa (middle) inside SLAU: 1
Islcssa (middle) (parent) after SLAU: 0
========================================
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
========================================
Loop at depth 1 containing: %bb1<header>,%bb2,%bb4<exiting>,%bb9<exiting>,%bb12,%bb12.preheader,%bb1.loopexit<latch>,%bb9.1,%bb2.peel.begin,%bb2.peel.next,%bb1.peel.newph,%bb2.peel,%bb4.peel<exiting>,%bb9.peel<exiting>,%bb9.1.peel,%bb2.peel.next1,%bb12.preheader.loopexit
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
Loop at depth 2 containing: %bb12<header><latch><exiting>
========================================
Islcssa after SLAU: 1
Islcssa (parent) after SLAU: 0
========================================
Loop Unroll: F[wombat] Loop %bb12
Loop Size = 3
PEELING loop %bb12 with iteration count 1!
========================================
Loop at depth 2 containing: %bb12<header><latch><exiting>
========================================
Loop at depth 1 containing: %bb1<header>,%bb2,%bb4<exiting>,%bb9<exiting>,%bb12,%bb12.preheader,%bb1.loopexit<latch>,%bb9.1,%bb2.peel.begin,%bb2.peel.next,%bb1.peel.newph,%bb2.peel,%bb4.peel<exiting>,%bb9.peel<exiting>,%bb9.1.peel,%bb2.peel.next1,%bb12.preheader.loopexit
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
Loop at depth 2 containing: %bb12<header><latch><exiting>
========================================
Islcssa before PL: 1
Islcssa (parent) before PL: 0
========================================
opt: /home/vparanja/llvm-project-upstream/llvm/lib/Transforms/Utils/LoopSimplify.cpp:712: bool llvm::simplifyLoop(llvm::Loop*, llvm::DominatorTree*, llvm::LoopInfo*, llvm::ScalarEvolution*, llvm::AssumptionCache*, llvm::MemorySSAUpdater*, bool): Assertion `L->isRecursivelyLCSSAForm(*DT, *LI) && "Requested to preserve LCSSA, but it's already broken."' failed.
```
@nikic the issue is with the simplifyLoopIVs function call: it seems to move the incoming values to the PHI node from the successor to the predecessor. Is this expected behaviour? If yes, I think we can simply add something to re-form LCSSA after this call. Here's the diff of before and after: https://godbolt.org/z/9jo115Tas (look at bb1.peel.newph and bb7)
https://github.com/llvm/llvm-project/pull/78696
More information about the llvm-commits
mailing list