[llvm] [LoopPeel] LCSSA form is destroyed by LoopPeel, preserve it (PR #78696)

Vedant Paranjape via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 1 11:15:22 PST 2024


vedantparanjape-amd wrote:

```
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 7cfeb019af97..937a31e0a532 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1269,11 +1269,37 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
              << " iterations";
     });

+    dbgs() << "========================================\n";
+    L->dump();
+    dbgs() << "========================================\n";
+    L->getParentLoop()->dump();
+    dbgs() << "========================================\n";
+    dbgs() << "Islcssa before PL: " << L->isRecursivelyLCSSAForm(DT, *LI) << "\n";
+    dbgs() << "Islcssa (parent) before PL: " << L->getParentLoop()->isRecursivelyLCSSAForm(DT, *LI) << "\n";
+    dbgs() << "========================================\n";
+
     ValueToValueMapTy VMap;
     if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA, VMap)) {
+      L->dump();
+      dbgs() << "========================================\n";
+      L->getParentLoop()->dump();
+      dbgs() << "========================================\n";
+      dbgs() << "Islcssa after PL: " << L->isRecursivelyLCSSAForm(DT, *LI) << "\n";
+      dbgs() << "Islcssa (parent) after PL: " << L->getParentLoop()->isRecursivelyLCSSAForm(DT, *LI) << "\n";
+      dbgs() << "========================================\n";
+
       simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI);
       // If the loop was peeled, we already "used up" the profile information
       // we had, so we don't want to unroll or peel again.
+
+      L->dump();
+      dbgs() << "========================================\n";
+      L->getParentLoop()->dump();
+      dbgs() << "========================================\n";
+      dbgs() << "Islcssa after SLAU: " << L->isRecursivelyLCSSAForm(DT, *LI) << "\n";
+      dbgs() << "Islcssa (parent) after SLAU: " << L->getParentLoop()->isRecursivelyLCSSAForm(DT, *LI) << "\n";
+      dbgs() << "========================================\n";
+
       if (PP.PeelProfiledIterations)
         L->setLoopAlreadyUnrolled();
       return LoopUnrollResult::PartiallyUnrolled;
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index ee6f7b35750a..17f7a5afe13b 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -218,11 +218,33 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
                                    const TargetTransformInfo *TTI) {
   using namespace llvm::PatternMatch;

+  dbgs() << "+++ bb before +++\n";
+  L->getHeader()->getParent()->dump();
+  dbgs() << "+++ bb before +++\n";
+  L->dump();
+  dbgs() << "========================================\n";
+  L->getParentLoop()->dump();
+  dbgs() << "========================================\n";
+  dbgs() << "Islcssa (start) inside SLAU: " << L->isRecursivelyLCSSAForm(*DT, *LI) << "\n";
+  dbgs() << "Islcssa (start) (parent) after SLAU: " << L->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI) << "\n";
+  dbgs() << "========================================\n";
+
   // Simplify any new induction variables in the partially unrolled loop.
   if (SE && SimplifyIVs) {
     SmallVector<WeakTrackingVH, 16> DeadInsts;
     simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts);

+    dbgs() << "+++ bb after +++\n";
+    L->getHeader()->getParent()->dump();
+    dbgs() << "+++ bb after +++\n";
+    L->dump();
+    dbgs() << "========================================\n";
+    L->getParentLoop()->dump();
+    dbgs() << "========================================\n";
+    dbgs() << "Islcssa (middle-in) inside SLAU: " << L->isRecursivelyLCSSAForm(*DT, *LI) << "\n";
+    dbgs() << "Islcssa (middle-in) (parent) after SLAU: " << L->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI) << "\n";
+    dbgs() << "========================================\n";
+
     // Aggressively clean up dead instructions that simplifyLoopIVs already
     // identified. Any remaining should be cleaned up below.
     while (!DeadInsts.empty()) {
```
Applying this patch shows that simplifyLoopAfterUnroll destroys the LCSSA form of the outermost loop. I have attached the debug output produced with this patch below.

```
+++ bb before +++
define void @wombat() gc "statepoint-example" !prof !0 {
bb:
  br label %bb1

bb1.loopexit:                                     ; preds = %bb12
  br label %bb1

bb1:                                              ; preds = %bb1.loopexit, %bb
  %phi = phi i32 [ 1, %bb ], [ 0, %bb1.loopexit ]
  br label %bb2.peel.begin

bb2.peel.begin:                                   ; preds = %bb1
  br label %bb2.peel

bb2.peel:                                         ; preds = %bb2.peel.begin
  br label %bb4.peel

bb4.peel:                                         ; preds = %bb2.peel
  %trunc.peel = trunc i64 0 to i32
  br i1 true, label %bb9.peel, label %bb7.loopexit2

bb9.peel:                                         ; preds = %bb4.peel
  %add.peel = add i32 1, %phi
  br i1 true, label %bb9.1.peel, label %bb7.loopexit2

bb9.1.peel:                                       ; preds = %bb9.peel
  %add.1.peel = add i32 1, %phi
  %sext.1.peel = sext i32 %add.1.peel to i64
  br i1 false, label %bb12.preheader, label %bb2.peel.next

bb2.peel.next:                                    ; preds = %bb9.1.peel
  br label %bb2.peel.next1

bb2.peel.next1:                                   ; preds = %bb2.peel.next
  br label %bb1.peel.newph

bb1.peel.newph:                                   ; preds = %bb2.peel.next1
  br label %bb2

bb2:                                              ; preds = %bb9.1, %bb1.peel.newph
  %phi3 = phi i64 [ %sext.1.peel, %bb1.peel.newph ], [ %sext.1, %bb9.1 ]
  br label %bb4

bb4:                                              ; preds = %bb2
  %trunc = trunc i64 %phi3 to i32
  br i1 true, label %bb9, label %bb7.loopexit

bb7.loopexit:                                     ; preds = %bb4, %bb9
  %phi8.ph = phi i32 [ %add, %bb9 ], [ %trunc, %bb4 ]
  br label %bb7

bb7.loopexit2:                                    ; preds = %bb4.peel, %bb9.peel
  %phi8.ph3 = phi i32 [ %add.peel, %bb9.peel ], [ %trunc.peel, %bb4.peel ]
  br label %bb7

bb7:                                              ; preds = %bb7.loopexit2, %bb7.loopexit
  %phi8 = phi i32 [ %phi8.ph, %bb7.loopexit ], [ %phi8.ph3, %bb7.loopexit2 ]
  ret void

bb9:                                              ; preds = %bb4
  %add = add i32 1, %phi
  br i1 true, label %bb9.1, label %bb7.loopexit

bb9.1:                                            ; preds = %bb9
  %add.1 = add i32 1, %phi
  %sext.1 = sext i32 %add.1 to i64
  br i1 false, label %bb12.preheader.loopexit, label %bb2, !llvm.loop !1

bb12.preheader.loopexit:                          ; preds = %bb9.1
  br label %bb12.preheader

bb12.preheader:                                   ; preds = %bb12.preheader.loopexit, %bb9.1.peel
  br label %bb12

bb12:                                             ; preds = %bb12.preheader, %bb12
  br i1 false, label %bb1.loopexit, label %bb12, !prof !3
}

+++ bb before +++
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
========================================
Loop at depth 1 containing: %bb1<header>,%bb2,%bb4<exiting>,%bb9<exiting>,%bb12,%bb12.preheader,%bb1.loopexit<latch>,%bb9.1,%bb2.peel.begin,%bb2.peel.next,%bb1.peel.newph,%bb2.peel,%bb4.peel<exiting>,%bb9.peel<exiting>,%bb9.1.peel,%bb2.peel.next1,%bb12.preheader.loopexit
    Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
    Loop at depth 2 containing: %bb12<header><latch><exiting>
========================================
Islcssa (start) inside SLAU: 1
Islcssa (start) (parent) after SLAU: 1
========================================
+++ bb after +++
define void @wombat() gc "statepoint-example" !prof !0 {
bb:
  br label %bb1

bb1.loopexit:                                     ; preds = %bb12
  br label %bb1

bb1:                                              ; preds = %bb1.loopexit, %bb
  %phi = phi i32 [ 1, %bb ], [ 0, %bb1.loopexit ]
  br label %bb2.peel.begin

bb2.peel.begin:                                   ; preds = %bb1
  br label %bb2.peel

bb2.peel:                                         ; preds = %bb2.peel.begin
  br label %bb4.peel

bb4.peel:                                         ; preds = %bb2.peel
  %trunc.peel = trunc i64 0 to i32
  br i1 true, label %bb9.peel, label %bb7.loopexit2

bb9.peel:                                         ; preds = %bb4.peel
  %add.peel = add i32 1, %phi
  br i1 true, label %bb9.1.peel, label %bb7.loopexit2

bb9.1.peel:                                       ; preds = %bb9.peel
  %add.1.peel = add i32 1, %phi
  %sext.1.peel = sext i32 %add.1.peel to i64
  br i1 false, label %bb12.preheader, label %bb2.peel.next

bb2.peel.next:                                    ; preds = %bb9.1.peel
  br label %bb2.peel.next1

bb2.peel.next1:                                   ; preds = %bb2.peel.next
  br label %bb1.peel.newph

bb1.peel.newph:                                   ; preds = %bb2.peel.next1
  %0 = add nuw nsw i32 %phi, 1
  br label %bb2

bb2:                                              ; preds = %bb9.1, %bb1.peel.newph
  %phi3 = phi i64 [ %sext.1.peel, %bb1.peel.newph ], [ %sext.1, %bb9.1 ]
  br label %bb4

bb4:                                              ; preds = %bb2
  %trunc = trunc i64 %phi3 to i32
  br i1 true, label %bb9, label %bb7.loopexit

bb7.loopexit:                                     ; preds = %bb4, %bb9
  %phi8.ph = phi i32 [ %add, %bb9 ], [ %trunc, %bb4 ]
  br label %bb7

bb7.loopexit2:                                    ; preds = %bb4.peel, %bb9.peel
  %phi8.ph3 = phi i32 [ %add.peel, %bb9.peel ], [ %trunc.peel, %bb4.peel ]
  br label %bb7

bb7:                                              ; preds = %bb7.loopexit2, %bb7.loopexit
  %phi8 = phi i32 [ %0, %bb7.loopexit ], [ %phi8.ph3, %bb7.loopexit2 ]
  ret void

bb9:                                              ; preds = %bb4
  %add = add i32 1, %phi
  br i1 true, label %bb9.1, label %bb7.loopexit

bb9.1:                                            ; preds = %bb9
  %add.1 = add i32 1, %phi
  %sext.1 = sext i32 %add.1 to i64
  br i1 false, label %bb12.preheader.loopexit, label %bb2, !llvm.loop !1

bb12.preheader.loopexit:                          ; preds = %bb9.1
  br label %bb12.preheader

bb12.preheader:                                   ; preds = %bb12.preheader.loopexit, %bb9.1.peel
  br label %bb12

bb12:                                             ; preds = %bb12.preheader, %bb12
  br i1 false, label %bb1.loopexit, label %bb12, !prof !3
}

+++ bb after +++
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
========================================
Loop at depth 1 containing: %bb1<header>,%bb2,%bb4<exiting>,%bb9<exiting>,%bb12,%bb12.preheader,%bb1.loopexit<latch>,%bb9.1,%bb2.peel.begin,%bb2.peel.next,%bb1.peel.newph,%bb2.peel,%bb4.peel<exiting>,%bb9.peel<exiting>,%bb9.1.peel,%bb2.peel.next1,%bb12.preheader.loopexit
    Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
    Loop at depth 2 containing: %bb12<header><latch><exiting>
========================================
Islcssa (middle-in) inside SLAU: 1
Islcssa (middle-in) (parent) after SLAU: 0
========================================
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
========================================
Loop at depth 1 containing: %bb1<header>,%bb2,%bb4<exiting>,%bb9<exiting>,%bb12,%bb12.preheader,%bb1.loopexit<latch>,%bb9.1,%bb2.peel.begin,%bb2.peel.next,%bb1.peel.newph,%bb2.peel,%bb4.peel<exiting>,%bb9.peel<exiting>,%bb9.1.peel,%bb2.peel.next1,%bb12.preheader.loopexit
    Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
    Loop at depth 2 containing: %bb12<header><latch><exiting>
========================================
Islcssa (middle) inside SLAU: 1
Islcssa (middle) (parent) after SLAU: 0
========================================
Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
========================================
Loop at depth 1 containing: %bb1<header>,%bb2,%bb4<exiting>,%bb9<exiting>,%bb12,%bb12.preheader,%bb1.loopexit<latch>,%bb9.1,%bb2.peel.begin,%bb2.peel.next,%bb1.peel.newph,%bb2.peel,%bb4.peel<exiting>,%bb9.peel<exiting>,%bb9.1.peel,%bb2.peel.next1,%bb12.preheader.loopexit
    Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
    Loop at depth 2 containing: %bb12<header><latch><exiting>
========================================
Islcssa after SLAU: 1
Islcssa (parent) after SLAU: 0
========================================
Loop Unroll: F[wombat] Loop %bb12
  Loop Size = 3
PEELING loop %bb12 with iteration count 1!
========================================
Loop at depth 2 containing: %bb12<header><latch><exiting>
========================================
Loop at depth 1 containing: %bb1<header>,%bb2,%bb4<exiting>,%bb9<exiting>,%bb12,%bb12.preheader,%bb1.loopexit<latch>,%bb9.1,%bb2.peel.begin,%bb2.peel.next,%bb1.peel.newph,%bb2.peel,%bb4.peel<exiting>,%bb9.peel<exiting>,%bb9.1.peel,%bb2.peel.next1,%bb12.preheader.loopexit
    Parallel Loop at depth 2 containing: %bb2<header>,%bb4<exiting>,%bb9<exiting>,%bb9.1<latch><exiting>
    Loop at depth 2 containing: %bb12<header><latch><exiting>
========================================
Islcssa before PL: 1
Islcssa (parent) before PL: 0
========================================
opt: /home/vparanja/llvm-project-upstream/llvm/lib/Transforms/Utils/LoopSimplify.cpp:712: bool llvm::simplifyLoop(llvm::Loop*, llvm::DominatorTree*, llvm::LoopInfo*, llvm::ScalarEvolution*, llvm::AssumptionCache*, llvm::MemorySSAUpdater*, bool): Assertion `L->isRecursivelyLCSSAForm(*DT, *LI) && "Requested to preserve LCSSA, but it's already broken."' failed.
```
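@nikic the issue is with the simplifyLoopIVs call: it seems to rewrite the incoming value of the PHI node in bb7, replacing the LCSSA PHI from the exit block (%phi8.ph in bb7.loopexit) with a new instruction (%0) created in bb1.peel.newph, so a value defined inside the outer loop is now used directly outside it. Is this expected behaviour? If yes, I think we can simply add something to re-form LCSSA after this call. Here's the diff of before and after: https://godbolt.org/z/9jo115Tas (look at bb1.peel.newph and bb7)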

https://github.com/llvm/llvm-project/pull/78696


More information about the llvm-commits mailing list