[llvm] 85c3088 - [StructurizeCFG] Fix region nodes ordering

Mon Jun 1 02:53:35 PDT 2020

Author: Ehud Katz
Date: 2020-06-01T12:50:35+03:00
New Revision: 85c308804966e52a14bc98874a6cc6e42cbb1d40

URL: https://github.com/llvm/llvm-project/commit/85c308804966e52a14bc98874a6cc6e42cbb1d40
DIFF: https://github.com/llvm/llvm-project/commit/85c308804966e52a14bc98874a6cc6e42cbb1d40.diff

LOG: [StructurizeCFG] Fix region nodes ordering

This is a reimplementation of the `orderNodes` function, as the old
implementation didn't take into account all cases.
The new implementation uses SCCs instead of Loops to take account of
irreducible loops.

Fix PR41509

Differential Revision: https://reviews.llvm.org/D79037

Added: 
    llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll
    llvm/test/Transforms/StructurizeCFG/nested-loop-subregion.ll

Modified: 
    llvm/lib/Transforms/Scalar/StructurizeCFG.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 84d8eebd9720..ef59f2412f68 100644

--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -8,13 +8,12 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Analysis/RegionPass.h"
@@ -90,6 +89,59 @@ using BBPredicates = DenseMap<BasicBlock *, Value *>;
 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
 
+// A traits type that is intended to be used in graph algorithms. The graph
+// traits starts at an entry node, and traverses the RegionNodes that are in
+// the Nodes set.
+struct SubGraphTraits {
+  using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
+  using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
+
+  // This wraps a set of Nodes into the iterator, so we know which edges to
+  // filter out.
+  class WrappedSuccIterator
+      : public iterator_adaptor_base<
+            WrappedSuccIterator, BaseSuccIterator,
+            typename std::iterator_traits<BaseSuccIterator>::iterator_category,
+            NodeRef, std::ptr
diff _t, NodeRef *, NodeRef> {
+    SmallDenseSet<RegionNode *> *Nodes;
+
+  public:
+    WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
+        : iterator_adaptor_base(It), Nodes(Nodes) {}
+
+    NodeRef operator*() const { return {*I, Nodes}; }
+  };
+
+  static bool filterAll(const NodeRef &N) { return true; }
+  static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
+
+  using ChildIteratorType =
+      filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
+
+  static NodeRef getEntryNode(Region *R) {
+    return {GraphTraits<Region *>::getEntryNode(R), nullptr};
+  }
+
+  static NodeRef getEntryNode(NodeRef N) { return N; }
+
+  static iterator_range<ChildIteratorType> children(const NodeRef &N) {
+    auto *filter = N.second ? &filterSet : &filterAll;
+    return make_filter_range(
+        make_range<WrappedSuccIterator>(
+            {GraphTraits<RegionNode *>::child_begin(N.first), N.second},
+            {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
+        filter);
+  }
+
+  static ChildIteratorType child_begin(const NodeRef &N) {
+    return children(N).begin();
+  }
+
+  static ChildIteratorType child_end(const NodeRef &N) {
+    return children(N).end();
+  }
+};
+
 /// Finds the nearest common dominator of a set of BasicBlocks.
 ///
 /// For every BB you add to the set, you can specify whether we "remember" the
@@ -194,7 +246,6 @@ class StructurizeCFG : public RegionPass {
 
   LegacyDivergenceAnalysis *DA;
   DominatorTree *DT;
-  LoopInfo *LI;
 
   SmallVector<RegionNode *, 8> Order;
   BBSet Visited;
@@ -214,9 +265,6 @@ class StructurizeCFG : public RegionPass {
 
   void orderNodes();
 
-  Loop *getAdjustedLoop(RegionNode *RN);
-  unsigned getAdjustedLoopDepth(RegionNode *RN);
-
   void analyzeLoops(RegionNode *N);
 
   Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
@@ -282,7 +330,6 @@ class StructurizeCFG : public RegionPass {
       AU.addRequired<LegacyDivergenceAnalysis>();
     AU.addRequiredID(LowerSwitchID);
     AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<LoopInfoWrapperPass>();
 
     AU.addPreserved<DominatorTreeWrapperPass>();
     RegionPass::getAnalysisUsage(AU);
@@ -314,75 +361,60 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
   return false;
 }
 
-/// Use the exit block to determine the loop if RN is a SubRegion.
-Loop *StructurizeCFG::getAdjustedLoop(RegionNode *RN) {
-  if (RN->isSubRegion()) {
-    Region *SubRegion = RN->getNodeAs<Region>();
-    return LI->getLoopFor(SubRegion->getExit());
-  }
-
-  return LI->getLoopFor(RN->getEntry());
-}
-
-/// Use the exit block to determine the loop depth if RN is a SubRegion.
-unsigned StructurizeCFG::getAdjustedLoopDepth(RegionNode *RN) {
-  if (RN->isSubRegion()) {
-    Region *SubR = RN->getNodeAs<Region>();
-    return LI->getLoopDepth(SubR->getExit());
-  }
-
-  return LI->getLoopDepth(RN->getEntry());
-}
-
-/// Build up the general order of nodes
+/// Build up the general order of nodes, by performing a topology sort of the
+/// parent region's nodes, while ensuring that there is no outer cycle node
+/// between any two inner cycle nodes.
 void StructurizeCFG::orderNodes() {
-  ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
-  SmallDenseMap<Loop*, unsigned, 8> LoopBlocks;
-
-  // The reverse post-order traversal of the list gives us an ordering close
-  // to what we want.  The only problem with it is that sometimes backedges
-  // for outer loops will be visited before backedges for inner loops.
-  for (RegionNode *RN : RPOT) {
-    Loop *Loop = getAdjustedLoop(RN);
-    ++LoopBlocks[Loop];
-  }
-
-  unsigned CurrentLoopDepth = 0;
-  Loop *CurrentLoop = nullptr;
-  for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
-    RegionNode *RN = cast<RegionNode>(*I);
-    unsigned LoopDepth = getAdjustedLoopDepth(RN);
-
-    if (is_contained(Order, *I))
-      continue;
-
-    if (LoopDepth < CurrentLoopDepth) {
-      // Make sure we have visited all blocks in this loop before moving back to
-      // the outer loop.
+  Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
+                             GraphTraits<Region *>::nodes_end(ParentRegion)));
+  if (Order.empty())
+    return;
 
-      auto LoopI = I;
-      while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) {
-        LoopI++;
-        if (getAdjustedLoop(cast<RegionNode>(*LoopI)) == CurrentLoop) {
-          --BlockCount;
-          Order.push_back(*LoopI);
-        }
+  SmallDenseSet<RegionNode *> Nodes;
+  auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
+
+  // A list of range indices of SCCs in Order, to be processed.
+  SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
+  unsigned I = 0, E = Order.size();
+  while (true) {
+    // Run through all the SCCs in the subgraph starting with Entry.
+    for (auto SCCI =
+             scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
+                 EntryNode);
+         !SCCI.isAtEnd(); ++SCCI) {
+      auto &SCC = *SCCI;
+
+      // An SCC up to the size of 2, can be reduced to an entry (the last node),
+      // and a possible additional node. Therefore, it is already in order, and
+      // there is no need to add it to the work-list.
+      unsigned Size = SCC.size();
+      if (Size > 2)
+        WorkList.emplace_back(I, I + Size);
+
+      // Add the SCC nodes to the Order array.
+      for (auto &N : SCC) {
+        assert(I < E && "SCC size mismatch!");
+        Order[I++] = N.first;
       }
     }
+    assert(I == E && "SCC size mismatch!");
 
-    CurrentLoop = getAdjustedLoop(RN);
-    if (CurrentLoop)
-      LoopBlocks[CurrentLoop]--;
+    // If there are no more SCCs to order, then we are done.
+    if (WorkList.empty())
+      break;
 
-    CurrentLoopDepth = LoopDepth;
-    Order.push_back(*I);
-  }
+    std::tie(I, E) = WorkList.pop_back_val();
+
+    // Collect the set of nodes in the SCC's subgraph. These are only the
+    // possible child nodes; we do not add the entry (last node) otherwise we
+    // will have the same exact SCC all over again.
+    Nodes.clear();
+    Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
 
-  // This pass originally used a post-order traversal and then operated on
-  // the list in reverse. Now that we are using a reverse post-order traversal
-  // rather than re-working the whole pass to operate on the list in order,
-  // we just reverse the list and continue to operate on it in reverse.
-  std::reverse(Order.begin(), Order.end());
+    // Update the entry node.
+    EntryNode.first = Order[E - 1];
+    EntryNode.second = &Nodes;
+  }
 }
 
 /// Determine the end of the loops
@@ -490,8 +522,7 @@ void StructurizeCFG::collectInfos() {
   for (RegionNode *RN : reverse(Order)) {
     LLVM_DEBUG(dbgs() << "Visiting: "
                       << (RN->isSubRegion() ? "SubRegion with entry: " : "")
-                      << RN->getEntry()->getName() << " Loop Depth: "
-                      << LI->getLoopDepth(RN->getEntry()) << "\n");
+                      << RN->getEntry()->getName() << "\n");
 
     // Analyze all the conditions leading to a node
     gatherPredicates(RN);
@@ -1013,7 +1044,6 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
   ParentRegion = R;
 
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
 
   orderNodes();
   collectInfos();

diff  --git a/llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll b/llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll
new file mode 100644
index 000000000000..05713c890fb9
--- /dev/null
+++ b/llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll
@@ -0,0 +1,262 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+
+; This test have an outer loop containing an inner loop,
+; for which there is an interleaved post-order traversal.
+;
+; This used to produce incorrect code.
+; For example %outer.loop.body used to branched to %inner.loop.end
+; (instead of %inner.loop.header).
+
+define i1 @test_nested(i32 %x, i1 %b1, i1 %b2, i1 %b3) {
+; CHECK-LABEL: @test_nested(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B3_INV:%.*]] = xor i1 [[B3:%.*]], true
+; CHECK-NEXT:    br label [[OUTER_LOOP_HEADER:%.*]]
+; CHECK:       Flow12:
+; CHECK-NEXT:    br i1 [[TMP3:%.*]], label [[EXIT_TRUE:%.*]], label [[FLOW13:%.*]]
+; CHECK:       exit.true:
+; CHECK-NEXT:    br label [[FLOW13]]
+; CHECK:       Flow13:
+; CHECK-NEXT:    br i1 [[TMP2:%.*]], label [[NEWDEFAULT:%.*]], label [[FLOW14:%.*]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[EXIT_FALSE:%.*]]
+; CHECK:       Flow14:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ false, [[EXIT_FALSE]] ], [ true, [[FLOW13]] ]
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit.false:
+; CHECK-NEXT:    br label [[FLOW14]]
+; CHECK:       outer.loop.header:
+; CHECK-NEXT:    br i1 [[B1:%.*]], label [[OUTER_LOOP_BODY:%.*]], label [[FLOW3:%.*]]
+; CHECK:       outer.loop.body:
+; CHECK-NEXT:    br label [[INNER_LOOP_HEADER:%.*]]
+; CHECK:       Flow3:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP16:%.*]], [[FLOW11:%.*]] ], [ true, [[OUTER_LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP2]] = phi i1 [ [[TMP12:%.*]], [[FLOW11]] ], [ false, [[OUTER_LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP3]] = phi i1 [ false, [[FLOW11]] ], [ true, [[OUTER_LOOP_HEADER]] ]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FLOW12:%.*]], label [[OUTER_LOOP_HEADER]]
+; CHECK:       inner.loop.header:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i1 [ [[TMP8:%.*]], [[FLOW4:%.*]] ], [ false, [[OUTER_LOOP_BODY]] ]
+; CHECK-NEXT:    br i1 [[B2:%.*]], label [[INNER_LOOP_BODY:%.*]], label [[FLOW4]]
+; CHECK:       Flow6:
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i1 [ false, [[INNER_LOOP_LATCH:%.*]] ], [ true, [[LEAFBLOCK:%.*]] ]
+; CHECK-NEXT:    br label [[FLOW5:%.*]]
+; CHECK:       Flow7:
+; CHECK-NEXT:    br i1 [[TMP10:%.*]], label [[INNER_LOOP_END:%.*]], label [[FLOW8:%.*]]
+; CHECK:       inner.loop.end:
+; CHECK-NEXT:    br label [[FLOW8]]
+; CHECK:       inner.loop.body:
+; CHECK-NEXT:    br i1 [[B3_INV]], label [[INNER_LOOP_BODY_ELSE:%.*]], label [[FLOW:%.*]]
+; CHECK:       inner.loop.body.else:
+; CHECK-NEXT:    br label [[FLOW]]
+; CHECK:       Flow:
+; CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ false, [[INNER_LOOP_BODY_ELSE]] ], [ true, [[INNER_LOOP_BODY]] ]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[INNER_LOOP_BODY_THEN:%.*]], label [[INNER_LOOP_COND:%.*]]
+; CHECK:       inner.loop.body.then:
+; CHECK-NEXT:    br label [[INNER_LOOP_COND]]
+; CHECK:       Flow4:
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i1 [ [[TMP17:%.*]], [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP8]] = phi i1 [ [[TMP18:%.*]], [[FLOW5]] ], [ [[TMP4]], [[INNER_LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i1 [ [[TMP19:%.*]], [[FLOW5]] ], [ false, [[INNER_LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP10]] = phi i1 [ false, [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[FLOW7:%.*]], label [[INNER_LOOP_HEADER]]
+; CHECK:       inner.loop.cond:
+; CHECK-NEXT:    br label [[NODEBLOCK:%.*]]
+; CHECK:       NodeBlock:
+; CHECK-NEXT:    [[PIVOT:%.*]] = icmp slt i32 [[X:%.*]], 1
+; CHECK-NEXT:    br i1 [[PIVOT]], label [[LEAFBLOCK]], label [[FLOW5]]
+; CHECK:       Flow8:
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i1 [ true, [[INNER_LOOP_END]] ], [ false, [[FLOW7]] ]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[LEAFBLOCK1:%.*]], label [[FLOW9:%.*]]
+; CHECK:       LeafBlock1:
+; CHECK-NEXT:    [[SWITCHLEAF2:%.*]] = icmp eq i32 [[X]], 1
+; CHECK-NEXT:    br i1 [[SWITCHLEAF2]], label [[INNER_LOOP_BREAK:%.*]], label [[FLOW10:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[INNER_LOOP_LATCH]], label [[FLOW6:%.*]]
+; CHECK:       Flow9:
+; CHECK-NEXT:    [[TMP12]] = phi i1 [ [[TMP14:%.*]], [[FLOW10]] ], [ [[TMP8]], [[FLOW8]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i1 [ [[TMP15:%.*]], [[FLOW10]] ], [ [[TMP11]], [[FLOW8]] ]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[OUTER_LOOP_CLEANUP:%.*]], label [[FLOW11]]
+; CHECK:       inner.loop.break:
+; CHECK-NEXT:    br label [[FLOW10]]
+; CHECK:       Flow10:
+; CHECK-NEXT:    [[TMP14]] = phi i1 [ false, [[INNER_LOOP_BREAK]] ], [ true, [[LEAFBLOCK1]] ]
+; CHECK-NEXT:    [[TMP15]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP11]], [[LEAFBLOCK1]] ]
+; CHECK-NEXT:    br label [[FLOW9]]
+; CHECK:       outer.loop.cleanup:
+; CHECK-NEXT:    br label [[OUTER_LOOP_LATCH:%.*]]
+; CHECK:       Flow11:
+; CHECK-NEXT:    [[TMP16]] = phi i1 [ false, [[OUTER_LOOP_LATCH]] ], [ true, [[FLOW9]] ]
+; CHECK-NEXT:    br label [[FLOW3]]
+; CHECK:       outer.loop.latch:
+; CHECK-NEXT:    br label [[FLOW11]]
+; CHECK:       Flow5:
+; CHECK-NEXT:    [[TMP17]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ true, [[NODEBLOCK]] ]
+; CHECK-NEXT:    [[TMP18]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ [[TMP4]], [[NODEBLOCK]] ]
+; CHECK-NEXT:    [[TMP19]] = phi i1 [ false, [[FLOW6]] ], [ true, [[NODEBLOCK]] ]
+; CHECK-NEXT:    br label [[FLOW4]]
+; CHECK:       inner.loop.latch:
+; CHECK-NEXT:    br label [[FLOW6]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  br label %outer.loop.header
+
+exit.true:                                       ; preds = %outer.loop.header
+  br label %exit
+
+exit.false:                                      ; preds = %inner.loop.cond
+  br label %exit
+
+outer.loop.header:                               ; preds = %outer.loop.latch, %entry
+  br i1 %b1, label %outer.loop.body, label %exit.true
+
+outer.loop.body:                                 ; preds = %outer.loop.header
+  br label %inner.loop.header
+
+inner.loop.header:                               ; preds = %inner.loop.latch, %outer.loop.body
+  br i1 %b2, label %inner.loop.body, label %inner.loop.end
+
+inner.loop.end:                                  ; preds = %inner.loop.header
+  br label %outer.loop.cleanup
+
+inner.loop.body:                                 ; preds = %inner.loop.header
+  br i1 %b3, label %inner.loop.body.then, label %inner.loop.body.else
+
+inner.loop.body.else:                            ; preds = %inner.loop.body
+  br label %inner.loop.cond
+
+inner.loop.body.then:                            ; preds = %inner.loop.body
+  br label %inner.loop.cond
+
+inner.loop.cond:                                 ; preds = %inner.loop.body.then, %inner.loop.body.else
+  switch i32 %x, label %exit.false [
+  i32 0, label %inner.loop.latch
+  i32 1, label %inner.loop.break
+  ]
+
+inner.loop.break:                                ; preds = %inner.loop.cond
+  br label %outer.loop.cleanup
+
+outer.loop.cleanup:                              ; preds = %inner.loop.break, %inner.loop.end
+  br label %outer.loop.latch
+
+outer.loop.latch:                                ; preds = %outer.loop.cleanup
+  br label %outer.loop.header
+
+inner.loop.latch:                                ; preds = %inner.loop.cond
+  br label %inner.loop.header
+
+exit:                                            ; preds = %exit.false, %exit.true
+  %r = phi i1 [ true, %exit.true ], [ false, %exit.false ]
+  ret i1 %r
+}
+
+; This test checks sibling loops that by default have an
+; interleaved post-order traversal.
+
+define void @test_siblings(i1 %b1, i1 %b2, i1 %b3, i1 %b4, i1 %b5, i1 %b6, i1 %b7, i1 %b8, i1 %b9) {
+; CHECK-LABEL: @test_siblings(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B9_INV:%.*]] = xor i1 [[B9:%.*]], true
+; CHECK-NEXT:    [[B6_INV:%.*]] = xor i1 [[B6:%.*]], true
+; CHECK-NEXT:    [[B2_INV:%.*]] = xor i1 [[B2:%.*]], true
+; CHECK-NEXT:    [[B8_INV:%.*]] = xor i1 [[B8:%.*]], true
+; CHECK-NEXT:    [[B5_INV:%.*]] = xor i1 [[B5:%.*]], true
+; CHECK-NEXT:    [[B3_INV:%.*]] = xor i1 [[B3:%.*]], true
+; CHECK-NEXT:    [[B4_INV:%.*]] = xor i1 [[B4:%.*]], true
+; CHECK-NEXT:    [[B1_INV:%.*]] = xor i1 [[B1:%.*]], true
+; CHECK-NEXT:    br i1 [[B1_INV]], label [[IF_ELSE:%.*]], label [[FLOW:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    br label [[FLOW]]
+; CHECK:       Flow:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ [[TMP0]], [[FLOW1:%.*]] ], [ [[B2]], [[IF_ELSE]] ], [ false, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP5:%.*]], [[FLOW1]] ], [ [[B2_INV]], [[IF_ELSE]] ], [ false, [[ENTRY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i1 [ false, [[FLOW1]] ], [ false, [[IF_ELSE]] ], [ true, [[ENTRY]] ]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[LOOP1_HEADER:%.*]], label [[FLOW1]]
+; CHECK:       loop1.header:
+; CHECK-NEXT:    br i1 [[B3_INV]], label [[LOOP1_BODY:%.*]], label [[FLOW2:%.*]]
+; CHECK:       Flow2:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i1 [ true, [[LOOP1_BODY]] ], [ [[TMP1]], [[LOOP1_HEADER]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i1 [ [[B5_INV]], [[LOOP1_BODY]] ], [ [[B3]], [[LOOP1_HEADER]] ]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[LOOP1_LATCH:%.*]], label [[FLOW3:%.*]]
+; CHECK:       loop1.latch:
+; CHECK-NEXT:    br label [[FLOW3]]
+; CHECK:       Flow1:
+; CHECK-NEXT:    [[TMP5]] = phi i1 [ [[TMP6:%.*]], [[FLOW3]] ], [ [[TMP1]], [[FLOW]] ]
+; CHECK-NEXT:    br i1 true, label [[FLOW4:%.*]], label [[FLOW]]
+; CHECK:       loop1.body:
+; CHECK-NEXT:    br label [[FLOW2]]
+; CHECK:       Flow3:
+; CHECK-NEXT:    [[TMP6]] = phi i1 [ false, [[LOOP1_LATCH]] ], [ [[TMP3]], [[FLOW2]] ]
+; CHECK-NEXT:    br label [[FLOW1]]
+; CHECK:       Flow4:
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i1 [ false, [[FLOW5:%.*]] ], [ [[TMP5]], [[FLOW1]] ]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[LOOP2_HEADER:%.*]], label [[FLOW5]]
+; CHECK:       loop2.header:
+; CHECK-NEXT:    br i1 [[B6_INV]], label [[LOOP2_BODY:%.*]], label [[FLOW6:%.*]]
+; CHECK:       Flow5:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i1 [ [[TMP11:%.*]], [[FLOW7:%.*]] ], [ false, [[FLOW4]] ]
+; CHECK-NEXT:    br i1 true, label [[FLOW8:%.*]], label [[FLOW4]]
+; CHECK:       loop2.body:
+; CHECK-NEXT:    br label [[FLOW6]]
+; CHECK:       Flow6:
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i1 [ true, [[LOOP2_BODY]] ], [ false, [[LOOP2_HEADER]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i1 [ [[B7:%.*]], [[LOOP2_BODY]] ], [ [[B6]], [[LOOP2_HEADER]] ]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[LOOP2_LATCH:%.*]], label [[FLOW7]]
+; CHECK:       loop2.latch:
+; CHECK-NEXT:    br label [[FLOW7]]
+; CHECK:       Flow7:
+; CHECK-NEXT:    [[TMP11]] = phi i1 [ false, [[LOOP2_LATCH]] ], [ [[TMP9]], [[FLOW6]] ]
+; CHECK-NEXT:    br label [[FLOW5]]
+; CHECK:       Flow8:
+; CHECK-NEXT:    [[TMP12:%.*]] = phi i1 [ false, [[FLOW10:%.*]] ], [ [[TMP0]], [[FLOW5]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i1 [ false, [[FLOW10]] ], [ [[TMP8]], [[FLOW5]] ]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[LOOP3_HEADER:%.*]], label [[FLOW9:%.*]]
+; CHECK:       loop3.header:
+; CHECK-NEXT:    br label [[FLOW9]]
+; CHECK:       Flow9:
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i1 [ true, [[LOOP3_HEADER]] ], [ [[TMP12]], [[FLOW8]] ]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[LOOP3_LATCH:%.*]], label [[FLOW10]]
+; CHECK:       loop3.latch:
+; CHECK-NEXT:    br label [[FLOW10]]
+; CHECK:       Flow10:
+; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[FLOW8]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %b1, label %loop1.header, label %if.else
+
+if.else:
+  br i1 %b2, label %loop3.latch, label %loop2.header
+
+loop1.header:
+  br i1 %b3, label %loop1.latch, label %loop1.body
+
+loop1.latch:
+  br i1 %b4, label %loop1.header, label %exit
+
+loop1.body:
+  br i1 %b5, label %loop2.header, label %loop1.latch
+
+loop2.header:
+  br i1 %b6, label %loop2.latch, label %loop2.body
+
+loop2.body:
+  br i1 %b7, label %loop2.latch, label %loop3.header
+
+loop2.latch:
+  br i1 %b8, label %loop2.header, label %exit
+
+loop3.header:
+  br label %loop3.latch
+
+loop3.latch:
+  br i1 %b9, label %loop3.header, label %exit
+
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/StructurizeCFG/nested-loop-subregion.ll b/llvm/test/Transforms/StructurizeCFG/nested-loop-subregion.ll
new file mode 100644
index 000000000000..70023d7ed98a
--- /dev/null
+++ b/llvm/test/Transforms/StructurizeCFG/nested-loop-subregion.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+
+define void @test(i1 %b1, i1 %b2, i1 %b3, i1 %b4) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  A:
+; CHECK-NEXT:    [[B2_INV:%.*]] = xor i1 [[B2:%.*]], true
+; CHECK-NEXT:    br i1 [[B1:%.*]], label [[B:%.*]], label [[H:%.*]]
+; CHECK:       B:
+; CHECK-NEXT:    br label [[C:%.*]]
+; CHECK:       C:
+; CHECK-NEXT:    br i1 [[B2_INV]], label [[E:%.*]], label [[FLOW:%.*]]
+; CHECK:       Flow:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ [[B3:%.*]], [[E]] ], [ true, [[C]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ false, [[E]] ], [ true, [[C]] ]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[FLOW1:%.*]], label [[C]]
+; CHECK:       Flow1:
+; CHECK-NEXT:    br i1 [[TMP1]], label [[D:%.*]], label [[F:%.*]]
+; CHECK:       D:
+; CHECK-NEXT:    br label [[F]]
+; CHECK:       E:
+; CHECK-NEXT:    br label [[FLOW]]
+; CHECK:       F:
+; CHECK-NEXT:    br label [[G:%.*]]
+; CHECK:       G:
+; CHECK-NEXT:    br i1 [[B4:%.*]], label [[FLOW2:%.*]], label [[B]]
+; CHECK:       Flow2:
+; CHECK-NEXT:    br label [[H]]
+; CHECK:       H:
+; CHECK-NEXT:    ret void
+;
+A:
+  br i1 %b1, label %B, label %H
+
+B:
+  br label %C
+
+C:
+  br i1 %b2, label %D, label %E
+
+D:
+  br label %F
+
+E:
+  br i1 %b3, label %F, label %C
+
+F:
+  br label %G
+
+G:
+  br i1 %b4, label %H, label %B
+
+H:
+  ret void
+}