[llvm] [SimplifyCFG] Simplify nested branches (PR #97067)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 28 08:07:09 PDT 2024


https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/97067

This patch folds the following pattern (I don't know what to call this):
```
bb0:
  br i1 %cond1, label %bb1, label %bb2
bb1:
  br i1 %cond2, label %bb3, label %bb4
bb2:
  br i1 %cond2, label %bb4, label %bb3
bb3:
  ...
bb4:
  ...
```
into
```
bb0:
  %cond = xor i1 %cond1, %cond2
  br i1 %cond, label %bb4, label %bb3
bb3:
  ...
bb4:
  ...
```

Alive2: https://alive2.llvm.org/ce/z/5iOJEL
Closes https://github.com/llvm/llvm-project/issues/97022.

I found this pattern in code generated by Verilator, which is widely used in RTL simulation. This fold reduces the number of branches and improves CPU front-end performance. To my surprise, this pattern is also common in C/C++ code bases.
Affected libraries/applications: cmake/cvc5/freetype/git/gromacs/jq/linux/openblas/openmpi/openssl/php/postgres/ruby/sqlite/wireshark/z3/...




>From ffc27c851d814c9ee92b2fdc597b7fb5a00f6d89 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 28 Jun 2024 22:34:37 +0800
Subject: [PATCH 1/2] [SimplifyCFG] Add pre-commit tests from PR97022. NFC.

---
 .../Transforms/SimplifyCFG/branch-fold.ll     | 295 ++++++++++++++++++
 1 file changed, 295 insertions(+)

diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
index 2f5fb4f33013d..c0f7dc496f907 100644
--- a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
+++ b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
@@ -146,3 +146,298 @@ Succ:
 }
 
 declare void @dummy()
+
+define void @fold_nested_branch1(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br i1 [[COND2]], label [[BB4]], label [[BB3]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    ret void
+; CHECK:       bb3:
+; CHECK-NEXT:    call void @sideeffect1()
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    call void @sideeffect2()
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
+entry:
+  br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+  br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+  br i1 %cond2, label %bb4, label %bb3
+
+bb3:
+  call void @sideeffect1()
+  ret void
+
+bb4:
+  call void @sideeffect2()
+  ret void
+}
+
+define void @fold_nested_branch2(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[COMMON_RET:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br i1 [[COND2]], label [[BB4:%.*]], label [[BB3]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    ret void
+; CHECK:       bb3:
+; CHECK-NEXT:    call void @sideeffect1()
+; CHECK-NEXT:    br label [[COMMON_RET]]
+; CHECK:       bb4:
+; CHECK-NEXT:    call void @sideeffect2()
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
+entry:
+  br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+  br i1 %cond2, label %bb3, label %bb5
+
+bb2:
+  br i1 %cond2, label %bb4, label %bb3
+
+bb3:
+  call void @sideeffect1()
+  ret void
+
+bb4:
+  call void @sideeffect2()
+  ret void
+
+bb5:
+  ret void
+}
+
+define void @fold_nested_branch3(i1 %cond1, i1 %cond2, i1 %cond3) {
+; CHECK-LABEL: @fold_nested_branch3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br i1 [[COND3:%.*]], label [[BB4]], label [[BB3]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    ret void
+; CHECK:       bb3:
+; CHECK-NEXT:    call void @sideeffect1()
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    call void @sideeffect2()
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
+entry:
+  br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+  br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+  br i1 %cond3, label %bb4, label %bb3
+
+bb3:
+  call void @sideeffect1()
+  ret void
+
+bb4:
+  call void @sideeffect2()
+  ret void
+}
+
+define void @fold_nested_branch4(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    call void @sideeffect1()
+; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br i1 [[COND2]], label [[BB4]], label [[BB3]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    ret void
+; CHECK:       bb3:
+; CHECK-NEXT:    call void @sideeffect1()
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    call void @sideeffect2()
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
+entry:
+  br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+  call void @sideeffect1()
+  br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+  br i1 %cond2, label %bb4, label %bb3
+
+bb3:
+  call void @sideeffect1()
+  ret void
+
+bb4:
+  call void @sideeffect2()
+  ret void
+}
+
+define i32 @fold_nested_branch5(i1 %cond1, i1 %cond2, i32 %x) {
+; CHECK-LABEL: @fold_nested_branch5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[COMMON_RET:%.*]], label [[BB4:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br i1 [[COND2]], label [[BB4]], label [[COMMON_RET]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BB4]] ], [ 0, [[BB1]] ], [ [[X:%.*]], [[BB2]] ]
+; CHECK-NEXT:    ret i32 [[COMMON_RET_OP]]
+; CHECK:       bb4:
+; CHECK-NEXT:    call void @sideeffect2()
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
+entry:
+  br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+  br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+  br i1 %cond2, label %bb4, label %bb3
+
+bb3:
+  %ret = phi i32 [ 0, %bb1 ], [ %x, %bb2 ]
+  ret i32 %ret
+
+bb4:
+  call void @sideeffect2()
+  ret i32 0
+}
+
+define void @fold_nested_branch6(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND1_NOT:%.*]] = xor i1 [[COND1:%.*]], true
+; CHECK-NEXT:    [[BRMERGE:%.*]] = select i1 [[COND1_NOT]], i1 true, i1 [[COND2:%.*]]
+; CHECK-NEXT:    br i1 [[BRMERGE]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    ret void
+; CHECK:       bb3:
+; CHECK-NEXT:    call void @sideeffect1()
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    call void @sideeffect2()
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
+entry:
+  br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+  br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+  br i1 %cond2, label %bb1, label %bb3
+
+bb3:
+  call void @sideeffect1()
+  ret void
+
+bb4:
+  call void @sideeffect2()
+  ret void
+}
+
+define void @fold_nested_branch7(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB0:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br i1 [[COND2]], label [[BB0]], label [[BB3]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    ret void
+; CHECK:       bb3:
+; CHECK-NEXT:    call void @sideeffect1()
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    call void @sideeffect2()
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
+entry:
+  br label %bb0
+
+bb0:
+  br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+  br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+  br i1 %cond2, label %bb0, label %bb3
+
+bb3:
+  call void @sideeffect1()
+  ret void
+
+bb4:
+  call void @sideeffect2()
+  ret void
+}
+
+; freq(bb4) = 1 * 4 + 2 * 5 = 14
+; freq(bb3) = 1 * 3 + 2 * 6 = 15
+define void @fold_nested_branch_prof(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch_prof(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br i1 [[COND2]], label [[BB4]], label [[BB3]], !prof [[PROF2:![0-9]+]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    ret void
+; CHECK:       bb3:
+; CHECK-NEXT:    call void @sideeffect1()
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    call void @sideeffect2()
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
+entry:
+  br i1 %cond1, label %bb1, label %bb2, !prof !0 ; 1:2
+
+bb1:
+  br i1 %cond2, label %bb3, label %bb4, !prof !1 ; 3:4
+
+bb2:
+  br i1 %cond2, label %bb4, label %bb3, !prof !2 ; 5:6
+
+bb3:
+  call void @sideeffect1()
+  ret void
+
+bb4:
+  call void @sideeffect2()
+  ret void
+}
+
+!0 = !{!"branch_weights", i32 1, i32 2}
+!1 = !{!"branch_weights", i32 3, i32 4}
+!2 = !{!"branch_weights", i32 5, i32 6}
+
+declare void @sideeffect1()
+declare void @sideeffect2()

>From f65104016387813b5ceb2c99b17e06dcd1c51f8d Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 28 Jun 2024 22:57:10 +0800
Subject: [PATCH 2/2] [SimplifyCFG] Simplify nested branches.

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 85 +++++++++++++++++++
 .../Transforms/SimplifyCFG/branch-fold.ll     | 16 ++--
 2 files changed, 91 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 6847bb7502429..092ac3d5e5087 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7468,6 +7468,91 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
           if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
             return requestResimplify();
 
+  {
+    // Fold the following pattern:
+    // bb0:
+    //   br i1 %cond1, label %bb1, label %bb2
+    // bb1:
+    //   br i1 %cond2, label %bb3, label %bb4
+    // bb2:
+    //   br i1 %cond2, label %bb4, label %bb3
+    // bb3:
+    //   ...
+    // bb4:
+    //   ...
+    // into
+    // bb0:
+    //   %cond = xor i1 %cond1, %cond2
+    //   br i1 %cond, label %bb4, label %bb3
+    // bb3:
+    //   ...
+    // bb4:
+    //   ...
+    // NOTE: %cond2 always dominates the terminator of bb0.
+
+    BasicBlock *BB1 = BI->getSuccessor(0);
+    BasicBlock *BB2 = BI->getSuccessor(1);
+    auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
+      if (Succ == BB)
+        return false;
+      if (Succ->sizeWithoutDebug() > 1)
+        return false;
+      SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
+      if (!SuccBI || !SuccBI->isConditional())
+        return false;
+      BasicBlock *Succ1 = SuccBI->getSuccessor(0);
+      BasicBlock *Succ2 = SuccBI->getSuccessor(1);
+      return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
+             !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
+    };
+    BranchInst *BB1BI, *BB2BI;
+    if (IsSimpleSuccessor(BB1, BB1BI) && IsSimpleSuccessor(BB2, BB2BI) &&
+        BB1BI->getCondition() == BB2BI->getCondition() &&
+        BB1BI->getSuccessor(0) == BB2BI->getSuccessor(1) &&
+        BB1BI->getSuccessor(1) == BB2BI->getSuccessor(0)) {
+      BasicBlock *BB3 = BB1BI->getSuccessor(0);
+      BasicBlock *BB4 = BB1BI->getSuccessor(1);
+      IRBuilder<> Builder(BI);
+      BI->setCondition(
+          Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
+      BB1->removePredecessor(BB);
+      BI->setSuccessor(0, BB4);
+      BB2->removePredecessor(BB);
+      BI->setSuccessor(1, BB3);
+      if (DTU) {
+        SmallVector<DominatorTree::UpdateType, 4> Updates;
+        Updates.push_back({DominatorTree::Delete, BB, BB1});
+        Updates.push_back({DominatorTree::Insert, BB, BB4});
+        Updates.push_back({DominatorTree::Delete, BB, BB2});
+        Updates.push_back({DominatorTree::Insert, BB, BB3});
+
+        DTU->applyUpdates(Updates);
+      }
+      bool HasWeight = false;
+      uint64_t BBTWeight, BBFWeight;
+      if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
+        HasWeight = true;
+      else
+        BBTWeight = BBFWeight = 1;
+      uint64_t BB1TWeight, BB1FWeight;
+      if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
+        HasWeight = true;
+      else
+        BB1TWeight = BB1FWeight = 1;
+      uint64_t BB2TWeight, BB2FWeight;
+      if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
+        HasWeight = true;
+      else
+        BB2TWeight = BB2FWeight = 1;
+      uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
+                             BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
+      if (HasWeight) {
+        FitWeights(Weights);
+        setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
+      }
+    }
+  }
+
   return false;
 }
 
diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
index c0f7dc496f907..491eb52feafc7 100644
--- a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
+++ b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
@@ -150,11 +150,8 @@ declare void @dummy()
 define void @fold_nested_branch1(i1 %cond1, i1 %cond2) {
 ; CHECK-LABEL: @fold_nested_branch1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
-; CHECK:       bb1:
-; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
-; CHECK:       bb2:
-; CHECK-NEXT:    br i1 [[COND2]], label [[BB4]], label [[BB3]]
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND1:%.*]], [[COND2:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[BB4:%.*]], label [[BB3:%.*]]
 ; CHECK:       common.ret:
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb3:
@@ -403,11 +400,8 @@ bb4:
 define void @fold_nested_branch_prof(i1 %cond1, i1 %cond2) {
 ; CHECK-LABEL: @fold_nested_branch_prof(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK:       bb1:
-; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK:       bb2:
-; CHECK-NEXT:    br i1 [[COND2]], label [[BB4]], label [[BB3]], !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND1:%.*]], [[COND2:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[BB4:%.*]], label [[BB3:%.*]], !prof ![[PROF0:[0-9]+]]
 ; CHECK:       common.ret:
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb3:
@@ -439,5 +433,7 @@ bb4:
 !1 = !{!"branch_weights", i32 3, i32 4}
 !2 = !{!"branch_weights", i32 5, i32 6}
 
+;CHECK: ![[PROF0]] = !{!"branch_weights", i32 14, i32 15}
+
 declare void @sideeffect1()
 declare void @sideeffect2()



More information about the llvm-commits mailing list