[llvm] [SimplifyCFG] Simplify nested branches (PR #97067)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 28 08:07:09 PDT 2024
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/97067
This patch folds the following pattern (I don't know what to call this):
```
bb0:
br i1 %cond1, label %bb1, label %bb2
bb1:
br i1 %cond2, label %bb3, label %bb4
bb2:
br i1 %cond2, label %bb4, label %bb3
bb3:
...
bb4:
...
```
into
```
bb0:
%cond = xor i1 %cond1, %cond2
br i1 %cond, label %bb4, label %bb3
bb3:
...
bb4:
...
```
Alive2: https://alive2.llvm.org/ce/z/5iOJEL
Closes https://github.com/llvm/llvm-project/issues/97022.
I found this pattern in some Verilator-generated code, which is widely used in RTL simulation. This fold reduces the number of branches and improves CPU front-end performance. To my surprise, this pattern is also common in C/C++ codebases.
Affected libraries/applications: cmake/cvc5/freetype/git/gromacs/jq/linux/openblas/openmpi/openssl/php/postgres/ruby/sqlite/wireshark/z3/...
From ffc27c851d814c9ee92b2fdc597b7fb5a00f6d89 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 28 Jun 2024 22:34:37 +0800
Subject: [PATCH 1/2] [SimplifyCFG] Add pre-commit tests from PR97022. NFC.
---
.../Transforms/SimplifyCFG/branch-fold.ll | 295 ++++++++++++++++++
1 file changed, 295 insertions(+)
diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
index 2f5fb4f33013d..c0f7dc496f907 100644
--- a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
+++ b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
@@ -146,3 +146,298 @@ Succ:
}
declare void @dummy()
+
+define void @fold_nested_branch1(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 [[COND2]], label [[BB4]], label [[BB3]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+ br i1 %cond2, label %bb4, label %bb3
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+}
+
+define void @fold_nested_branch2(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[COMMON_RET:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 [[COND2]], label [[BB4:%.*]], label [[BB3]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb5
+
+bb2:
+ br i1 %cond2, label %bb4, label %bb3
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+
+bb5:
+ ret void
+}
+
+define void @fold_nested_branch3(i1 %cond1, i1 %cond2, i1 %cond3) {
+; CHECK-LABEL: @fold_nested_branch3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 [[COND3:%.*]], label [[BB4]], label [[BB3]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+ br i1 %cond3, label %bb4, label %bb3
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+}
+
+define void @fold_nested_branch4(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 [[COND2]], label [[BB4]], label [[BB3]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ call void @sideeffect1()
+ br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+ br i1 %cond2, label %bb4, label %bb3
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+}
+
+define i32 @fold_nested_branch5(i1 %cond1, i1 %cond2, i32 %x) {
+; CHECK-LABEL: @fold_nested_branch5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[COMMON_RET:%.*]], label [[BB4:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 [[COND2]], label [[BB4]], label [[COMMON_RET]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BB4]] ], [ 0, [[BB1]] ], [ [[X:%.*]], [[BB2]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+ br i1 %cond2, label %bb4, label %bb3
+
+bb3:
+ %ret = phi i32 [ 0, %bb1 ], [ %x, %bb2 ]
+ ret i32 %ret
+
+bb4:
+ call void @sideeffect2()
+ ret i32 0
+}
+
+define void @fold_nested_branch6(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND1_NOT:%.*]] = xor i1 [[COND1:%.*]], true
+; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[COND1_NOT]], i1 true, i1 [[COND2:%.*]]
+; CHECK-NEXT: br i1 [[BRMERGE]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+ br i1 %cond2, label %bb1, label %bb3
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+}
+
+define void @fold_nested_branch7(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch7(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[BB0:%.*]]
+; CHECK: bb0:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 [[COND2]], label [[BB0]], label [[BB3]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br label %bb0
+
+bb0:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+ br i1 %cond2, label %bb0, label %bb3
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+}
+
+; freq(bb4) = 1 * 4 + 2 * 5 = 14
+; freq(bb3) = 1 * 3 + 2 * 6 = 15
+define void @fold_nested_branch_prof(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch_prof(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 [[COND2]], label [[BB4]], label [[BB3]], !prof [[PROF2:![0-9]+]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2, !prof !0 ; 1:2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb4, !prof !1 ; 3:4
+
+bb2:
+ br i1 %cond2, label %bb4, label %bb3, !prof !2 ; 5:6
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+}
+
+!0 = !{!"branch_weights", i32 1, i32 2}
+!1 = !{!"branch_weights", i32 3, i32 4}
+!2 = !{!"branch_weights", i32 5, i32 6}
+
+declare void @sideeffect1()
+declare void @sideeffect2()
From f65104016387813b5ceb2c99b17e06dcd1c51f8d Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 28 Jun 2024 22:57:10 +0800
Subject: [PATCH 2/2] [SimplifyCFG] Simplify nested branches.
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 85 +++++++++++++++++++
.../Transforms/SimplifyCFG/branch-fold.ll | 16 ++--
2 files changed, 91 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 6847bb7502429..092ac3d5e5087 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7468,6 +7468,91 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
return requestResimplify();
+  {
+    // Fold the following pattern:
+    // bb0:
+    //   br i1 %cond1, label %bb1, label %bb2
+    // bb1:
+    //   br i1 %cond2, label %bb3, label %bb4
+    // bb2:
+    //   br i1 %cond2, label %bb4, label %bb3
+    // bb3:
+    //   ...
+    // bb4:
+    //   ...
+    // into
+    // bb0:
+    //   %cond = xor i1 %cond1, %cond2
+    //   br i1 %cond, label %bb4, label %bb3
+    // bb3:
+    //   ...
+    // bb4:
+    //   ...
+    // NOTE: %cond2 always dominates the terminator of bb0.
+    BasicBlock *BB1 = BI->getSuccessor(0);
+    BasicBlock *BB2 = BI->getSuccessor(1);
+    auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
+      if (Succ == BB)
+        return false;
+      if (Succ->sizeWithoutDebug() > 1)
+        return false;
+      SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
+      if (!SuccBI || !SuccBI->isConditional())
+        return false;
+      BasicBlock *Succ1 = SuccBI->getSuccessor(0);
+      BasicBlock *Succ2 = SuccBI->getSuccessor(1);
+      return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
+             !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
+    };
+    BranchInst *BB1BI, *BB2BI;
+    if (IsSimpleSuccessor(BB1, BB1BI) && IsSimpleSuccessor(BB2, BB2BI) &&
+        BB1BI->getCondition() == BB2BI->getCondition() &&
+        BB1BI->getSuccessor(0) == BB2BI->getSuccessor(1) &&
+        BB1BI->getSuccessor(1) == BB2BI->getSuccessor(0)) {
+      BasicBlock *BB3 = BB1BI->getSuccessor(0);
+      BasicBlock *BB4 = BB1BI->getSuccessor(1);
+      IRBuilder<> XorBuilder(BI); // Named to avoid shadowing the parameter.
+      BI->setCondition(
+          XorBuilder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
+      BB1->removePredecessor(BB);
+      BI->setSuccessor(0, BB4);
+      BB2->removePredecessor(BB);
+      BI->setSuccessor(1, BB3);
+      if (DTU) {
+        SmallVector<DominatorTree::UpdateType, 4> Updates;
+        Updates.push_back({DominatorTree::Delete, BB, BB1});
+        Updates.push_back({DominatorTree::Insert, BB, BB4});
+        Updates.push_back({DominatorTree::Delete, BB, BB2});
+        Updates.push_back({DominatorTree::Insert, BB, BB3});
+        DTU->applyUpdates(Updates);
+      }
+      bool HasWeight = false;
+      uint64_t BBTWeight, BBFWeight;
+      if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
+        HasWeight = true;
+      else
+        BBTWeight = BBFWeight = 1;
+      uint64_t BB1TWeight, BB1FWeight;
+      if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
+        HasWeight = true;
+      else
+        BB1TWeight = BB1FWeight = 1;
+      uint64_t BB2TWeight, BB2FWeight;
+      if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
+        HasWeight = true;
+      else
+        BB2TWeight = BB2FWeight = 1;
+      uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
+                             BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
+      if (HasWeight) {
+        FitWeights(Weights);
+        setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
+      }
+      // BI and the CFG were rewritten above, so report the change.
+      return requestResimplify();
+    }
+  }
+
return false;
}
diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
index c0f7dc496f907..491eb52feafc7 100644
--- a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
+++ b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
@@ -150,11 +150,8 @@ declare void @dummy()
define void @fold_nested_branch1(i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @fold_nested_branch1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
-; CHECK: bb2:
-; CHECK-NEXT: br i1 [[COND2]], label [[BB4]], label [[BB3]]
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND1:%.*]], [[COND2:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[BB4:%.*]], label [[BB3:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: bb3:
@@ -403,11 +400,8 @@ bb4:
define void @fold_nested_branch_prof(i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @fold_nested_branch_prof(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK: bb1:
-; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: bb2:
-; CHECK-NEXT: br i1 [[COND2]], label [[BB4]], label [[BB3]], !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND1:%.*]], [[COND2:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[BB4:%.*]], label [[BB3:%.*]], !prof ![[PROF0:[0-9]+]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: bb3:
@@ -439,5 +433,7 @@ bb4:
!1 = !{!"branch_weights", i32 3, i32 4}
!2 = !{!"branch_weights", i32 5, i32 6}
+;CHECK: ![[PROF0]] = !{!"branch_weights", i32 14, i32 15}
+
declare void @sideeffect1()
declare void @sideeffect2()
More information about the llvm-commits
mailing list