[llvm] [SimplifyCFG] Simplify switch instruction that has duplicate arms (PR #114262)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 30 09:49:21 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Michael Maitland (michaelmaitland)
<details>
<summary>Changes</summary>
I noticed that the two C functions emitted different IR:
```
int switch_duplicate_arms(int switch_val, int v, int w) {
switch (switch_val) {
default:
break;
case 0:
w = v;
break;
case 1:
w = v;
break;
}
return w;
}
int if_duplicate_arms(int switch_val, int v, int w) {
if (switch_val == 0)
w = v;
else if (switch_val == 1)
w = v;
return w;
}
```
For `switch_duplicate_arms`, we generate IR that looks like this:
```
define i32 @switch_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
switch i32 %1, label %7 [
i32 0, label %5
i32 1, label %6
]
5:
br label %7
6:
br label %7
7:
%8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ]
ret i32 %8
}
```
For the equivalent `if_duplicate_arms`, we generate:
```
define i32 @if_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
%5 = icmp ult i32 %1, 2
%6 = select i1 %5, i32 %2, i32 %3
ret i32 %6
}
```
For `switch_duplicate_arms`, taking case 0 and taking case 1 are equivalent,
since %5 and %6 branch to the same location and the incoming values for %8
from those blocks are the same. We could remove one of the duplicate switch
targets and update the switch to use the single remaining target.
On RISC-V, prior to this patch, we generate the following code:
```
switch_duplicate_arms:
li a4, 1
beq a1, a4, .LBB0_2
mv a0, a3
bnez a1, .LBB0_3
.LBB0_2:
mv a0, a2
.LBB0_3:
ret
if_duplicate_arms:
li a4, 2
mv a0, a2
bltu a1, a4, .LBB1_2
mv a0, a3
.LBB1_2:
ret
```
After this patch, the O3 code is optimized to the icmp + select pair, which
gives us the same code gen as `if_duplicate_arms`, as desired.
This may help with both code size and further switch simplification. I found
that this patch causes no significant impact to spec2006/int/ref and
spec2017/intrate/ref.
---
Full diff: https://github.com/llvm/llvm-project/pull/114262.diff
5 Files Affected:
- (modified) llvm/lib/Transforms/Utils/SimplifyCFG.cpp (+92)
- (modified) llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll (+2-4)
- (modified) llvm/test/Transforms/SimplifyCFG/HoistCode.ll (+2-4)
- (added) llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll (+140)
- (modified) llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll (+2-4)
``````````diff
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 72228b445a8b6e..f44364ea507b7b 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -276,6 +276,7 @@ class SimplifyCFGOpt {
bool simplifyCleanupReturn(CleanupReturnInst *RI);
bool simplifyUnreachable(UnreachableInst *UI);
bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
+ bool simplifyDuplicateSwitchArms(SwitchInst *SI);
bool simplifyIndirectBr(IndirectBrInst *IBI);
bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
@@ -7436,6 +7437,94 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
return true;
}
+bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
+ // Simplify the case where multiple arms contain only a terminator, the
+ // terminators are the same, and their successor PHIs' incoming values are the
+ // same.
+
+ // Find BBs that are candidates for simplification.
+ SmallPtrSet<BasicBlock *, 8> BBs;
+ for (auto &Case : SI->cases()) {
+ BasicBlock *BB = Case.getCaseSuccessor();
+
+ // FIXME: This case needs some extra care because the terminators other than
+ // SI need to be updated.
+ if (!BB->hasNPredecessors(1))
+ continue;
+
+ // FIXME: Relax that the terminator is a BranchInst by checking for equality
+ // on other kinds of terminators.
+ Instruction *T = BB->getTerminator();
+ if (T && BB->size() == 1 && isa<BranchInst>(T))
+ BBs.insert(BB);
+ }
+
+ auto IsBranchEq = [](BranchInst *A, BranchInst *B) {
+ if (A->isConditional() != B->isConditional())
+ return false;
+
+ if (A->isConditional() && A->getCondition() != B->getCondition())
+ return false;
+
+ if (A->getNumSuccessors() != B->getNumSuccessors())
+ return false;
+
+ for (unsigned I = 0; I < A->getNumSuccessors(); ++I)
+ if (A->getSuccessor(I) != B->getSuccessor(I))
+ return false;
+
+ // Need to check that PHIs in successors have matching values
+ for (auto *Succ : A->successors()) {
+ for (PHINode &Phi : Succ->phis())
+ if (Phi.getIncomingValueForBlock(A->getParent()) !=
+ Phi.getIncomingValueForBlock(B->getParent()))
+ return false;
+ }
+
+ return true;
+ };
+
+ // Construct a map from candidate basic block to an equivalent basic block
+ // to replace it with. All equivalent basic blocks should be replaced with
+ // the same basic block. To do this, if there is no equivalent BB in the map,
+ // then insert into the map BB -> BB. Otherwise, we should check only elements
+ // in the map for equivalence to ensure that all equivalent BB get replaced
+ // by the BB in the map. Replacing BB with BB has no impact, so we skip
+ // a call to setSuccessor when we do the actual replacement.
+ DenseMap<BasicBlock *, BasicBlock *> ReplaceWith;
+ for (BasicBlock *BB : BBs) {
+ bool Inserted = false;
+ for (auto KV : ReplaceWith) {
+ if (IsBranchEq(cast<BranchInst>(BB->getTerminator()),
+ cast<BranchInst>(KV.first->getTerminator()))) {
+ ReplaceWith[BB] = KV.first;
+ Inserted = true;
+ break;
+ }
+ }
+ if (!Inserted)
+ ReplaceWith[BB] = BB;
+ }
+
+ // Do the replacement in SI.
+ bool MadeChange = false;
+ // There is no fast lookup of BasicBlock -> Cases, so we iterate over cases
+ // and check that the case was a candidate. BBs is already filtered, so
+ // hopefully calling contains on it is not too expensive.
+ for (auto &Case : SI->cases()) {
+ BasicBlock *OldSucc = Case.getCaseSuccessor();
+ if (!BBs.contains(OldSucc))
+ continue;
+ BasicBlock *NewSucc = ReplaceWith[OldSucc];
+ if (OldSucc != NewSucc) {
+ Case.setSuccessor(NewSucc);
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
BasicBlock *BB = SI->getParent();
@@ -7496,6 +7585,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
return requestResimplify();
+ if (simplifyDuplicateSwitchArms(SI))
+ return requestResimplify();
+
return false;
}
diff --git a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
index 8ad455eb9e7f22..4623eb2c5dd3c1 100644
--- a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
@@ -139,16 +139,14 @@ define i32 @PR34471(i32 %x) {
; NO_FWD-NEXT: switch i32 [[X:%.*]], label [[ELSE3:%.*]] [
; NO_FWD-NEXT: i32 17, label [[RETURN:%.*]]
; NO_FWD-NEXT: i32 19, label [[IF19:%.*]]
-; NO_FWD-NEXT: i32 42, label [[IF42:%.*]]
+; NO_FWD-NEXT: i32 42, label [[IF19]]
; NO_FWD-NEXT: ]
; NO_FWD: if19:
; NO_FWD-NEXT: br label [[RETURN]]
-; NO_FWD: if42:
-; NO_FWD-NEXT: br label [[RETURN]]
; NO_FWD: else3:
; NO_FWD-NEXT: br label [[RETURN]]
; NO_FWD: return:
-; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ [[X]], [[IF42]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ]
+; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ]
; NO_FWD-NEXT: ret i32 [[R]]
;
; FWD-LABEL: @PR34471(
diff --git a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll
index fe0b48028a3b62..fbe41d891c1ec5 100644
--- a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll
+++ b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll
@@ -65,14 +65,12 @@ define float @PR39535min_switch(i64 %i, float %x) {
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i64 [[I:%.*]], label [[END:%.*]] [
; CHECK-NEXT: i64 1, label [[BB1:%.*]]
-; CHECK-NEXT: i64 2, label [[BB2:%.*]]
+; CHECK-NEXT: i64 2, label [[BB1]]
; CHECK-NEXT: ]
; CHECK: bb1:
; CHECK-NEXT: br label [[END]]
-; CHECK: bb2:
-; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ [[X]], [[BB2]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret float [[COND]]
;
entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
new file mode 100644
index 00000000000000..b12db656fdf681
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s -check-prefix=SIMPLIFY-CFG
+; RUN: opt < %s -O3 -S | FileCheck %s -check-prefix=O3
+
+define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
+; SIMPLIFY-CFG-LABEL: define i32 @switch_all_duplicate_arms(
+; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) {
+; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB6:.*]] [
+; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB5:.*]]
+; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB5]]
+; SIMPLIFY-CFG-NEXT: ]
+; SIMPLIFY-CFG: [[BB5]]:
+; SIMPLIFY-CFG-NEXT: br label %[[BB6]]
+; SIMPLIFY-CFG: [[BB6]]:
+; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB5]] ]
+; SIMPLIFY-CFG-NEXT: ret i32 [[TMP8]]
+;
+; O3-LABEL: define i32 @switch_all_duplicate_arms(
+; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O3-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[TMP1]], 2
+; O3-NEXT: [[TMP8:%.*]] = select i1 [[SWITCH]], i32 [[TMP2]], i32 [[TMP3]]
+; O3-NEXT: ret i32 [[TMP8]]
+;
+ switch i32 %1, label %7 [
+ i32 0, label %5
+ i32 1, label %6
+ ]
+
+5:
+ br label %7
+
+6:
+ br label %7
+
+7:
+ %8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ]
+ ret i32 %8
+}
+
+define i32 @switch_some_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
+; SIMPLIFY-CFG-LABEL: define i32 @switch_some_duplicate_arms(
+; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) {
+; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB8:.*]] [
+; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB6:.*]]
+; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB6]]
+; SIMPLIFY-CFG-NEXT: i32 2, label %[[BB7:.*]]
+; SIMPLIFY-CFG-NEXT: ]
+; SIMPLIFY-CFG: [[BB6]]:
+; SIMPLIFY-CFG-NEXT: br label %[[BB8]]
+; SIMPLIFY-CFG: [[BB7]]:
+; SIMPLIFY-CFG-NEXT: br label %[[BB8]]
+; SIMPLIFY-CFG: [[BB8]]:
+; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
+; SIMPLIFY-CFG-NEXT: ret i32 [[TMP10]]
+;
+; O3-LABEL: define i32 @switch_some_duplicate_arms(
+; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; O3-NEXT: switch i32 [[TMP1]], label %[[BB8:.*]] [
+; O3-NEXT: i32 0, label %[[BB6:.*]]
+; O3-NEXT: i32 1, label %[[BB6]]
+; O3-NEXT: i32 2, label %[[BB7:.*]]
+; O3-NEXT: ]
+; O3: [[BB6]]:
+; O3-NEXT: br label %[[BB8]]
+; O3: [[BB7]]:
+; O3-NEXT: br label %[[BB8]]
+; O3: [[BB8]]:
+; O3-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
+; O3-NEXT: ret i32 [[TMP10]]
+;
+ switch i32 %1, label %9 [
+ i32 0, label %6
+ i32 1, label %7
+ i32 2, label %8
+ ]
+
+6:
+ br label %9
+
+7:
+ br label %9
+
+8:
+ br label %9
+
+9:
+ %10 = phi i32 [ %3, %5 ], [ %4, %8 ], [ %2, %7 ], [ %2, %6 ]
+ ret i32 %10
+}
+
+define i32 @switch_duplicate_arms_multipred(i1 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
+; SIMPLIFY-CFG-LABEL: define i32 @switch_duplicate_arms_multipred(
+; SIMPLIFY-CFG-SAME: i1 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) {
+; SIMPLIFY-CFG-NEXT: br i1 [[TMP0]], label %[[BB6:.*]], label %[[BB7:.*]]
+; SIMPLIFY-CFG: [[BB6]]:
+; SIMPLIFY-CFG-NEXT: switch i32 [[TMP2]], label %[[BB9:.*]] [
+; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB7]]
+; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB8:.*]]
+; SIMPLIFY-CFG-NEXT: ]
+; SIMPLIFY-CFG: [[BB7]]:
+; SIMPLIFY-CFG-NEXT: br label %[[BB9]]
+; SIMPLIFY-CFG: [[BB8]]:
+; SIMPLIFY-CFG-NEXT: br label %[[BB9]]
+; SIMPLIFY-CFG: [[BB9]]:
+; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ]
+; SIMPLIFY-CFG-NEXT: ret i32 [[TMP10]]
+;
+; O3-LABEL: define i32 @switch_duplicate_arms_multipred(
+; O3-SAME: i1 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; O3-NEXT: br i1 [[TMP0]], label %[[BB6:.*]], label %[[BB7:.*]]
+; O3: [[BB6]]:
+; O3-NEXT: switch i32 [[TMP2]], label %[[BB9:.*]] [
+; O3-NEXT: i32 0, label %[[BB7]]
+; O3-NEXT: i32 1, label %[[BB8:.*]]
+; O3-NEXT: ]
+; O3: [[BB7]]:
+; O3-NEXT: br label %[[BB9]]
+; O3: [[BB8]]:
+; O3-NEXT: br label %[[BB9]]
+; O3: [[BB9]]:
+; O3-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ]
+; O3-NEXT: ret i32 [[TMP10]]
+;
+ br i1 %0, label %6, label %7
+6:
+ switch i32 %2, label %9 [
+ i32 0, label %7
+ i32 1, label %8
+ ]
+
+7:
+ br label %9
+
+8:
+ br label %9
+
+9:
+ %10 = phi i32 [ %4, %6 ], [ %3, %8 ], [ %3, %7 ]
+ ret i32 %10
+}
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll b/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
index 1e2f18b3f339d4..50998e447b71dc 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
@@ -272,16 +272,14 @@ define i8 @switch_to_select_two_case_results_no_default(i32 %i) {
; CHECK-NEXT: i32 0, label [[END:%.*]]
; CHECK-NEXT: i32 2, label [[END]]
; CHECK-NEXT: i32 4, label [[CASE3:%.*]]
-; CHECK-NEXT: i32 6, label [[CASE4:%.*]]
+; CHECK-NEXT: i32 6, label [[CASE3]]
; CHECK-NEXT: ]
; CHECK: case3:
; CHECK-NEXT: br label [[END]]
-; CHECK: case4:
-; CHECK-NEXT: br label [[END]]
; CHECK: default:
; CHECK-NEXT: unreachable
; CHECK: end:
-; CHECK-NEXT: [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 44, [[CASE4]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ]
+; CHECK-NEXT: [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ]
; CHECK-NEXT: ret i8 [[T0]]
;
entry:
``````````
</details>
https://github.com/llvm/llvm-project/pull/114262
More information about the llvm-commits
mailing list