[llvm] [SimplifyCFG] Simplify switch instruction that has duplicate arms (PR #114262)

Michael Maitland via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 30 14:50:57 PDT 2024


https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/114262

>From fbfa46a57becdedf4a93406f7e6b3cec84bd61c9 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 30 Oct 2024 06:11:43 -0700
Subject: [PATCH 1/5] [SimplifyCFG] precommit tests for simplify switch with
 duplicate arms

---
 .../Transforms/SimplifyCFG/switch-dup-bbs.ll  | 154 ++++++++++++++++++
 1 file changed, 154 insertions(+)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll

diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
new file mode 100644
index 00000000000000..3575597e408562
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
@@ -0,0 +1,154 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s -check-prefix=SIMPLIFY-CFG
+; RUN: opt < %s -O3 -S | FileCheck %s -check-prefix=O3
+
+define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
+; SIMPLIFY-CFG-LABEL: define i32 @switch_all_duplicate_arms(
+; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) {
+; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP1]], label %[[BB7:.*]] [
+; SIMPLIFY-CFG-NEXT:      i32 0, label %[[BB5:.*]]
+; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB6:.*]]
+; SIMPLIFY-CFG-NEXT:    ]
+; SIMPLIFY-CFG:       [[BB5]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB7]]
+; SIMPLIFY-CFG:       [[BB6]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB7]]
+; SIMPLIFY-CFG:       [[BB7]]:
+; SIMPLIFY-CFG-NEXT:    [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ]
+; SIMPLIFY-CFG-NEXT:    ret i32 [[TMP8]]
+;
+; O3-LABEL: define i32 @switch_all_duplicate_arms(
+; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O3-NEXT:    switch i32 [[TMP1]], label %[[BB7:.*]] [
+; O3-NEXT:      i32 0, label %[[BB5:.*]]
+; O3-NEXT:      i32 1, label %[[BB6:.*]]
+; O3-NEXT:    ]
+; O3:       [[BB5]]:
+; O3-NEXT:    br label %[[BB7]]
+; O3:       [[BB6]]:
+; O3-NEXT:    br label %[[BB7]]
+; O3:       [[BB7]]:
+; O3-NEXT:    [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ]
+; O3-NEXT:    ret i32 [[TMP8]]
+;
+  switch i32 %1, label %7 [
+  i32 0, label %5
+  i32 1, label %6
+  ]
+
+5:
+  br label %7
+
+6:
+  br label %7
+
+7:
+  %8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ]
+  ret i32 %8
+}
+
+define i32 @switch_some_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
+; SIMPLIFY-CFG-LABEL: define i32 @switch_some_duplicate_arms(
+; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) {
+; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP1]], label %[[BB9:.*]] [
+; SIMPLIFY-CFG-NEXT:      i32 0, label %[[BB6:.*]]
+; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB7:.*]]
+; SIMPLIFY-CFG-NEXT:      i32 2, label %[[BB8:.*]]
+; SIMPLIFY-CFG-NEXT:    ]
+; SIMPLIFY-CFG:       [[BB6]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG:       [[BB7]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG:       [[BB8]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG:       [[BB9]]:
+; SIMPLIFY-CFG-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
+; SIMPLIFY-CFG-NEXT:    ret i32 [[TMP10]]
+;
+; O3-LABEL: define i32 @switch_some_duplicate_arms(
+; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; O3-NEXT:    switch i32 [[TMP1]], label %[[BB9:.*]] [
+; O3-NEXT:      i32 0, label %[[BB6:.*]]
+; O3-NEXT:      i32 1, label %[[BB7:.*]]
+; O3-NEXT:      i32 2, label %[[BB8:.*]]
+; O3-NEXT:    ]
+; O3:       [[BB6]]:
+; O3-NEXT:    br label %[[BB9]]
+; O3:       [[BB7]]:
+; O3-NEXT:    br label %[[BB9]]
+; O3:       [[BB8]]:
+; O3-NEXT:    br label %[[BB9]]
+; O3:       [[BB9]]:
+; O3-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
+; O3-NEXT:    ret i32 [[TMP10]]
+;
+  switch i32 %1, label %9 [
+  i32 0, label %6
+  i32 1, label %7
+  i32 2, label %8
+  ]
+
+6:
+  br label %9
+
+7:
+  br label %9
+
+8:
+  br label %9
+
+9:
+  %10 = phi i32 [ %3, %5 ], [ %4, %8 ], [ %2, %7 ], [ %2, %6 ]
+  ret i32 %10
+}
+
+define i32 @switch_duplicate_arms_multipred(i1 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
+; SIMPLIFY-CFG-LABEL: define i32 @switch_duplicate_arms_multipred(
+; SIMPLIFY-CFG-SAME: i1 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) {
+; SIMPLIFY-CFG-NEXT:    br i1 [[TMP0]], label %[[BB6:.*]], label %[[BB7:.*]]
+; SIMPLIFY-CFG:       [[BB6]]:
+; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP2]], label %[[BB9:.*]] [
+; SIMPLIFY-CFG-NEXT:      i32 0, label %[[BB7]]
+; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB8:.*]]
+; SIMPLIFY-CFG-NEXT:    ]
+; SIMPLIFY-CFG:       [[BB7]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG:       [[BB8]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG:       [[BB9]]:
+; SIMPLIFY-CFG-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ]
+; SIMPLIFY-CFG-NEXT:    ret i32 [[TMP10]]
+;
+; O3-LABEL: define i32 @switch_duplicate_arms_multipred(
+; O3-SAME: i1 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; O3-NEXT:    br i1 [[TMP0]], label %[[BB6:.*]], label %[[BB7:.*]]
+; O3:       [[BB6]]:
+; O3-NEXT:    switch i32 [[TMP2]], label %[[BB9:.*]] [
+; O3-NEXT:      i32 0, label %[[BB7]]
+; O3-NEXT:      i32 1, label %[[BB8:.*]]
+; O3-NEXT:    ]
+; O3:       [[BB7]]:
+; O3-NEXT:    br label %[[BB9]]
+; O3:       [[BB8]]:
+; O3-NEXT:    br label %[[BB9]]
+; O3:       [[BB9]]:
+; O3-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ]
+; O3-NEXT:    ret i32 [[TMP10]]
+;
+  br i1 %0, label %6, label %7
+6:
+  switch i32 %2, label %9 [
+  i32 0, label %7
+  i32 1, label %8
+  ]
+
+7:
+  br label %9
+
+8:
+  br label %9
+
+9:
+  %10 = phi i32 [ %4, %6 ], [ %3, %8 ], [ %3, %7 ]
+  ret i32 %10
+}

>From 2db9f0055072a2172cdd4d3066b30d98d095bed3 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 30 Oct 2024 09:01:56 -0700
Subject: [PATCH 2/5] [SimplifyCFG] Simplify switch instruction that has
 duplicate arms

I noticed that the following two C functions produce different IR:

```
/* Returns v when switch_val is 0 or 1, and w otherwise. Cases 0 and 1 are
   separate arms with identical bodies: the duplicate-arm pattern this patch
   targets. */
int switch_duplicate_arms(int switch_val, int v, int w) {
  switch (switch_val) {
  default:
    break;
  case 0:
    w = v;
    break;
  case 1:
    w = v;
    break;
  }
  return w;
}

/* Same computation as switch_duplicate_arms, written as an if/else-if chain:
   w is overwritten with v when switch_val is 0 or 1, and returned unchanged
   otherwise. */
int if_duplicate_arms(int switch_val, int v, int w) {
  if (switch_val == 0)
    w = v;
  else if (switch_val == 1)
    w = v;
  return w;
}
```

For `switch_duplicate_arms`, we generate IR that looks like this:

```
define i32 @switch_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
  switch i32 %1, label %7 [
    i32 0, label %5
    i32 1, label %6
  ]

5:
  br label %7

6:
  br label %7

7:
  %8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ]
  ret i32 %8
}
```

For the equivalent `if_duplicate_arms`, we generate:
```
define i32 @if_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
  %5 = icmp ult i32 %1, 2
  %6 = select i1 %5, i32 %2, i32 %3
  ret i32 %6
}
```

For `switch_duplicate_arms`, taking case 0 and taking case 1 are equivalent,
since %5 and %6 branch to the same location and the incoming values for %8
from those blocks are the same. We could remove one of the duplicate switch
targets and update the switch to use the single remaining target.

On RISC-V, prior to this patch, we generate the following code:
```
switch_duplicate_arms:
        li      a4, 1
        beq     a1, a4, .LBB0_2
        mv      a0, a3
        bnez    a1, .LBB0_3
.LBB0_2:
        mv      a0, a2
.LBB0_3:
        ret

if_duplicate_arms:
        li      a4, 2
        mv      a0, a2
        bltu    a1, a4, .LBB1_2
        mv      a0, a3
.LBB1_2:
        ret
```

After this patch, the O3 code is optimized to the icmp + select pair, which
gives us the same code gen as `if_duplicate_arms` as desired.

This may help with both code size and further switch simplification. I found
that this patch causes no significant impact to spec2006/int/ref and
spec2017/intrate/ref.
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 92 +++++++++++++++++++
 .../ForwardSwitchConditionToPHI.ll            |  6 +-
 llvm/test/Transforms/SimplifyCFG/HoistCode.ll |  6 +-
 .../Transforms/SimplifyCFG/switch-dup-bbs.ll  | 50 ++++------
 .../SimplifyCFG/switch-to-select-two-case.ll  |  6 +-
 5 files changed, 116 insertions(+), 44 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 72228b445a8b6e..eb0c2346e08f27 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -276,6 +276,7 @@ class SimplifyCFGOpt {
   bool simplifyCleanupReturn(CleanupReturnInst *RI);
   bool simplifyUnreachable(UnreachableInst *UI);
   bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
+  bool simplifyDuplicateSwitchArms(SwitchInst *SI);
   bool simplifyIndirectBr(IndirectBrInst *IBI);
   bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
   bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
@@ -7436,6 +7437,94 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
   return true;
 }
 
+bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
+  // Simplify the case where multiple arms contain only a terminator, the
+  // terminators are the same, and their sucessor PHIS incoming values are the
+  // same.
+
+  // Find BBs that are candidates for simplification.
+  SmallPtrSet<BasicBlock *, 8> BBs;
+  for (auto &Case : SI->cases()) {
+    BasicBlock *BB = Case.getCaseSuccessor();
+
+    // FIXME: This case needs some extra care because the terminators other than
+    // SI need to be updated.
+    if (!BB->hasNPredecessors(1))
+      continue;
+
+    // FIXME: Relax that the terminator is a BranchInst by checking for equality
+    // on other kinds of terminators.
+    Instruction *T = BB->getTerminator();
+    if (T && BB->size() == 1 && isa<BranchInst>(T))
+      BBs.insert(BB);
+  }
+
+  auto IsBranchEq = [](BranchInst *A, BranchInst *B) {
+    if (A->isConditional() != B->isConditional())
+      return false;
+
+    if (A->isConditional() && A->getCondition() != B->getCondition())
+      return false;
+
+    if (A->getNumSuccessors() != B->getNumSuccessors())
+      return false;
+
+    for (unsigned I = 0; I < A->getNumSuccessors(); ++I)
+      if (A->getSuccessor(I) != B->getSuccessor(I))
+        return false;
+
+    // Need to check that PHIs in sucessors have matching values
+    for (auto *Succ : A->successors()) {
+      for (PHINode &Phi : Succ->phis())
+        if (Phi.getIncomingValueForBlock(A->getParent()) !=
+            Phi.getIncomingValueForBlock(B->getParent()))
+          return false;
+    }
+
+    return true;
+  };
+
+  // Construct a map from candidate basic block to an equivalent basic block
+  // to replace it with. All equivalent basic blocks should be replaced with
+  // the same basic block. To do this, if there is no equivalent BB in the map,
+  // then insert into the map BB -> BB. Otherwise, we should check only elements
+  // in the map for equivalence to ensure that all equivalent BB get replaced
+  // by the BB in the map. Replacing BB with BB has no impact, so we skip
+  // a call to setSuccessor when we do the actual replacement.
+  DenseMap<BasicBlock *, BasicBlock *> ReplaceWith;
+  for (BasicBlock *BB : BBs) {
+    bool Inserted = false;
+    for (auto KV : ReplaceWith) {
+      if (IsBranchEq(cast<BranchInst>(BB->getTerminator()),
+                     cast<BranchInst>(KV.first->getTerminator()))) {
+        ReplaceWith[BB] = KV.first;
+        Inserted = true;
+        break;
+      }
+    }
+    if (!Inserted)
+      ReplaceWith[BB] = BB;
+  }
+
+  // Do the replacement in SI.
+  bool MadeChange = false;
+  // There is no fast lookup of BasicBlock -> Cases, so we iterate over cases
+  // and check that the case was a candidate. BBs is already filtered, so
+  // hopefully calling contains on it is not too expensive.
+  for (auto &Case : SI->cases()) {
+    BasicBlock *OldSucc = Case.getCaseSuccessor();
+    if (!BBs.contains(OldSucc))
+      continue;
+    BasicBlock *NewSucc = ReplaceWith[OldSucc];
+    if (OldSucc != NewSucc) {
+      Case.setSuccessor(NewSucc);
+      MadeChange = true;
+    }
+  }
+
+  return MadeChange;
+}
+
 bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   BasicBlock *BB = SI->getParent();
 
@@ -7496,6 +7585,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
       hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
     return requestResimplify();
 
+  if (simplifyDuplicateSwitchArms(SI))
+    return requestResimplify();
+
   return false;
 }
 
diff --git a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
index 8ad455eb9e7f22..4623eb2c5dd3c1 100644
--- a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
@@ -139,16 +139,14 @@ define i32 @PR34471(i32 %x) {
 ; NO_FWD-NEXT:    switch i32 [[X:%.*]], label [[ELSE3:%.*]] [
 ; NO_FWD-NEXT:      i32 17, label [[RETURN:%.*]]
 ; NO_FWD-NEXT:      i32 19, label [[IF19:%.*]]
-; NO_FWD-NEXT:      i32 42, label [[IF42:%.*]]
+; NO_FWD-NEXT:      i32 42, label [[IF19]]
 ; NO_FWD-NEXT:    ]
 ; NO_FWD:       if19:
 ; NO_FWD-NEXT:    br label [[RETURN]]
-; NO_FWD:       if42:
-; NO_FWD-NEXT:    br label [[RETURN]]
 ; NO_FWD:       else3:
 ; NO_FWD-NEXT:    br label [[RETURN]]
 ; NO_FWD:       return:
-; NO_FWD-NEXT:    [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ [[X]], [[IF42]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ]
+; NO_FWD-NEXT:    [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ]
 ; NO_FWD-NEXT:    ret i32 [[R]]
 ;
 ; FWD-LABEL: @PR34471(
diff --git a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll
index fe0b48028a3b62..fbe41d891c1ec5 100644
--- a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll
+++ b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll
@@ -65,14 +65,12 @@ define float @PR39535min_switch(i64 %i, float %x) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    switch i64 [[I:%.*]], label [[END:%.*]] [
 ; CHECK-NEXT:      i64 1, label [[BB1:%.*]]
-; CHECK-NEXT:      i64 2, label [[BB2:%.*]]
+; CHECK-NEXT:      i64 2, label [[BB1]]
 ; CHECK-NEXT:    ]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br label [[END]]
-; CHECK:       bb2:
-; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ [[X]], [[BB2]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret float [[COND]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
index 3575597e408562..b12db656fdf681 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
@@ -5,30 +5,20 @@
 define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
 ; SIMPLIFY-CFG-LABEL: define i32 @switch_all_duplicate_arms(
 ; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) {
-; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP1]], label %[[BB7:.*]] [
+; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP1]], label %[[BB6:.*]] [
 ; SIMPLIFY-CFG-NEXT:      i32 0, label %[[BB5:.*]]
-; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB6:.*]]
+; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB5]]
 ; SIMPLIFY-CFG-NEXT:    ]
 ; SIMPLIFY-CFG:       [[BB5]]:
-; SIMPLIFY-CFG-NEXT:    br label %[[BB7]]
+; SIMPLIFY-CFG-NEXT:    br label %[[BB6]]
 ; SIMPLIFY-CFG:       [[BB6]]:
-; SIMPLIFY-CFG-NEXT:    br label %[[BB7]]
-; SIMPLIFY-CFG:       [[BB7]]:
-; SIMPLIFY-CFG-NEXT:    [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ]
+; SIMPLIFY-CFG-NEXT:    [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB5]] ]
 ; SIMPLIFY-CFG-NEXT:    ret i32 [[TMP8]]
 ;
 ; O3-LABEL: define i32 @switch_all_duplicate_arms(
 ; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; O3-NEXT:    switch i32 [[TMP1]], label %[[BB7:.*]] [
-; O3-NEXT:      i32 0, label %[[BB5:.*]]
-; O3-NEXT:      i32 1, label %[[BB6:.*]]
-; O3-NEXT:    ]
-; O3:       [[BB5]]:
-; O3-NEXT:    br label %[[BB7]]
-; O3:       [[BB6]]:
-; O3-NEXT:    br label %[[BB7]]
-; O3:       [[BB7]]:
-; O3-NEXT:    [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ]
+; O3-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[TMP1]], 2
+; O3-NEXT:    [[TMP8:%.*]] = select i1 [[SWITCH]], i32 [[TMP2]], i32 [[TMP3]]
 ; O3-NEXT:    ret i32 [[TMP8]]
 ;
   switch i32 %1, label %7 [
@@ -50,36 +40,32 @@ define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
 define i32 @switch_some_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
 ; SIMPLIFY-CFG-LABEL: define i32 @switch_some_duplicate_arms(
 ; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) {
-; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP1]], label %[[BB9:.*]] [
+; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP1]], label %[[BB8:.*]] [
 ; SIMPLIFY-CFG-NEXT:      i32 0, label %[[BB6:.*]]
-; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB7:.*]]
-; SIMPLIFY-CFG-NEXT:      i32 2, label %[[BB8:.*]]
+; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB6]]
+; SIMPLIFY-CFG-NEXT:      i32 2, label %[[BB7:.*]]
 ; SIMPLIFY-CFG-NEXT:    ]
 ; SIMPLIFY-CFG:       [[BB6]]:
-; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG-NEXT:    br label %[[BB8]]
 ; SIMPLIFY-CFG:       [[BB7]]:
-; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG-NEXT:    br label %[[BB8]]
 ; SIMPLIFY-CFG:       [[BB8]]:
-; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
-; SIMPLIFY-CFG:       [[BB9]]:
-; SIMPLIFY-CFG-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
+; SIMPLIFY-CFG-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
 ; SIMPLIFY-CFG-NEXT:    ret i32 [[TMP10]]
 ;
 ; O3-LABEL: define i32 @switch_some_duplicate_arms(
 ; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; O3-NEXT:    switch i32 [[TMP1]], label %[[BB9:.*]] [
+; O3-NEXT:    switch i32 [[TMP1]], label %[[BB8:.*]] [
 ; O3-NEXT:      i32 0, label %[[BB6:.*]]
-; O3-NEXT:      i32 1, label %[[BB7:.*]]
-; O3-NEXT:      i32 2, label %[[BB8:.*]]
+; O3-NEXT:      i32 1, label %[[BB6]]
+; O3-NEXT:      i32 2, label %[[BB7:.*]]
 ; O3-NEXT:    ]
 ; O3:       [[BB6]]:
-; O3-NEXT:    br label %[[BB9]]
+; O3-NEXT:    br label %[[BB8]]
 ; O3:       [[BB7]]:
-; O3-NEXT:    br label %[[BB9]]
+; O3-NEXT:    br label %[[BB8]]
 ; O3:       [[BB8]]:
-; O3-NEXT:    br label %[[BB9]]
-; O3:       [[BB9]]:
-; O3-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
+; O3-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
 ; O3-NEXT:    ret i32 [[TMP10]]
 ;
   switch i32 %1, label %9 [
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll b/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
index 1e2f18b3f339d4..50998e447b71dc 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
@@ -272,16 +272,14 @@ define i8 @switch_to_select_two_case_results_no_default(i32 %i) {
 ; CHECK-NEXT:      i32 0, label [[END:%.*]]
 ; CHECK-NEXT:      i32 2, label [[END]]
 ; CHECK-NEXT:      i32 4, label [[CASE3:%.*]]
-; CHECK-NEXT:      i32 6, label [[CASE4:%.*]]
+; CHECK-NEXT:      i32 6, label [[CASE3]]
 ; CHECK-NEXT:    ]
 ; CHECK:       case3:
 ; CHECK-NEXT:    br label [[END]]
-; CHECK:       case4:
-; CHECK-NEXT:    br label [[END]]
 ; CHECK:       default:
 ; CHECK-NEXT:    unreachable
 ; CHECK:       end:
-; CHECK-NEXT:    [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 44, [[CASE4]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ]
+; CHECK-NEXT:    [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ]
 ; CHECK-NEXT:    ret i8 [[T0]]
 ;
 entry:

>From 4e560672205f5328152251ac8cd1199494f9cda0 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 30 Oct 2024 12:12:02 -0700
Subject: [PATCH 3/5] fixup! respond to review

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index eb0c2346e08f27..74cb086f2b609e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7463,19 +7463,26 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     if (A->isConditional() != B->isConditional())
       return false;
 
-    if (A->isConditional() && A->getCondition() != B->getCondition())
-      return false;
+    if (A->isConditional()) {
+      // Identical condition Values trivially match. Distinct Values match only
+      // if both are instructions that are identical up to commutativity.
+      Value *AC = A->getCondition();
+      Value *BC = B->getCondition();
+      auto *ACI = dyn_cast<Instruction>(AC);
+      auto *BCI = dyn_cast<Instruction>(BC);
+      if (AC != BC && !(ACI && BCI && areIdenticalUpToCommutativity(ACI, BCI)))
+        return false;
+    }
 
     if (A->getNumSuccessors() != B->getNumSuccessors())
       return false;
 
-    for (unsigned I = 0; I < A->getNumSuccessors(); ++I)
-      if (A->getSuccessor(I) != B->getSuccessor(I))
+    for (unsigned I = 0; I < A->getNumSuccessors(); ++I) {
+      BasicBlock *ASucc = A->getSuccessor(I);
+      if (ASucc != B->getSuccessor(I))
         return false;
-
-    // Need to check that PHIs in sucessors have matching values
-    for (auto *Succ : A->successors()) {
-      for (PHINode &Phi : Succ->phis())
+      // Need to check that PHIs in sucessors have matching values
+      for (PHINode &Phi : ASucc->phis())
         if (Phi.getIncomingValueForBlock(A->getParent()) !=
             Phi.getIncomingValueForBlock(B->getParent()))
           return false;

>From 9e106554ee9a52a9f3838b46195162ddc07ca18c Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 30 Oct 2024 12:31:54 -0700
Subject: [PATCH 4/5] fixup! refactor for general approach

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 74cb086f2b609e..3809591f676258 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7455,7 +7455,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     // FIXME: Relax that the terminator is a BranchInst by checking for equality
     // on other kinds of terminators.
     Instruction *T = BB->getTerminator();
-    if (T && BB->size() == 1 && isa<BranchInst>(T))
+    if (T && isa<BranchInst>(T))
       BBs.insert(BB);
   }
 
@@ -7491,6 +7491,16 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     return true;
   };
 
+  auto IsBBEqualTo = [&IsBranchEq](BasicBlock *A, BasicBlock *B) {
+    // FIXME: Support more than just a single BranchInst. One way we could do
+    // this is by taking a hashing approach.
+    if (A->size() != 1 || B->size() != 1)
+      return false;
+
+    return IsBranchEq(cast<BranchInst>(A->getTerminator()),
+                      cast<BranchInst>(B->getTerminator()));
+  };
+
   // Construct a map from candidate basic block to an equivalent basic block
   // to replace it with. All equivalent basic blocks should be replaced with
   // the same basic block. To do this, if there is no equivalent BB in the map,
@@ -7502,8 +7512,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
   for (BasicBlock *BB : BBs) {
     bool Inserted = false;
     for (auto KV : ReplaceWith) {
-      if (IsBranchEq(cast<BranchInst>(BB->getTerminator()),
-                     cast<BranchInst>(KV.first->getTerminator()))) {
+      if (IsBBEqualTo(BB, KV.first)) {
         ReplaceWith[BB] = KV.first;
         Inserted = true;
         break;

>From 4b091ef14130208a6b098bb5bc5d02ff8e87ae21 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 30 Oct 2024 14:50:37 -0700
Subject: [PATCH 5/5] fixup! move PHI checks

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 3809591f676258..f6838643e660bb 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7477,16 +7477,9 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     if (A->getNumSuccessors() != B->getNumSuccessors())
       return false;
 
-    for (unsigned I = 0; I < A->getNumSuccessors(); ++I) {
-      BasicBlock *ASucc = A->getSuccessor(I);
-      if (ASucc != B->getSuccessor(I))
+    for (unsigned I = 0; I < A->getNumSuccessors(); ++I)
+      if (A->getSuccessor(I) != B->getSuccessor(I))
         return false;
-      // Need to check that PHIs in sucessors have matching values
-      for (PHINode &Phi : ASucc->phis())
-        if (Phi.getIncomingValueForBlock(A->getParent()) !=
-            Phi.getIncomingValueForBlock(B->getParent()))
-          return false;
-    }
 
     return true;
   };
@@ -7497,8 +7490,18 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     if (A->size() != 1 || B->size() != 1)
       return false;
 
-    return IsBranchEq(cast<BranchInst>(A->getTerminator()),
-                      cast<BranchInst>(B->getTerminator()));
+    if (!IsBranchEq(cast<BranchInst>(A->getTerminator()),
+                    cast<BranchInst>(B->getTerminator())))
+      return false;
+
+    // A and B branch to the same successors, so they are interchangeable
+    // only if every PHI in those successors gets the same value from both.
+    for (BasicBlock *Succ : successors(A))
+      for (PHINode &Phi : Succ->phis())
+        if (Phi.getIncomingValueForBlock(A) != Phi.getIncomingValueForBlock(B))
+          return false;
+
+    return true;
   };
 
   // Construct a map from candidate basic block to an equivalent basic block



More information about the llvm-commits mailing list