[llvm] [SimplifyCFG] Simplify switch instruction that has duplicate arms (PR #114262)

Michael Maitland via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 5 08:05:56 PST 2024


https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/114262

>From fbfa46a57becdedf4a93406f7e6b3cec84bd61c9 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 30 Oct 2024 06:11:43 -0700
Subject: [PATCH 01/24] [SimplifyCFG] precommit tests for simplify switch with
 duplicate arms

---
 .../Transforms/SimplifyCFG/switch-dup-bbs.ll  | 154 ++++++++++++++++++
 1 file changed, 154 insertions(+)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll

diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
new file mode 100644
index 00000000000000..3575597e408562
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
@@ -0,0 +1,154 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s -check-prefix=SIMPLIFY-CFG
+; RUN: opt < %s -O3 -S | FileCheck %s -check-prefix=O3
+
+define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
+; SIMPLIFY-CFG-LABEL: define i32 @switch_all_duplicate_arms(
+; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) {
+; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP1]], label %[[BB7:.*]] [
+; SIMPLIFY-CFG-NEXT:      i32 0, label %[[BB5:.*]]
+; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB6:.*]]
+; SIMPLIFY-CFG-NEXT:    ]
+; SIMPLIFY-CFG:       [[BB5]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB7]]
+; SIMPLIFY-CFG:       [[BB6]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB7]]
+; SIMPLIFY-CFG:       [[BB7]]:
+; SIMPLIFY-CFG-NEXT:    [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ]
+; SIMPLIFY-CFG-NEXT:    ret i32 [[TMP8]]
+;
+; O3-LABEL: define i32 @switch_all_duplicate_arms(
+; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O3-NEXT:    switch i32 [[TMP1]], label %[[BB7:.*]] [
+; O3-NEXT:      i32 0, label %[[BB5:.*]]
+; O3-NEXT:      i32 1, label %[[BB6:.*]]
+; O3-NEXT:    ]
+; O3:       [[BB5]]:
+; O3-NEXT:    br label %[[BB7]]
+; O3:       [[BB6]]:
+; O3-NEXT:    br label %[[BB7]]
+; O3:       [[BB7]]:
+; O3-NEXT:    [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ]
+; O3-NEXT:    ret i32 [[TMP8]]
+;
+  switch i32 %1, label %7 [
+  i32 0, label %5
+  i32 1, label %6
+  ]
+
+5:
+  br label %7
+
+6:
+  br label %7
+
+7:
+  %8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ]
+  ret i32 %8
+}
+
+define i32 @switch_some_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
+; SIMPLIFY-CFG-LABEL: define i32 @switch_some_duplicate_arms(
+; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) {
+; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP1]], label %[[BB9:.*]] [
+; SIMPLIFY-CFG-NEXT:      i32 0, label %[[BB6:.*]]
+; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB7:.*]]
+; SIMPLIFY-CFG-NEXT:      i32 2, label %[[BB8:.*]]
+; SIMPLIFY-CFG-NEXT:    ]
+; SIMPLIFY-CFG:       [[BB6]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG:       [[BB7]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG:       [[BB8]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG:       [[BB9]]:
+; SIMPLIFY-CFG-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
+; SIMPLIFY-CFG-NEXT:    ret i32 [[TMP10]]
+;
+; O3-LABEL: define i32 @switch_some_duplicate_arms(
+; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; O3-NEXT:    switch i32 [[TMP1]], label %[[BB9:.*]] [
+; O3-NEXT:      i32 0, label %[[BB6:.*]]
+; O3-NEXT:      i32 1, label %[[BB7:.*]]
+; O3-NEXT:      i32 2, label %[[BB8:.*]]
+; O3-NEXT:    ]
+; O3:       [[BB6]]:
+; O3-NEXT:    br label %[[BB9]]
+; O3:       [[BB7]]:
+; O3-NEXT:    br label %[[BB9]]
+; O3:       [[BB8]]:
+; O3-NEXT:    br label %[[BB9]]
+; O3:       [[BB9]]:
+; O3-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
+; O3-NEXT:    ret i32 [[TMP10]]
+;
+  switch i32 %1, label %9 [
+  i32 0, label %6
+  i32 1, label %7
+  i32 2, label %8
+  ]
+
+6:
+  br label %9
+
+7:
+  br label %9
+
+8:
+  br label %9
+
+9:
+  %10 = phi i32 [ %3, %5 ], [ %4, %8 ], [ %2, %7 ], [ %2, %6 ]
+  ret i32 %10
+}
+
+define i32 @switch_duplicate_arms_multipred(i1 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
+; SIMPLIFY-CFG-LABEL: define i32 @switch_duplicate_arms_multipred(
+; SIMPLIFY-CFG-SAME: i1 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) {
+; SIMPLIFY-CFG-NEXT:    br i1 [[TMP0]], label %[[BB6:.*]], label %[[BB7:.*]]
+; SIMPLIFY-CFG:       [[BB6]]:
+; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP2]], label %[[BB9:.*]] [
+; SIMPLIFY-CFG-NEXT:      i32 0, label %[[BB7]]
+; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB8:.*]]
+; SIMPLIFY-CFG-NEXT:    ]
+; SIMPLIFY-CFG:       [[BB7]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG:       [[BB8]]:
+; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG:       [[BB9]]:
+; SIMPLIFY-CFG-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ]
+; SIMPLIFY-CFG-NEXT:    ret i32 [[TMP10]]
+;
+; O3-LABEL: define i32 @switch_duplicate_arms_multipred(
+; O3-SAME: i1 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; O3-NEXT:    br i1 [[TMP0]], label %[[BB6:.*]], label %[[BB7:.*]]
+; O3:       [[BB6]]:
+; O3-NEXT:    switch i32 [[TMP2]], label %[[BB9:.*]] [
+; O3-NEXT:      i32 0, label %[[BB7]]
+; O3-NEXT:      i32 1, label %[[BB8:.*]]
+; O3-NEXT:    ]
+; O3:       [[BB7]]:
+; O3-NEXT:    br label %[[BB9]]
+; O3:       [[BB8]]:
+; O3-NEXT:    br label %[[BB9]]
+; O3:       [[BB9]]:
+; O3-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ]
+; O3-NEXT:    ret i32 [[TMP10]]
+;
+  br i1 %0, label %6, label %7
+6:
+  switch i32 %2, label %9 [
+  i32 0, label %7
+  i32 1, label %8
+  ]
+
+7:
+  br label %9
+
+8:
+  br label %9
+
+9:
+  %10 = phi i32 [ %4, %6 ], [ %3, %8 ], [ %3, %7 ]
+  ret i32 %10
+}

>From 2db9f0055072a2172cdd4d3066b30d98d095bed3 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 30 Oct 2024 09:01:56 -0700
Subject: [PATCH 02/24] [SimplifyCFG] Simplify switch instruction that has
 duplicate arms

I noticed that the two C functions emitted different IR:

```
int switch_duplicate_arms(int switch_val, int v, int w) {
  switch (switch_val) {
  default:
    break;
  case 0:
    w = v;
    break;
  case 1:
    w = v;
    break;
  }
  return w;
}

int if_duplicate_arms(int switch_val, int v, int w) {
  if (switch_val == 0)
    w = v;
  else if (switch_val == 1)
    w = v;
  return v0;
}
```

For `switch_duplicate_arms`, we generate IR that looks like this:

```
define i32 @switch_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
  switch i32 %1, label %7 [
    i32 0, label %5
    i32 1, label %6
  ]

5:
  br label %7

6:
  br label %7

7:
  %8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ]
  ret i32 %8
}
```

For the equivalent `if_duplicate_arms`, we generate:
```
define i32 @if_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
  %5 = icmp ult i32 %1, 2
  %6 = select i1 %5, i32 %2, i32 %3
  ret i32 %6
}
```

For `switch_duplicate_arms`, taking case 0 and 1 are the same since %5 and %6
branch to the same location and the incoming values for %8 are the same from
those blocks. We could remove one on the duplicate switch targets and update
the switch with the single target.

On RISC-V, prior to this patch, we generate the following code:
```
switch_duplicate_arms:
        li      a4, 1
        beq     a1, a4, .LBB0_2
        mv      a0, a3
        bnez    a1, .LBB0_3
.LBB0_2:
        mv      a0, a2
.LBB0_3:
        ret

if_duplicate_arms:
        li      a4, 2
        mv      a0, a2
        bltu    a1, a4, .LBB1_2
        mv      a0, a3
.LBB1_2:
        ret
```

After this patch, the O3 code is optimized to the icmp + select pair, which
gives us the same code gen as `if_duplicate_arms` as desired.

This may help with both code size and further switch simplification. I found
that this patch causes no significant impact to spec2006/int/ref and
spec2017/intrate/ref.
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 92 +++++++++++++++++++
 .../ForwardSwitchConditionToPHI.ll            |  6 +-
 llvm/test/Transforms/SimplifyCFG/HoistCode.ll |  6 +-
 .../Transforms/SimplifyCFG/switch-dup-bbs.ll  | 50 ++++------
 .../SimplifyCFG/switch-to-select-two-case.ll  |  6 +-
 5 files changed, 116 insertions(+), 44 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 72228b445a8b6e..eb0c2346e08f27 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -276,6 +276,7 @@ class SimplifyCFGOpt {
   bool simplifyCleanupReturn(CleanupReturnInst *RI);
   bool simplifyUnreachable(UnreachableInst *UI);
   bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
+  bool simplifyDuplicateSwitchArms(SwitchInst *SI);
   bool simplifyIndirectBr(IndirectBrInst *IBI);
   bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
   bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
@@ -7436,6 +7437,94 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
   return true;
 }
 
+bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
+  // Simplify the case where multiple arms contain only a terminator, the
+  // terminators are the same, and their sucessor PHIS incoming values are the
+  // same.
+
+  // Find BBs that are candidates for simplification.
+  SmallPtrSet<BasicBlock *, 8> BBs;
+  for (auto &Case : SI->cases()) {
+    BasicBlock *BB = Case.getCaseSuccessor();
+
+    // FIXME: This case needs some extra care because the terminators other than
+    // SI need to be updated.
+    if (!BB->hasNPredecessors(1))
+      continue;
+
+    // FIXME: Relax that the terminator is a BranchInst by checking for equality
+    // on other kinds of terminators.
+    Instruction *T = BB->getTerminator();
+    if (T && BB->size() == 1 && isa<BranchInst>(T))
+      BBs.insert(BB);
+  }
+
+  auto IsBranchEq = [](BranchInst *A, BranchInst *B) {
+    if (A->isConditional() != B->isConditional())
+      return false;
+
+    if (A->isConditional() && A->getCondition() != B->getCondition())
+      return false;
+
+    if (A->getNumSuccessors() != B->getNumSuccessors())
+      return false;
+
+    for (unsigned I = 0; I < A->getNumSuccessors(); ++I)
+      if (A->getSuccessor(I) != B->getSuccessor(I))
+        return false;
+
+    // Need to check that PHIs in sucessors have matching values
+    for (auto *Succ : A->successors()) {
+      for (PHINode &Phi : Succ->phis())
+        if (Phi.getIncomingValueForBlock(A->getParent()) !=
+            Phi.getIncomingValueForBlock(B->getParent()))
+          return false;
+    }
+
+    return true;
+  };
+
+  // Construct a map from candidate basic block to an equivalent basic block
+  // to replace it with. All equivalent basic blocks should be replaced with
+  // the same basic block. To do this, if there is no equivalent BB in the map,
+  // then insert into the map BB -> BB. Otherwise, we should check only elements
+  // in the map for equivalence to ensure that all equivalent BB get replaced
+  // by the BB in the map. Replacing BB with BB has no impact, so we skip
+  // a call to setSuccessor when we do the actual replacement.
+  DenseMap<BasicBlock *, BasicBlock *> ReplaceWith;
+  for (BasicBlock *BB : BBs) {
+    bool Inserted = false;
+    for (auto KV : ReplaceWith) {
+      if (IsBranchEq(cast<BranchInst>(BB->getTerminator()),
+                     cast<BranchInst>(KV.first->getTerminator()))) {
+        ReplaceWith[BB] = KV.first;
+        Inserted = true;
+        break;
+      }
+    }
+    if (!Inserted)
+      ReplaceWith[BB] = BB;
+  }
+
+  // Do the replacement in SI.
+  bool MadeChange = false;
+  // There is no fast lookup of BasicBlock -> Cases, so we iterate over cases
+  // and check that the case was a candidate. BBs is already filtered, so
+  // hopefully calling contains on it is not too expensive.
+  for (auto &Case : SI->cases()) {
+    BasicBlock *OldSucc = Case.getCaseSuccessor();
+    if (!BBs.contains(OldSucc))
+      continue;
+    BasicBlock *NewSucc = ReplaceWith[OldSucc];
+    if (OldSucc != NewSucc) {
+      Case.setSuccessor(NewSucc);
+      MadeChange = true;
+    }
+  }
+
+  return MadeChange;
+}
+
 bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   BasicBlock *BB = SI->getParent();
 
@@ -7496,6 +7585,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
       hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
     return requestResimplify();
 
+  if (simplifyDuplicateSwitchArms(SI))
+    return requestResimplify();
+
   return false;
 }
 
diff --git a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
index 8ad455eb9e7f22..4623eb2c5dd3c1 100644
--- a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
@@ -139,16 +139,14 @@ define i32 @PR34471(i32 %x) {
 ; NO_FWD-NEXT:    switch i32 [[X:%.*]], label [[ELSE3:%.*]] [
 ; NO_FWD-NEXT:      i32 17, label [[RETURN:%.*]]
 ; NO_FWD-NEXT:      i32 19, label [[IF19:%.*]]
-; NO_FWD-NEXT:      i32 42, label [[IF42:%.*]]
+; NO_FWD-NEXT:      i32 42, label [[IF19]]
 ; NO_FWD-NEXT:    ]
 ; NO_FWD:       if19:
 ; NO_FWD-NEXT:    br label [[RETURN]]
-; NO_FWD:       if42:
-; NO_FWD-NEXT:    br label [[RETURN]]
 ; NO_FWD:       else3:
 ; NO_FWD-NEXT:    br label [[RETURN]]
 ; NO_FWD:       return:
-; NO_FWD-NEXT:    [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ [[X]], [[IF42]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ]
+; NO_FWD-NEXT:    [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ]
 ; NO_FWD-NEXT:    ret i32 [[R]]
 ;
 ; FWD-LABEL: @PR34471(
diff --git a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll
index fe0b48028a3b62..fbe41d891c1ec5 100644
--- a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll
+++ b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll
@@ -65,14 +65,12 @@ define float @PR39535min_switch(i64 %i, float %x) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    switch i64 [[I:%.*]], label [[END:%.*]] [
 ; CHECK-NEXT:      i64 1, label [[BB1:%.*]]
-; CHECK-NEXT:      i64 2, label [[BB2:%.*]]
+; CHECK-NEXT:      i64 2, label [[BB1]]
 ; CHECK-NEXT:    ]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br label [[END]]
-; CHECK:       bb2:
-; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ [[X]], [[BB2]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret float [[COND]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
index 3575597e408562..b12db656fdf681 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
@@ -5,30 +5,20 @@
 define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
 ; SIMPLIFY-CFG-LABEL: define i32 @switch_all_duplicate_arms(
 ; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) {
-; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP1]], label %[[BB7:.*]] [
+; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP1]], label %[[BB6:.*]] [
 ; SIMPLIFY-CFG-NEXT:      i32 0, label %[[BB5:.*]]
-; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB6:.*]]
+; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB5]]
 ; SIMPLIFY-CFG-NEXT:    ]
 ; SIMPLIFY-CFG:       [[BB5]]:
-; SIMPLIFY-CFG-NEXT:    br label %[[BB7]]
+; SIMPLIFY-CFG-NEXT:    br label %[[BB6]]
 ; SIMPLIFY-CFG:       [[BB6]]:
-; SIMPLIFY-CFG-NEXT:    br label %[[BB7]]
-; SIMPLIFY-CFG:       [[BB7]]:
-; SIMPLIFY-CFG-NEXT:    [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ]
+; SIMPLIFY-CFG-NEXT:    [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB5]] ]
 ; SIMPLIFY-CFG-NEXT:    ret i32 [[TMP8]]
 ;
 ; O3-LABEL: define i32 @switch_all_duplicate_arms(
 ; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; O3-NEXT:    switch i32 [[TMP1]], label %[[BB7:.*]] [
-; O3-NEXT:      i32 0, label %[[BB5:.*]]
-; O3-NEXT:      i32 1, label %[[BB6:.*]]
-; O3-NEXT:    ]
-; O3:       [[BB5]]:
-; O3-NEXT:    br label %[[BB7]]
-; O3:       [[BB6]]:
-; O3-NEXT:    br label %[[BB7]]
-; O3:       [[BB7]]:
-; O3-NEXT:    [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ]
+; O3-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[TMP1]], 2
+; O3-NEXT:    [[TMP8:%.*]] = select i1 [[SWITCH]], i32 [[TMP2]], i32 [[TMP3]]
 ; O3-NEXT:    ret i32 [[TMP8]]
 ;
   switch i32 %1, label %7 [
@@ -50,36 +40,32 @@ define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
 define i32 @switch_some_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
 ; SIMPLIFY-CFG-LABEL: define i32 @switch_some_duplicate_arms(
 ; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) {
-; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP1]], label %[[BB9:.*]] [
+; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP1]], label %[[BB8:.*]] [
 ; SIMPLIFY-CFG-NEXT:      i32 0, label %[[BB6:.*]]
-; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB7:.*]]
-; SIMPLIFY-CFG-NEXT:      i32 2, label %[[BB8:.*]]
+; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB6]]
+; SIMPLIFY-CFG-NEXT:      i32 2, label %[[BB7:.*]]
 ; SIMPLIFY-CFG-NEXT:    ]
 ; SIMPLIFY-CFG:       [[BB6]]:
-; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG-NEXT:    br label %[[BB8]]
 ; SIMPLIFY-CFG:       [[BB7]]:
-; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG-NEXT:    br label %[[BB8]]
 ; SIMPLIFY-CFG:       [[BB8]]:
-; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
-; SIMPLIFY-CFG:       [[BB9]]:
-; SIMPLIFY-CFG-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
+; SIMPLIFY-CFG-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
 ; SIMPLIFY-CFG-NEXT:    ret i32 [[TMP10]]
 ;
 ; O3-LABEL: define i32 @switch_some_duplicate_arms(
 ; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; O3-NEXT:    switch i32 [[TMP1]], label %[[BB9:.*]] [
+; O3-NEXT:    switch i32 [[TMP1]], label %[[BB8:.*]] [
 ; O3-NEXT:      i32 0, label %[[BB6:.*]]
-; O3-NEXT:      i32 1, label %[[BB7:.*]]
-; O3-NEXT:      i32 2, label %[[BB8:.*]]
+; O3-NEXT:      i32 1, label %[[BB6]]
+; O3-NEXT:      i32 2, label %[[BB7:.*]]
 ; O3-NEXT:    ]
 ; O3:       [[BB6]]:
-; O3-NEXT:    br label %[[BB9]]
+; O3-NEXT:    br label %[[BB8]]
 ; O3:       [[BB7]]:
-; O3-NEXT:    br label %[[BB9]]
+; O3-NEXT:    br label %[[BB8]]
 ; O3:       [[BB8]]:
-; O3-NEXT:    br label %[[BB9]]
-; O3:       [[BB9]]:
-; O3-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
+; O3-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
 ; O3-NEXT:    ret i32 [[TMP10]]
 ;
   switch i32 %1, label %9 [
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll b/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
index 1e2f18b3f339d4..50998e447b71dc 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
@@ -272,16 +272,14 @@ define i8 @switch_to_select_two_case_results_no_default(i32 %i) {
 ; CHECK-NEXT:      i32 0, label [[END:%.*]]
 ; CHECK-NEXT:      i32 2, label [[END]]
 ; CHECK-NEXT:      i32 4, label [[CASE3:%.*]]
-; CHECK-NEXT:      i32 6, label [[CASE4:%.*]]
+; CHECK-NEXT:      i32 6, label [[CASE3]]
 ; CHECK-NEXT:    ]
 ; CHECK:       case3:
 ; CHECK-NEXT:    br label [[END]]
-; CHECK:       case4:
-; CHECK-NEXT:    br label [[END]]
 ; CHECK:       default:
 ; CHECK-NEXT:    unreachable
 ; CHECK:       end:
-; CHECK-NEXT:    [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 44, [[CASE4]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ]
+; CHECK-NEXT:    [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ]
 ; CHECK-NEXT:    ret i8 [[T0]]
 ;
 entry:

>From 4e560672205f5328152251ac8cd1199494f9cda0 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 30 Oct 2024 12:12:02 -0700
Subject: [PATCH 03/24] fixup! respond to review

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index eb0c2346e08f27..74cb086f2b609e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7463,19 +7463,26 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     if (A->isConditional() != B->isConditional())
       return false;
 
-    if (A->isConditional() && A->getCondition() != B->getCondition())
-      return false;
+    if (A->isConditional()) {
+      // If the conditions are instructions, check equality up to commutativity.
+      // Otherwise, check that the two Values are the same.
+      Value *AC = A->getCondition();
+      Value *BC = B->getCondition();
+      auto *ACI = dyn_cast<Instruction>(AC);
+      auto *BCI = dyn_cast<Instruction>(BC);
+      if ((ACI && BCI && !areIdenticalUpToCommutativity(ACI, BCI)) && AC != BC)
+        return false;
+    }
 
     if (A->getNumSuccessors() != B->getNumSuccessors())
       return false;
 
-    for (unsigned I = 0; I < A->getNumSuccessors(); ++I)
-      if (A->getSuccessor(I) != B->getSuccessor(I))
+    for (unsigned I = 0; I < A->getNumSuccessors(); ++I) {
+      BasicBlock *ASucc = A->getSuccessor(I);
+      if (ASucc != B->getSuccessor(I))
         return false;
-
-    // Need to check that PHIs in sucessors have matching values
-    for (auto *Succ : A->successors()) {
-      for (PHINode &Phi : Succ->phis())
+      // Need to check that PHIs in sucessors have matching values
+      for (PHINode &Phi : ASucc->phis())
         if (Phi.getIncomingValueForBlock(A->getParent()) !=
             Phi.getIncomingValueForBlock(B->getParent()))
           return false;

>From 9e106554ee9a52a9f3838b46195162ddc07ca18c Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 30 Oct 2024 12:31:54 -0700
Subject: [PATCH 04/24] fixup! refactor for general approach

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 74cb086f2b609e..3809591f676258 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7455,7 +7455,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     // FIXME: Relax that the terminator is a BranchInst by checking for equality
     // on other kinds of terminators.
     Instruction *T = BB->getTerminator();
-    if (T && BB->size() == 1 && isa<BranchInst>(T))
+    if (T && isa<BranchInst>(T))
       BBs.insert(BB);
   }
 
@@ -7491,6 +7491,16 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     return true;
   };
 
+  auto IsBBEqualTo = [&IsBranchEq](BasicBlock *A, BasicBlock *B) {
+    // FIXME: Support more than just a single BranchInst. One way we could do
+    // this is by taking a hashing approach.
+    if (A->size() != 1 || B->size() != 1)
+      return false;
+
+    return IsBranchEq(cast<BranchInst>(A->getTerminator()),
+                      cast<BranchInst>(B->getTerminator()));
+  };
+
   // Construct a map from candidate basic block to an equivalent basic block
   // to replace it with. All equivalent basic blocks should be replaced with
   // the same basic block. To do this, if there is no equivalent BB in the map,
@@ -7502,8 +7512,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
   for (BasicBlock *BB : BBs) {
     bool Inserted = false;
     for (auto KV : ReplaceWith) {
-      if (IsBranchEq(cast<BranchInst>(BB->getTerminator()),
-                     cast<BranchInst>(KV.first->getTerminator()))) {
+      if (IsBBEqualTo(BB, KV.first)) {
         ReplaceWith[BB] = KV.first;
         Inserted = true;
         break;

>From 64ddee66f9350510381b3c567acc570ba8efb2df Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 30 Oct 2024 14:50:37 -0700
Subject: [PATCH 05/24] fixup! move PHI checks

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 24 ++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 3809591f676258..68fad8e213f60f 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7477,16 +7477,9 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     if (A->getNumSuccessors() != B->getNumSuccessors())
       return false;
 
-    for (unsigned I = 0; I < A->getNumSuccessors(); ++I) {
-      BasicBlock *ASucc = A->getSuccessor(I);
-      if (ASucc != B->getSuccessor(I))
+    for (unsigned I = 0; I < A->getNumSuccessors(); ++I)
+      if (A->getSuccessor(I) != B->getSuccessor(I))
         return false;
-      // Need to check that PHIs in sucessors have matching values
-      for (PHINode &Phi : ASucc->phis())
-        if (Phi.getIncomingValueForBlock(A->getParent()) !=
-            Phi.getIncomingValueForBlock(B->getParent()))
-          return false;
-    }
 
     return true;
   };
@@ -7497,8 +7490,17 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     if (A->size() != 1 || B->size() != 1)
       return false;
 
-    return IsBranchEq(cast<BranchInst>(A->getTerminator()),
-                      cast<BranchInst>(B->getTerminator()));
+    if (!IsBranchEq(cast<BranchInst>(A->getTerminator()),
+                    cast<BranchInst>(B->getTerminator())))
+      return false;
+
+    // Need to check that PHIs in sucessors have matching values
+    for (auto *Succ : cast<BranchInst>(A->getTerminator())->successors())
+      for (PHINode &Phi : Succ->phis())
+        if (Phi.getIncomingValueForBlock(A) != Phi.getIncomingValueForBlock(B))
+          return false;
+
+    return true;
   };
 
   // Construct a map from candidate basic block to an equivalent basic block

>From c8eecb6b9f4f5e80500fc072a661daf8b2afe92a Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 31 Oct 2024 10:04:29 -0700
Subject: [PATCH 06/24] fixup! make it O(n)

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 186 ++++++++++++----------
 1 file changed, 102 insertions(+), 84 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 68fad8e213f60f..f84ab936fe3433 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7437,106 +7437,124 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
   return true;
 }
 
-bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
-  // Simplify the case where multiple arms contain only a terminator, the
-  // terminators are the same, and their sucessor PHIS incoming values are the
-  // same.
-
-  // Find BBs that are candidates for simplification.
-  SmallPtrSet<BasicBlock *, 8> BBs;
-  for (auto &Case : SI->cases()) {
-    BasicBlock *BB = Case.getCaseSuccessor();
-
-    // FIXME: This case needs some extra care because the terminators other than
-    // SI need to be updated.
-    if (!BB->hasNPredecessors(1))
-      continue;
+namespace llvm {
+template <> struct DenseMapInfo<const SwitchInst::CaseHandle *> {
+  static const SwitchInst::CaseHandle *getEmptyKey() {
+    return reinterpret_cast<const SwitchInst::CaseHandle *>(
+        DenseMapInfo<void *>::getEmptyKey());
+  }
+  static const SwitchInst::CaseHandle *getTombstoneKey() {
+    return reinterpret_cast<const SwitchInst::CaseHandle *>(
+        DenseMapInfo<void *>::getTombstoneKey());
+  }
+  static unsigned getHashValue(const SwitchInst::CaseHandle *Case) {
+    BasicBlock *Succ = Case->getCaseSuccessor();
+    return hash_combine(Succ->size(), Succ->getTerminator()->getOpcode());
+  }
+  static bool isEqual(const SwitchInst::CaseHandle *LHS,
+                      const SwitchInst::CaseHandle *RHS) {
+
+    auto EKey = DenseMapInfo<const SwitchInst::CaseHandle *>::getEmptyKey();
+    auto TKey = DenseMapInfo<const SwitchInst::CaseHandle *>::getTombstoneKey();
+    if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
+      return LHS == RHS;
+
+    auto IsBranchEq = [](BranchInst *A, BranchInst *B) {
+      if (A->isConditional() != B->isConditional())
+        return false;
 
-    // FIXME: Relax that the terminator is a BranchInst by checking for equality
-    // on other kinds of terminators.
-    Instruction *T = BB->getTerminator();
-    if (T && isa<BranchInst>(T))
-      BBs.insert(BB);
-  }
+      if (A->isConditional()) {
+        // If the conditions are instructions, check equality up to
+        // commutativity. Otherwise, check that the two Values are the same.
+        Value *AC = A->getCondition();
+        Value *BC = B->getCondition();
+        auto *ACI = dyn_cast<Instruction>(AC);
+        auto *BCI = dyn_cast<Instruction>(BC);
 
-  auto IsBranchEq = [](BranchInst *A, BranchInst *B) {
-    if (A->isConditional() != B->isConditional())
-      return false;
+        if (ACI && BCI && !areIdenticalUpToCommutativity(ACI, BCI))
+          return false;
+        if ((!ACI || !BCI) && AC != BC)
+          return false;
+      }
 
-    if (A->isConditional()) {
-      // If the conditions are instructions, check equality up to commutativity.
-      // Otherwise, check that the two Values are the same.
-      Value *AC = A->getCondition();
-      Value *BC = B->getCondition();
-      auto *ACI = dyn_cast<Instruction>(AC);
-      auto *BCI = dyn_cast<Instruction>(BC);
-      if ((ACI && BCI && !areIdenticalUpToCommutativity(ACI, BCI)) && AC != BC)
+      if (A->getNumSuccessors() != B->getNumSuccessors())
         return false;
-    }
 
-    if (A->getNumSuccessors() != B->getNumSuccessors())
-      return false;
+      for (unsigned I = 0; I < A->getNumSuccessors(); ++I)
+        if (A->getSuccessor(I) != B->getSuccessor(I))
+          return false;
 
-    for (unsigned I = 0; I < A->getNumSuccessors(); ++I)
-      if (A->getSuccessor(I) != B->getSuccessor(I))
-        return false;
+      return true;
+    };
 
-    return true;
-  };
+    auto IsBBEq = [&IsBranchEq](BasicBlock *A, BasicBlock *B) {
+      // FIXME: Support more than just a single BranchInst. One way we could do
+      // this is by taking a hashing approach.
+      if (A->size() != 1 || B->size() != 1)
+        return false;
 
-  auto IsBBEqualTo = [&IsBranchEq](BasicBlock *A, BasicBlock *B) {
-    // FIXME: Support more than just a single BranchInst. One way we could do
-    // this is by taking a hashing approach.
-    if (A->size() != 1 || B->size() != 1)
-      return false;
+      if (!IsBranchEq(cast<BranchInst>(A->getTerminator()),
+                      cast<BranchInst>(B->getTerminator())))
+        return false;
 
-    if (!IsBranchEq(cast<BranchInst>(A->getTerminator()),
-                    cast<BranchInst>(B->getTerminator())))
-      return false;
+      // Need to check that PHIs in sucessors have matching values
+      for (auto *Succ : cast<BranchInst>(A->getTerminator())->successors())
+        for (PHINode &Phi : Succ->phis())
+          if (Phi.getIncomingValueForBlock(A) !=
+              Phi.getIncomingValueForBlock(B))
+            return false;
 
-    // Need to check that PHIs in sucessors have matching values
-    for (auto *Succ : cast<BranchInst>(A->getTerminator())->successors())
-      for (PHINode &Phi : Succ->phis())
-        if (Phi.getIncomingValueForBlock(A) != Phi.getIncomingValueForBlock(B))
-          return false;
+      return true;
+    };
 
-    return true;
-  };
+    return IsBBEq(LHS->getCaseSuccessor(), RHS->getCaseSuccessor());
+  }
+};
+} // namespace llvm
 
-  // Construct a map from candidate basic block to an equivalent basic block
-  // to replace it with. All equivalent basic blocks should be replaced with
-  // the same basic block. To do this, if there is no equivalent BB in the map,
-  // then insert into the map BB -> BB. Otherwise, we should check only elements
-  // in the map for equivalence to ensure that all equivalent BB get replaced
-  // by the BB in the map. Replacing BB with BB has no impact, so we skip
-  // a call to setSuccessor when we do the actual replacement.
-  DenseMap<BasicBlock *, BasicBlock *> ReplaceWith;
-  for (BasicBlock *BB : BBs) {
-    bool Inserted = false;
-    for (auto KV : ReplaceWith) {
-      if (IsBBEqualTo(BB, KV.first)) {
-        ReplaceWith[BB] = KV.first;
-        Inserted = true;
-        break;
-      }
-    }
-    if (!Inserted)
-      ReplaceWith[BB] = BB;
+bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
+  // Build a map where the key is the CaseHandle that contains the successor
+  // BasicBlock to replace all CaseHandle successor BasicBlocks in the value
+  // list. We use a custom DenseMapInfo to build this map in O(size(cases()).
+  // We use CaseHandle instead of BasicBlock since it allows us to do the
+  // replacement in O(size(cases)). If we had used BasicBlock, then to do the
+  // actual replacement, we'd need an additional nested loop to loop over all
+  // CaseHandle since there is no O(1) lookup of BasicBlock -> Cases.
+  DenseSet<const SwitchInst::CaseHandle *,
+           DenseMapInfo<const SwitchInst::CaseHandle *>>
+      ReplaceWith;
+
+  // We'd like to use CaseHandle* for our set because it is easier to write
+  // getEmptyKey and getTombstoneKey for CaseHandle* than it is for CaseHandle.
+  // We need to take ownership though, since SI->cases() makes no guarantee that
+  // the CaseHandle is still allocated on every iteration. We can skip over
+  // cases we know we won't consider for replacement.
+  SmallVector<SwitchInst::CaseHandle> Cases;
+  for (auto Case : SI->cases()) {
+    BasicBlock *BB = Case.getCaseSuccessor();
+    // Skip BBs that are not candidates for simplification.
+    // FIXME: This case needs some extra care because the terminators other than
+    // SI need to be updated.
+    if (!BB->hasNPredecessors(1))
+      continue;
+    // FIXME: Relax that the terminator is a BranchInst by checking for equality
+    // on other kinds of terminators.
+    Instruction *T = BB->getTerminator();
+    if (!T || !isa<BranchInst>(T))
+      continue;
+    Cases.emplace_back(Case);
   }
 
-  // Do the replacement in SI.
   bool MadeChange = false;
-  // There is no fast lookup of BasicBlock -> Cases, so we iterate over cases
-  // and check that the case was a candidate. BBs is already filtered, so
-  // hopefully calling contains on it is not too expensive.
-  for (auto &Case : SI->cases()) {
-    BasicBlock *OldSucc = Case.getCaseSuccessor();
-    if (!BBs.contains(OldSucc))
-      continue;
-    BasicBlock *NewSucc = ReplaceWith[OldSucc];
-    if (OldSucc != NewSucc) {
-      Case.setSuccessor(NewSucc);
+  for (auto &Case : Cases) {
+    // Case is a candidate for simplification. If we find a duplicate BB,
+    // replace it.
+    const auto Res = ReplaceWith.find(&Case);
+    if (Res != ReplaceWith.end()) {
+      Case.setSuccessor((*Res)->getCaseSuccessor());
       MadeChange = true;
+    } else {
+      ReplaceWith.insert(&Case);
     }
   }
 

>From 9e9afc2783de2f5d323c84e1bd8c2284322bce91 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 31 Oct 2024 10:21:45 -0700
Subject: [PATCH 07/24] fixup! use successor BBs in hash

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index f84ab936fe3433..20184a70dd9146 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7449,7 +7449,10 @@ template <> struct DenseMapInfo<const SwitchInst::CaseHandle *> {
   }
   static unsigned getHashValue(const SwitchInst::CaseHandle *Case) {
     BasicBlock *Succ = Case->getCaseSuccessor();
-    return hash_combine(Succ->size(), Succ->getTerminator()->getOpcode());
+    BranchInst *BI = cast<BranchInst>(Succ->getTerminator());
+
+    auto It = BI->successors();
+    return hash_combine(Succ->size(), hash_combine_range(It.begin(), It.end()));
   }
   static bool isEqual(const SwitchInst::CaseHandle *LHS,
                       const SwitchInst::CaseHandle *RHS) {

>From ef1bc74730c176cce828d129e439847134ec655e Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 31 Oct 2024 13:30:57 -0700
Subject: [PATCH 08/24] fixup! use hasNPredsOrMore for early exit capability

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 20184a70dd9146..ddfebb8c9fbd39 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7538,7 +7538,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     // Skip BBs that are not candidates for simplification.
     // FIXME: This case needs some extra care because the terminators other than
     // SI need to be updated.
-    if (!BB->hasNPredecessors(1))
+    if (BB->hasNPredecessorsOrMore(2))
       continue;
     // FIXME: Relax that the terminator is a BranchInst by checking for equality
     // on other kinds of terminators.

>From 794d4e8810190af94193d679bdfb67f6ed63da0d Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 31 Oct 2024 13:36:03 -0700
Subject: [PATCH 09/24] fixup! don't add to Cases if size() != 1 to improve
 performance

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index ddfebb8c9fbd39..37222e1a758552 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7491,10 +7491,10 @@ template <> struct DenseMapInfo<const SwitchInst::CaseHandle *> {
     };
 
     auto IsBBEq = [&IsBranchEq](BasicBlock *A, BasicBlock *B) {
-      // FIXME: Support more than just a single BranchInst. One way we could do
-      // this is by taking a hashing approach.
-      if (A->size() != 1 || B->size() != 1)
-        return false;
+      // FIXME: we checked that the size of A and B are both 1 in
+      // simplifyDuplicateSwitchArms to make the Case list smaller to improve
+      // performance. If we decide to support BasicBlocks with more than just a
+      // single instruction, we need to check that A.size() == B.size() here.
 
       if (!IsBranchEq(cast<BranchInst>(A->getTerminator()),
                       cast<BranchInst>(B->getTerminator())))
@@ -7536,6 +7536,12 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
   for (auto Case : SI->cases()) {
     BasicBlock *BB = Case.getCaseSuccessor();
     // Skip BBs that are not candidates for simplification.
+
+    // FIXME: Support more than just a single BranchInst. One way we could do
+    // this is by taking a hashing approach of all insts in BB.
+    if (BB->size() != 1)
+      continue;
+
     // FIXME: This case needs some extra care because the terminators other than
     // SI need to be updated.
     if (BB->hasNPredecessorsOrMore(2))
@@ -7545,6 +7551,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     Instruction *T = BB->getTerminator();
     if (!T || !isa<BranchInst>(T))
       continue;
+
     Cases.emplace_back(Case);
   }
 

>From 32b627fd388a4e7e63f870803105a70c9931d3b3 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 1 Nov 2024 10:27:41 -0700
Subject: [PATCH 10/24] fixup! precompute getIncomingValueForBlock

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 130 +++++++++++++---------
 1 file changed, 76 insertions(+), 54 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 37222e1a758552..86983249787e24 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7437,28 +7437,39 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
   return true;
 }
 
+/// Checking whether two CaseHandle.getCaseSuccessor() are equal depends on
+/// the incoming values of their successor PHINodes.
+/// PHINode::getIncomingValueForBlock has O(|Preds|), so we'd like to avoid
+/// calling this function on each BasicBlock every time isEqual is called,
+/// especially since the same BasicBlock may be passed as an argument multiple
+/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
+/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
+/// of the incoming values.
+struct CaseHandleWrapper {
+  const SwitchInst::CaseHandle Case;
+  DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> &PhiPredIVs;
+};
+
 namespace llvm {
-template <> struct DenseMapInfo<const SwitchInst::CaseHandle *> {
-  static const SwitchInst::CaseHandle *getEmptyKey() {
-    return reinterpret_cast<const SwitchInst::CaseHandle *>(
+template <> struct DenseMapInfo<const CaseHandleWrapper *> {
+  static const CaseHandleWrapper *getEmptyKey() {
+    return reinterpret_cast<CaseHandleWrapper *>(
         DenseMapInfo<void *>::getEmptyKey());
   }
-  static const SwitchInst::CaseHandle *getTombstoneKey() {
-    return reinterpret_cast<const SwitchInst::CaseHandle *>(
+  static const CaseHandleWrapper *getTombstoneKey() {
+    return reinterpret_cast<CaseHandleWrapper *>(
         DenseMapInfo<void *>::getTombstoneKey());
   }
-  static unsigned getHashValue(const SwitchInst::CaseHandle *Case) {
-    BasicBlock *Succ = Case->getCaseSuccessor();
+  static unsigned getHashValue(const CaseHandleWrapper *CT) {
+    BasicBlock *Succ = CT->Case.getCaseSuccessor();
     BranchInst *BI = cast<BranchInst>(Succ->getTerminator());
-
     auto It = BI->successors();
     return hash_combine(Succ->size(), hash_combine_range(It.begin(), It.end()));
   }
-  static bool isEqual(const SwitchInst::CaseHandle *LHS,
-                      const SwitchInst::CaseHandle *RHS) {
-
-    auto EKey = DenseMapInfo<const SwitchInst::CaseHandle *>::getEmptyKey();
-    auto TKey = DenseMapInfo<const SwitchInst::CaseHandle *>::getTombstoneKey();
+  static bool isEqual(const CaseHandleWrapper *LHS,
+                      const CaseHandleWrapper *RHS) {
+    auto EKey = DenseMapInfo<CaseHandleWrapper *>::getEmptyKey();
+    auto TKey = DenseMapInfo<CaseHandleWrapper *>::getTombstoneKey();
     if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
       return LHS == RHS;
 
@@ -7490,49 +7501,38 @@ template <> struct DenseMapInfo<const SwitchInst::CaseHandle *> {
       return true;
     };
 
-    auto IsBBEq = [&IsBranchEq](BasicBlock *A, BasicBlock *B) {
-      // FIXME: we checked that the size of A and B are both 1 in
-      // simplifyDuplicateSwitchArms to make the Case list smaller to improve
-      // performance. If we decide to support BasicBlocks with more than just a
-      // single instruction, we need to check that A.size() == B.size() here.
-
-      if (!IsBranchEq(cast<BranchInst>(A->getTerminator()),
-                      cast<BranchInst>(B->getTerminator())))
-        return false;
-
-      // Need to check that PHIs in sucessors have matching values
-      for (auto *Succ : cast<BranchInst>(A->getTerminator())->successors())
-        for (PHINode &Phi : Succ->phis())
-          if (Phi.getIncomingValueForBlock(A) !=
-              Phi.getIncomingValueForBlock(B))
+    auto IsBBEq =
+        [&IsBranchEq](
+            BasicBlock *A, BasicBlock *B,
+            DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> &PhiPredIVs) {
+          // FIXME: we checked that the size of A and B are both 1 in
+          // simplifyDuplicateSwitchArms to make the Case list smaller to
+          // improve performance. If we decide to support BasicBlocks with more
+          // than just a single instruction, we need to check that A.size() ==
+          // B.size() here.
+
+          if (!IsBranchEq(cast<BranchInst>(A->getTerminator()),
+                          cast<BranchInst>(B->getTerminator())))
             return false;
 
-      return true;
-    };
+          // Need to check that PHIs in sucessors have matching values
+          for (auto *Succ : cast<BranchInst>(A->getTerminator())->successors())
+            for (PHINode &Phi : Succ->phis())
+              if (PhiPredIVs[&Phi][A] != PhiPredIVs[&Phi][B])
+                return false;
 
-    return IsBBEq(LHS->getCaseSuccessor(), RHS->getCaseSuccessor());
+          return true;
+        };
+
+    return IsBBEq(LHS->Case.getCaseSuccessor(), RHS->Case.getCaseSuccessor(),
+                  LHS->PhiPredIVs);
   }
 };
 } // namespace llvm
 
 bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
-  // Build a map where the key is the CaseHandle that contains the successor
-  // BasicBlock to replace all CaseHandle successor BasicBlocks in the value
-  // list. We use a custom DenseMapInfo to build this map in O(size(cases()).
-  // We use CaseHandle instead of BasicBlock since it allows us to do the
-  // replacement in O(size(cases)). If we had used BasicBlock, then to do the
-  // actual replacement, we'd need an additional nested loop to loop over all
-  // CaseHandle since there is no O(1) lookup of BasicBlock -> Cases.
-  DenseSet<const SwitchInst::CaseHandle *,
-           DenseMapInfo<const SwitchInst::CaseHandle *>>
-      ReplaceWith;
-
-  // We'd like to use CaseHandle* for our set because it is easier to write
-  // getEmptyKey and getTombstoneKey for CaseHandle* than it is for CaseHandle.
-  // We need to take ownership though, since SI->cases() makes no guarantee that
-  // the CaseHandle is still allocated on every iteration. We can skip over
-  // cases we know we won't consider for replacement.
-  SmallVector<SwitchInst::CaseHandle> Cases;
+  DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> PhiPredIVs;
+  SmallVector<CaseHandleWrapper> Cases;
   for (auto Case : SI->cases()) {
     BasicBlock *BB = Case.getCaseSuccessor();
     // Skip BBs that are not candidates for simplification.
@@ -7552,19 +7552,41 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     if (!T || !isa<BranchInst>(T))
       continue;
 
-    Cases.emplace_back(Case);
+    // Preprocess incoming PHI values for successors of BB.
+    for (BasicBlock *Succ : cast<BranchInst>(T)->successors()) {
+      for (PHINode &Phi : Succ->phis()) {
+        auto IncomingVals = PhiPredIVs.find(&Phi);
+        Value *Incoming = Phi.getIncomingValueForBlock(BB);
+        if (IncomingVals != PhiPredIVs.end())
+          IncomingVals->second.insert({BB, Incoming});
+        else
+          PhiPredIVs[&Phi] = {{BB, Incoming}};
+      }
+    }
+
+    Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs});
   }
 
+  // Build a set such that if the CaseHandleWrapper exists in the set and
+  // another CaseHandleWrapper isEqual, then the equivalent CaseHandleWrapper
+  // which is not in the set should be replaced with the one in the set. If the
+  // CaseHandleWrapper is not in the set, then it should be added to the set so
+  // other CaseHandleWrappers can check against it in the same manner. We chose
+  // to use CaseHandleWrapper instead of BasicBlock since we'd need an extra
+  // nested loop since there is no O(1) lookup of BasicBlock -> Cases in
+  // SwichInst.
+  DenseSet<const CaseHandleWrapper *, DenseMapInfo<const CaseHandleWrapper *>>
+      ReplaceWith;
   bool MadeChange = false;
-  for (auto &Case : Cases) {
-    // Case is a candidate for simplification. If we find a duplicate BB,
+  for (auto &CHW : Cases) {
+    // CHW is a candidate for simplification. If we find a duplicate BB,
     // replace it.
-    const auto Res = ReplaceWith.find(&Case);
+    const auto Res = ReplaceWith.find(&CHW);
     if (Res != ReplaceWith.end()) {
-      Case.setSuccessor((*Res)->getCaseSuccessor());
+      CHW.Case.setSuccessor((*Res)->Case.getCaseSuccessor());
       MadeChange = true;
     } else {
-      ReplaceWith.insert(&Case);
+      ReplaceWith.insert(&CHW);
     }
   }
 

>From 41903527f1cc8ae0d6df6e6b502b5eb5c2964082 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 1 Nov 2024 11:14:16 -0700
Subject: [PATCH 11/24] fixup! only support unconditional branches

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 40 +++++++++--------------
 1 file changed, 15 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 86983249787e24..f346257ea29aa8 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7474,30 +7474,14 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
       return LHS == RHS;
 
     auto IsBranchEq = [](BranchInst *A, BranchInst *B) {
-      if (A->isConditional() != B->isConditional())
-        return false;
-
-      if (A->isConditional()) {
-        // If the conditions are instructions, check equality up to
-        // commutativity. Otherwise, check that the two Values are the same.
-        Value *AC = A->getCondition();
-        Value *BC = B->getCondition();
-        auto *ACI = dyn_cast<Instruction>(AC);
-        auto *BCI = dyn_cast<Instruction>(BC);
+      assert(A->isUnconditional() && B->isUnconditional() &&
+             "Only supporting unconditional branches for now");
+      assert(A->getNumSuccessors() == 1 && B->getNumSuccessors() == 1 &&
+             "Expected unconditional branches to have one successor");
 
-        if (ACI && BCI && !areIdenticalUpToCommutativity(ACI, BCI))
-          return false;
-        if ((!ACI || !BCI) && AC != BC)
-          return false;
-      }
-
-      if (A->getNumSuccessors() != B->getNumSuccessors())
+      if (A->getSuccessor(0) != B->getSuccessor(0))
         return false;
 
-      for (unsigned I = 0; I < A->getNumSuccessors(); ++I)
-        if (A->getSuccessor(I) != B->getSuccessor(I))
-          return false;
-
       return true;
     };
 
@@ -7546,14 +7530,20 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     // SI need to be updated.
     if (BB->hasNPredecessorsOrMore(2))
       continue;
-    // FIXME: Relax that the terminator is a BranchInst by checking for equality
-    // on other kinds of terminators.
+
     Instruction *T = BB->getTerminator();
-    if (!T || !isa<BranchInst>(T))
+    if (!T)
+      continue;
+
+    // FIXME: Relax that the terminator is a BranchInst by checking for equality
+    // on other kinds of terminators. We decide to only support unconditional
+    // branches for now for compile time reasons.
+    auto *BI = dyn_cast<BranchInst>(T);
+    if (!BI || BI->isConditional())
       continue;
 
     // Preprocess incoming PHI values for successors of BB.
-    for (BasicBlock *Succ : cast<BranchInst>(T)->successors()) {
+    for (BasicBlock *Succ : BI->successors()) {
       for (PHINode &Phi : Succ->phis()) {
         auto IncomingVals = PhiPredIVs.find(&Phi);
         Value *Incoming = Phi.getIncomingValueForBlock(BB);

>From f04d67f6e582bd3088c47ccfdebda4a67f16efdc Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 1 Nov 2024 12:09:22 -0700
Subject: [PATCH 12/24] fixup! try improving getHashValue

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index f346257ea29aa8..9a3ea48eee835b 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7463,8 +7463,25 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
   static unsigned getHashValue(const CaseHandleWrapper *CT) {
     BasicBlock *Succ = CT->Case.getCaseSuccessor();
     BranchInst *BI = cast<BranchInst>(Succ->getTerminator());
-    auto It = BI->successors();
-    return hash_combine(Succ->size(), hash_combine_range(It.begin(), It.end()));
+    assert(BI->isUnconditional() &&
+           "Only supporting unconditional branches for now");
+    assert(BI->getNumSuccessors() == 1 &&
+           "Expected unconditional branches to have one successor");
+    assert(Succ->size() == 1 && "Expected just a single branch in the BB");
+
+    // Since we assume the BB is just a single BranchInst with a single
+    // succsessor, we hash as the BB and the incoming Values of its PHIs.
+    // Initially, we tried to just use the sucessor BB as the hash, but this had
+    // poor performance. We find that the extra computation of getting the
+    // incoming PHI values here leads to better performance on overall Set
+    // performance.
+    BasicBlock *BB = BI->getSuccessor(0);
+    SmallVector<Value *> PhiValsForBB;
+    for (PHINode &Phi : BB->phis())
+      PhiValsForBB.emplace_back(CT->PhiPredIVs[&Phi][BB]);
+
+    return hash_combine(
+        BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end()));
   }
   static bool isEqual(const CaseHandleWrapper *LHS,
                       const CaseHandleWrapper *RHS) {

>From 4ca2777a9acf5d79806d1cd138d62203816c8b1a Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 1 Nov 2024 12:18:50 -0700
Subject: [PATCH 13/24] fixup! nitty cleanup

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 30 ++++++++++++++---------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 9a3ea48eee835b..4f85497917e303 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7510,17 +7510,21 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
           // simplifyDuplicateSwitchArms to make the Case list smaller to
           // improve performance. If we decide to support BasicBlocks with more
           // than just a single instruction, we need to check that A.size() ==
-          // B.size() here.
+          // B.size() here, and we need to check more than just the BranchInsts
+          // for equality.
 
-          if (!IsBranchEq(cast<BranchInst>(A->getTerminator()),
-                          cast<BranchInst>(B->getTerminator())))
+          BranchInst *ABI = cast<BranchInst>(A->getTerminator());
+          if (!IsBranchEq(ABI, cast<BranchInst>(B->getTerminator())))
             return false;
 
-          // Need to check that PHIs in sucessors have matching values
-          for (auto *Succ : cast<BranchInst>(A->getTerminator())->successors())
-            for (PHINode &Phi : Succ->phis())
-              if (PhiPredIVs[&Phi][A] != PhiPredIVs[&Phi][B])
+          // Need to check that PHIs in successors have matching values
+          for (auto *Succ : ABI->successors()) {
+            for (PHINode &Phi : Succ->phis()) {
+              auto PredIVs = PhiPredIVs[&Phi];
+              if (PredIVs[A] != PredIVs[B])
                 return false;
+            }
+          }
 
           return true;
         };
@@ -7532,26 +7536,28 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
 } // namespace llvm
 
 bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
+
+  // Build PhiPredIVs and Cases. Skip BBs that are not candidates for
+  // simplification.
   DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> PhiPredIVs;
   SmallVector<CaseHandleWrapper> Cases;
   for (auto Case : SI->cases()) {
     BasicBlock *BB = Case.getCaseSuccessor();
-    // Skip BBs that are not candidates for simplification.
 
     // FIXME: Support more than just a single BranchInst. One way we could do
     // this is by taking a hashing approach of all insts in BB.
     if (BB->size() != 1)
       continue;
 
+    Instruction *T = BB->getTerminator();
+    if (!T)
+      continue;
+
     // FIXME: This case needs some extra care because the terminators other than
     // SI need to be updated.
     if (BB->hasNPredecessorsOrMore(2))
       continue;
 
-    Instruction *T = BB->getTerminator();
-    if (!T)
-      continue;
-
     // FIXME: Relax that the terminator is a BranchInst by checking for equality
     // on other kinds of terminators. We decide to only support unconditional
     // branches for now for compile time reasons.

>From 28620dfd7a84775e87cbb586760e640f963c7909 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 1 Nov 2024 12:46:28 -0700
Subject: [PATCH 14/24] fixup! don't reproccess map for same BB

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 4f85497917e303..df984f54d5f0b0 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7517,13 +7517,14 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
           if (!IsBranchEq(ABI, cast<BranchInst>(B->getTerminator())))
             return false;
 
-          // Need to check that PHIs in successors have matching values
-          for (auto *Succ : ABI->successors()) {
-            for (PHINode &Phi : Succ->phis()) {
-              auto PredIVs = PhiPredIVs[&Phi];
-              if (PredIVs[A] != PredIVs[B])
-                return false;
-            }
+          // Need to check that PHIs in successor have matching values
+          assert(ABI->getNumSuccessors() == 1 &&
+                 "Expected unconditional branches to have one successor");
+          BasicBlock *Succ = ABI->getSuccessor(0);
+          for (PHINode &Phi : Succ->phis()) {
+            auto PredIVs = PhiPredIVs[&Phi];
+            if (PredIVs[A] != PredIVs[B])
+              return false;
           }
 
           return true;
@@ -7541,6 +7542,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
   // simplification.
   DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> PhiPredIVs;
   SmallVector<CaseHandleWrapper> Cases;
+  SmallPtrSet<BasicBlock *, 8> Seen;
   for (auto Case : SI->cases()) {
     BasicBlock *BB = Case.getCaseSuccessor();
 
@@ -7565,6 +7567,13 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     if (!BI || BI->isConditional())
       continue;
 
+    // If we've seen BB before, then we've built PhiPredIVs for it already.
+    if (Seen.contains(BB)) {
+      Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs});
+      continue;
+    }
+    Seen.insert(BB);
+
     // Preprocess incoming PHI values for successors of BB.
     for (BasicBlock *Succ : BI->successors()) {
       for (PHINode &Phi : Succ->phis()) {

>From 9dcf124d42628e5865f4d1d2f80562a91cab59e5 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 1 Nov 2024 15:38:08 -0700
Subject: [PATCH 15/24] fixup! avoid calls to getIncomingValueForBlock

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 31 ++++++++++++-----------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index df984f54d5f0b0..8459aad7aa3d1a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7538,8 +7538,11 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
 
 bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
 
-  // Build PhiPredIVs and Cases. Skip BBs that are not candidates for
-  // simplification.
+  // Build Cases. Skip BBs that are not candidates for simplification. Mark
+  // PHINodes which need to be processed into PhiPredIVs. We decide to process
+  // an entire PHI at once opposed to calling getIncomingValueForBlock, since
+  // each call to getIncomingValueForBlock is O(|Preds|).
+  SmallPtrSet<PHINode *, 8> Phis;
   DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> PhiPredIVs;
   SmallVector<CaseHandleWrapper> Cases;
   SmallPtrSet<BasicBlock *, 8> Seen;
@@ -7567,28 +7570,26 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     if (!BI || BI->isConditional())
       continue;
 
-    // If we've seen BB before, then we've built PhiPredIVs for it already.
+    // If we've seen BB before, then processed its successor PHIs.
     if (Seen.contains(BB)) {
       Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs});
       continue;
     }
     Seen.insert(BB);
-
-    // Preprocess incoming PHI values for successors of BB.
-    for (BasicBlock *Succ : BI->successors()) {
-      for (PHINode &Phi : Succ->phis()) {
-        auto IncomingVals = PhiPredIVs.find(&Phi);
-        Value *Incoming = Phi.getIncomingValueForBlock(BB);
-        if (IncomingVals != PhiPredIVs.end())
-          IncomingVals->second.insert({BB, Incoming});
-        else
-          PhiPredIVs[&Phi] = {{BB, Incoming}};
-      }
-    }
+    // Keep track of which PHIs we need as keys in PhiPredIVs below.
+    for (BasicBlock *Succ : BI->successors())
+      for (PHINode &Phi : Succ->phis())
+        Phis.insert(&Phi);
 
     Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs});
   }
 
+  for (PHINode *Phi : Phis) {
+    PhiPredIVs[Phi] = DenseMap<BasicBlock *, Value *>();
+    for (auto &IV : Phi->incoming_values())
+      PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()});
+  }
+
   // Build a set such that if the CaseHandleWrapper exists in the set and
   // another CaseHandleWrapper isEqual, then the equivalent CaseHandleWrapper
   // which is not in the set should be replaced with the one in the set. If the

>From 41edd151fdfdaa3a0410942570be071c7beab2e4 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 1 Nov 2024 15:41:19 -0700
Subject: [PATCH 16/24] fixup! drop Seen vector since we no longer build phi
 map in loop

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 8459aad7aa3d1a..724a0a8a0fc6ed 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7545,7 +7545,6 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
   SmallPtrSet<PHINode *, 8> Phis;
   DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> PhiPredIVs;
   SmallVector<CaseHandleWrapper> Cases;
-  SmallPtrSet<BasicBlock *, 8> Seen;
   for (auto Case : SI->cases()) {
     BasicBlock *BB = Case.getCaseSuccessor();
 
@@ -7570,12 +7569,6 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     if (!BI || BI->isConditional())
       continue;
 
-    // If we've seen BB before, then processed its successor PHIs.
-    if (Seen.contains(BB)) {
-      Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs});
-      continue;
-    }
-    Seen.insert(BB);
     // Keep track of which PHIs we need as keys in PhiPredIVs below.
     for (BasicBlock *Succ : BI->successors())
       for (PHINode &Phi : Succ->phis())

>From 5c247db53227470f0eb0e47d9522bea8b4540291 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Sat, 2 Nov 2024 08:03:41 -0700
Subject: [PATCH 17/24] fixup! respond to review

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 724a0a8a0fc6ed..cd80a13382d4fc 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7453,11 +7453,11 @@ struct CaseHandleWrapper {
 namespace llvm {
 template <> struct DenseMapInfo<const CaseHandleWrapper *> {
   static const CaseHandleWrapper *getEmptyKey() {
-    return reinterpret_cast<CaseHandleWrapper *>(
+    return static_cast<CaseHandleWrapper *>(
         DenseMapInfo<void *>::getEmptyKey());
   }
   static const CaseHandleWrapper *getTombstoneKey() {
-    return reinterpret_cast<CaseHandleWrapper *>(
+    return static_cast<CaseHandleWrapper *>(
         DenseMapInfo<void *>::getTombstoneKey());
   }
   static unsigned getHashValue(const CaseHandleWrapper *CT) {
@@ -7522,7 +7522,7 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
                  "Expected unconditional branches to have one successor");
           BasicBlock *Succ = ABI->getSuccessor(0);
           for (PHINode &Phi : Succ->phis()) {
-            auto PredIVs = PhiPredIVs[&Phi];
+            auto &PredIVs = PhiPredIVs[&Phi];
             if (PredIVs[A] != PredIVs[B])
               return false;
           }

>From c3a1361f31191476669cd0c2fc22d12215cb6c3e Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Sat, 2 Nov 2024 08:49:06 -0700
Subject: [PATCH 18/24] fixup! some cleanup and add Seen checks

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 78 ++++++++++-------------
 1 file changed, 33 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index cd80a13382d4fc..8d13c1f8f766a0 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7490,59 +7490,45 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
     if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
       return LHS == RHS;
 
-    auto IsBranchEq = [](BranchInst *A, BranchInst *B) {
-      assert(A->isUnconditional() && B->isUnconditional() &&
-             "Only supporting unconditional branches for now");
-      assert(A->getNumSuccessors() == 1 && B->getNumSuccessors() == 1 &&
-             "Expected unconditional branches to have one successor");
+    BasicBlock *A = LHS->Case.getCaseSuccessor();
+    BasicBlock *B = RHS->Case.getCaseSuccessor();
+
+    // FIXME: we checked that the size of A and B are both 1 in
+    // simplifyDuplicateSwitchArms to make the Case list smaller to
+    // improve performance. If we decide to support BasicBlocks with more
+    // than just a single instruction, we need to check that A.size() ==
+    // B.size() here, and we need to check more than just the BranchInsts
+    // for equality.
+
+    BranchInst *ABI = cast<BranchInst>(A->getTerminator());
+    BranchInst *BBI = cast<BranchInst>(B->getTerminator());
+    assert(ABI->isUnconditional() && BBI->isUnconditional() &&
+           "Only supporting unconditional branches for now");
+    assert(ABI->getNumSuccessors() == 1 &&
+           "Expected unconditional branches to have one successor");
+    if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
+      return false;
 
-      if (A->getSuccessor(0) != B->getSuccessor(0))
+    // Need to check that PHIs in successor have matching values
+    BasicBlock *Succ = ABI->getSuccessor(0);
+    for (PHINode &Phi : Succ->phis()) {
+      auto &PredIVs = LHS->PhiPredIVs[&Phi];
+      if (PredIVs[A] != PredIVs[B])
         return false;
+    }
 
-      return true;
-    };
-
-    auto IsBBEq =
-        [&IsBranchEq](
-            BasicBlock *A, BasicBlock *B,
-            DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> &PhiPredIVs) {
-          // FIXME: we checked that the size of A and B are both 1 in
-          // simplifyDuplicateSwitchArms to make the Case list smaller to
-          // improve performance. If we decide to support BasicBlocks with more
-          // than just a single instruction, we need to check that A.size() ==
-          // B.size() here, and we need to check more than just the BranchInsts
-          // for equality.
-
-          BranchInst *ABI = cast<BranchInst>(A->getTerminator());
-          if (!IsBranchEq(ABI, cast<BranchInst>(B->getTerminator())))
-            return false;
-
-          // Need to check that PHIs in successor have matching values
-          assert(ABI->getNumSuccessors() == 1 &&
-                 "Expected unconditional branches to have one successor");
-          BasicBlock *Succ = ABI->getSuccessor(0);
-          for (PHINode &Phi : Succ->phis()) {
-            auto &PredIVs = PhiPredIVs[&Phi];
-            if (PredIVs[A] != PredIVs[B])
-              return false;
-          }
-
-          return true;
-        };
-
-    return IsBBEq(LHS->Case.getCaseSuccessor(), RHS->Case.getCaseSuccessor(),
-                  LHS->PhiPredIVs);
+    return true;
   }
 };
 } // namespace llvm
 
 bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
-
   // Build Cases. Skip BBs that are not candidates for simplification. Mark
   // PHINodes which need to be processed into PhiPredIVs. We decide to process
   // an entire PHI at once opposed to calling getIncomingValueForBlock, since
   // each call to getIncomingValueForBlock is O(|Preds|).
   SmallPtrSet<PHINode *, 8> Phis;
+  SmallPtrSet<BasicBlock *, 8> Seen;
   DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> PhiPredIVs;
   SmallVector<CaseHandleWrapper> Cases;
   for (auto Case : SI->cases()) {
@@ -7569,11 +7555,13 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     if (!BI || BI->isConditional())
       continue;
 
-    // Keep track of which PHIs we need as keys in PhiPredIVs below.
-    for (BasicBlock *Succ : BI->successors())
-      for (PHINode &Phi : Succ->phis())
-        Phis.insert(&Phi);
-
+    if (!Seen.contains(BB)) {
+      Seen.insert(BB);
+      // Keep track of which PHIs we need as keys in PhiPredIVs below.
+      for (BasicBlock *Succ : BI->successors())
+        for (PHINode &Phi : Succ->phis())
+          Phis.insert(&Phi);
+    }
     Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs});
   }
 

>From b5630807557580b807d6002f8158b6003d73e9d2 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 4 Nov 2024 18:37:33 -0800
Subject: [PATCH 19/24] fixup! try and avoid some resizes

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 25 ++++++++++++-----------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 8d13c1f8f766a0..8cd4617a751721 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7460,8 +7460,8 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
     return static_cast<CaseHandleWrapper *>(
         DenseMapInfo<void *>::getTombstoneKey());
   }
-  static unsigned getHashValue(const CaseHandleWrapper *CT) {
-    BasicBlock *Succ = CT->Case.getCaseSuccessor();
+  static unsigned getHashValue(const CaseHandleWrapper *CHW) {
+    BasicBlock *Succ = CHW->Case.getCaseSuccessor();
     BranchInst *BI = cast<BranchInst>(Succ->getTerminator());
     assert(BI->isUnconditional() &&
            "Only supporting unconditional branches for now");
@@ -7478,7 +7478,7 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
     BasicBlock *BB = BI->getSuccessor(0);
     SmallVector<Value *> PhiValsForBB;
     for (PHINode &Phi : BB->phis())
-      PhiValsForBB.emplace_back(CT->PhiPredIVs[&Phi][BB]);
+      PhiValsForBB.emplace_back(CHW->PhiPredIVs[&Phi][BB]);
 
     return hash_combine(
         BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end()));
@@ -7530,8 +7530,8 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
   SmallPtrSet<PHINode *, 8> Phis;
   SmallPtrSet<BasicBlock *, 8> Seen;
   DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> PhiPredIVs;
-  SmallVector<CaseHandleWrapper> Cases;
-  for (auto Case : SI->cases()) {
+  std::vector<CaseHandleWrapper> Cases;
+  for (auto &Case : SI->cases()) {
     BasicBlock *BB = Case.getCaseSuccessor();
 
     // FIXME: Support more than just a single BranchInst. One way we could do
@@ -7555,8 +7555,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     if (!BI || BI->isConditional())
       continue;
 
-    if (!Seen.contains(BB)) {
-      Seen.insert(BB);
+    if (Seen.insert(BB).second) {
       // Keep track of which PHIs we need as keys in PhiPredIVs below.
       for (BasicBlock *Succ : BI->successors())
         for (PHINode &Phi : Succ->phis())
@@ -7566,7 +7565,8 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
   }
 
   for (PHINode *Phi : Phis) {
-    PhiPredIVs[Phi] = DenseMap<BasicBlock *, Value *>();
+    PhiPredIVs[Phi] =
+        DenseMap<BasicBlock *, Value *>(Phi->getNumIncomingValues());
     for (auto &IV : Phi->incoming_values())
       PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()});
   }
@@ -7580,14 +7580,15 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
   // nested loop since there is no O(1) lookup of BasicBlock -> Cases in
   // SwichInst.
   DenseSet<const CaseHandleWrapper *, DenseMapInfo<const CaseHandleWrapper *>>
-      ReplaceWith;
+      ReplaceWith(Cases.size());
+
   bool MadeChange = false;
   for (auto &CHW : Cases) {
     // CHW is a candidate for simplification. If we find a duplicate BB,
     // replace it.
-    const auto Res = ReplaceWith.find(&CHW);
-    if (Res != ReplaceWith.end()) {
-      CHW.Case.setSuccessor((*Res)->Case.getCaseSuccessor());
+    const auto [It, Inserted] = ReplaceWith.insert(&CHW);
+    if (!Inserted) {
+      CHW.Case.setSuccessor((*It)->Case.getCaseSuccessor());
       MadeChange = true;
     } else {
       ReplaceWith.insert(&CHW);

>From 67b3c8023f277baa120618d495a798761311adab Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 4 Nov 2024 19:41:25 -0800
Subject: [PATCH 20/24] fixup! presize PhiPredIVs based on first pass data

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 8cd4617a751721..84a07d4bf48b11 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7564,6 +7564,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs});
   }
 
+  PhiPredIVs.reserve(Phis.size());
   for (PHINode *Phi : Phis) {
     PhiPredIVs[Phi] =
         DenseMap<BasicBlock *, Value *>(Phi->getNumIncomingValues());

>From 94ff86ef997cf2cce9c58c02c88d27056b68859e Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 4 Nov 2024 20:23:01 -0800
Subject: [PATCH 21/24] fixup! use reserve instead of resize

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 84a07d4bf48b11..e1398426630e78 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7447,7 +7447,7 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
 /// of the incoming values.
 struct CaseHandleWrapper {
   const SwitchInst::CaseHandle Case;
-  DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> &PhiPredIVs;
+  DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> *PhiPredIVs;
 };
 
 namespace llvm {
@@ -7478,7 +7478,7 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
     BasicBlock *BB = BI->getSuccessor(0);
     SmallVector<Value *> PhiValsForBB;
     for (PHINode &Phi : BB->phis())
-      PhiValsForBB.emplace_back(CHW->PhiPredIVs[&Phi][BB]);
+      PhiValsForBB.emplace_back((*CHW->PhiPredIVs)[&Phi][BB]);
 
     return hash_combine(
         BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end()));
@@ -7512,7 +7512,7 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
     // Need to check that PHIs in successor have matching values
     BasicBlock *Succ = ABI->getSuccessor(0);
     for (PHINode &Phi : Succ->phis()) {
-      auto &PredIVs = LHS->PhiPredIVs[&Phi];
+      auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
       if (PredIVs[A] != PredIVs[B])
         return false;
     }
@@ -7531,6 +7531,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
   SmallPtrSet<BasicBlock *, 8> Seen;
   DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> PhiPredIVs;
   std::vector<CaseHandleWrapper> Cases;
+  Cases.reserve(SI->getNumCases());
   for (auto &Case : SI->cases()) {
     BasicBlock *BB = Case.getCaseSuccessor();
 
@@ -7561,7 +7562,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
         for (PHINode &Phi : Succ->phis())
           Phis.insert(&Phi);
     }
-    Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs});
+    Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs});
   }
 
   PhiPredIVs.reserve(Phis.size());
@@ -7581,7 +7582,8 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
   // nested loop since there is no O(1) lookup of BasicBlock -> Cases in
   // SwichInst.
   DenseSet<const CaseHandleWrapper *, DenseMapInfo<const CaseHandleWrapper *>>
-      ReplaceWith(Cases.size());
+      ReplaceWith;
+  ReplaceWith.reserve(Cases.size());
 
   bool MadeChange = false;
   for (auto &CHW : Cases) {

>From c6a5e5257214ddee4111f029e1480c39290b7672 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 4 Nov 2024 21:27:31 -0800
Subject: [PATCH 22/24] fixup! precompute incoming values from BB

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 31 +++++++++++++----------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index e1398426630e78..9e73c7c2d8ca73 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7442,12 +7442,16 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
 /// PHINode::getIncomingValueForBlock has O(|Preds|), so we'd like to avoid
 /// calling this function on each BasicBlock every time isEqual is called,
 /// especially since the same BasicBlock may be passed as an argument multiple
-/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
-/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
-/// of the incoming values.
+/// times. To do this, we can precompute a map, PhiPredIVs, of PHINode -> Pred
+/// BasicBlock -> IncomingValue and add it in the Wrapper so isEqual can do O(1)
+/// checking of the incoming values. We also store the SmallVector PhiVals which
+/// correspond to the incoming Value *s of all PHIs from
+/// Case.getCaseSuccessor(), which is used in getHashValue instead of walking
+/// all successor phis.
 struct CaseHandleWrapper {
   const SwitchInst::CaseHandle Case;
   DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> *PhiPredIVs;
+  SmallVector<Value *> *PhiVals;
 };
 
 namespace llvm {
@@ -7472,16 +7476,10 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
     // Since we assume the BB is just a single BranchInst with a single
     // succsessor, we hash as the BB and the incoming Values of its PHIs.
     // Initially, we tried to just use the sucessor BB as the hash, but this had
-    // poor performance. We find that the extra computation of getting the
-    // incoming PHI values here leads to better performance on overall Set
-    // performance.
+    // poor performance.
     BasicBlock *BB = BI->getSuccessor(0);
-    SmallVector<Value *> PhiValsForBB;
-    for (PHINode &Phi : BB->phis())
-      PhiValsForBB.emplace_back((*CHW->PhiPredIVs)[&Phi][BB]);
-
     return hash_combine(
-        BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end()));
+        BB, hash_combine_range(CHW->PhiVals->begin(), CHW->PhiVals->end()));
   }
   static bool isEqual(const CaseHandleWrapper *LHS,
                       const CaseHandleWrapper *RHS) {
@@ -7530,6 +7528,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
   SmallPtrSet<PHINode *, 8> Phis;
   SmallPtrSet<BasicBlock *, 8> Seen;
   DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> PhiPredIVs;
+  DenseMap<BasicBlock *, SmallVector<Value *>> PhiVals;
   std::vector<CaseHandleWrapper> Cases;
   Cases.reserve(SI->getNumCases());
   for (auto &Case : SI->cases()) {
@@ -7562,15 +7561,19 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
         for (PHINode &Phi : Succ->phis())
           Phis.insert(&Phi);
     }
-    Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs});
+    PhiVals[BB] = SmallVector<Value *>();
+    Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs, &PhiVals[BB]});
   }
 
   PhiPredIVs.reserve(Phis.size());
   for (PHINode *Phi : Phis) {
     PhiPredIVs[Phi] =
         DenseMap<BasicBlock *, Value *>(Phi->getNumIncomingValues());
-    for (auto &IV : Phi->incoming_values())
-      PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()});
+    for (auto &IV : Phi->incoming_values()) {
+      BasicBlock *BB = Phi->getIncomingBlock(IV);
+      PhiPredIVs[Phi].insert({BB, IV.get()});
+      PhiVals[BB].emplace_back(IV.get());
+    }
   }
 
   // Build a set such that if the CaseHandleWrapper exists in the set and

>From 241fef5c94ee1bda0bd1da581bd029b7e030a97b Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 5 Nov 2024 07:58:13 -0800
Subject: [PATCH 23/24] fixup! don't hash an empty set of values

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 9e73c7c2d8ca73..066c0303441f58 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7476,10 +7476,13 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
     // Since we assume the BB is just a single BranchInst with a single
     // succsessor, we hash as the BB and the incoming Values of its PHIs.
     // Initially, we tried to just use the sucessor BB as the hash, but this had
-    // poor performance.
+    // poor performance. If the BB has no Phis, then just use BB as the hash.
     BasicBlock *BB = BI->getSuccessor(0);
+    if (CHW->PhiVals->begin() == CHW->PhiVals->end())
+      return hash_value(BB);
     return hash_combine(
-        BB, hash_combine_range(CHW->PhiVals->begin(), CHW->PhiVals->end()));
+        hash_value(BB),
+        hash_combine_range(CHW->PhiVals->begin(), CHW->PhiVals->end()));
   }
   static bool isEqual(const CaseHandleWrapper *LHS,
                       const CaseHandleWrapper *RHS) {

>From aa4cf4354a425a71aa78dc180f31c98d3dce05df Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 5 Nov 2024 08:05:23 -0800
Subject: [PATCH 24/24] fixup! add some comments

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 29 +++++++++++++----------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 066c0303441f58..ad83514682c93e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7438,16 +7438,16 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
 }
 
 /// Checking whether two CaseHandle.getCaseSuccessor() are equal depends on
-/// the incoming values of their successor PHINodes.
-/// PHINode::getIncomingValueForBlock has O(|Preds|), so we'd like to avoid
+/// the BasicBlock and the incoming values of their successor PHINodes.
+/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
 /// calling this function on each BasicBlock every time isEqual is called,
 /// especially since the same BasicBlock may be passed as an argument multiple
 /// times. To do this, we can precompute a map, PhiPredIVs, of PHINode -> Pred
 /// BasicBlock -> IncomingValue and add it in the Wrapper so isEqual can do O(1)
-/// checking of the incoming values. We also store the SmallVector PhiVals which
-/// correspond to the incoming Value *s of all PHIs from
-/// Case.getCaseSuccessor(), which is used in getHashValue instead of walking
-/// all successor phis.
+/// checking of the incoming values. We also want to use the incoming Phi
+/// values of a getCaseSuccessor to calculate the hash value. In order to avoid
+/// iterating all of the successor Phis on evry call to getHashValue, we
+/// precompute a list of incoming values from getCaseSucessor, PhiVals.
 struct CaseHandleWrapper {
   const SwitchInst::CaseHandle Case;
   DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> *PhiPredIVs;
@@ -7474,9 +7474,10 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
     assert(Succ->size() == 1 && "Expected just a single branch in the BB");
 
     // Since we assume the BB is just a single BranchInst with a single
-    // succsessor, we hash as the BB and the incoming Values of its PHIs.
-    // Initially, we tried to just use the sucessor BB as the hash, but this had
-    // poor performance. If the BB has no Phis, then just use BB as the hash.
+    // succsessor, we hash as the BB and the incoming Values of its sucessor
+    // Phis. Initially, we tried to just use the sucessor BB as the hash, but
+    // this had poor performance. If the BB has no sucessor Phis, then just use
+    // BB to compute the hash.
     BasicBlock *BB = BI->getSuccessor(0);
     if (CHW->PhiVals->begin() == CHW->PhiVals->end())
       return hash_value(BB);
@@ -7526,8 +7527,9 @@ template <> struct DenseMapInfo<const CaseHandleWrapper *> {
 bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
   // Build Cases. Skip BBs that are not candidates for simplification. Mark
   // PHINodes which need to be processed into PhiPredIVs. We decide to process
-  // an entire PHI at once opposed to calling getIncomingValueForBlock, since
-  // each call to getIncomingValueForBlock is O(|Preds|).
+  // an entire PHI at once after the loop, opposed to calling
+  // getIncomingValueForBlock inside this loop, since each call to
+  // getIncomingValueForBlock is O(|Preds|).
   SmallPtrSet<PHINode *, 8> Phis;
   SmallPtrSet<BasicBlock *, 8> Seen;
   DenseMap<PHINode *, DenseMap<BasicBlock *, Value *>> PhiPredIVs;
@@ -7559,7 +7561,8 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
       continue;
 
     if (Seen.insert(BB).second) {
-      // Keep track of which PHIs we need as keys in PhiPredIVs below.
+      // Keep track of which PHIs we need as keys in PhiPredIVs and whose values
+      // we need to get for PhiVals below.
       for (BasicBlock *Succ : BI->successors())
         for (PHINode &Phi : Succ->phis())
           Phis.insert(&Phi);
@@ -7568,6 +7571,8 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
     Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs, &PhiVals[BB]});
   }
 
+  // Precompute the data structures to improve performance of isEqual and
+  // getHashValue for CaseHandleWrapper.
   PhiPredIVs.reserve(Phis.size());
   for (PHINode *Phi : Phis) {
     PhiPredIVs[Phi] =



More information about the llvm-commits mailing list