[llvm] [CGP] Split off switch cases with 0 and another power-of-2 const. (PR #139321)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 9 12:48:55 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

Add a new transform that splits off switch cases with 0 and another
power-of-2 constant into a single AND + ICMP + BR sequence. This removes a
branch, which can be highly profitable, especially when the switch controls
exiting the loop.

Alive2 proof showing that a power-of-2 constant is required:
https://alive2.llvm.org/ce/z/VIMMNB.

---

Patch is 26.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139321.diff


3 Files Affected:

- (modified) llvm/lib/CodeGen/CodeGenPrepare.cpp (+75-3) 
- (added) llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll (+204) 
- (added) llvm/test/Transforms/CodeGenPrepare/AArch64/switch-cases-to-branch-and.ll (+523) 


``````````diff
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 2c53a9c27ccb2..450219799a78a 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -441,7 +441,8 @@ class CodeGenPrepare {
   bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
   bool optimizeSwitchType(SwitchInst *SI);
   bool optimizeSwitchPhiConstants(SwitchInst *SI);
-  bool optimizeSwitchInst(SwitchInst *SI);
+  bool optimizeSwitchPow2Constant(SwitchInst *SI, ModifyDT &ModifiedDT);
+  bool optimizeSwitchInst(SwitchInst *SI, ModifyDT &ModifiedDT);
   bool optimizeExtractElementInst(Instruction *Inst);
   bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
   bool fixupDbgValue(Instruction *I);
@@ -7888,9 +7889,80 @@ bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
   return Changed;
 }
 
-bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+bool CodeGenPrepare::optimizeSwitchPow2Constant(SwitchInst *SI,
+                                                ModifyDT &ModifiedDT) {
+  // Try to split off and combine a case with 0 and a power-of-2 together to a
+  // single check and branch.
+
+  // Bail out if there either aren't enough cases to fold or too many.
+  if (SI->getNumCases() < 2 || SI->getNumCases() >= 8)
+    return false;
+
+  // Collect cases and sort them so that power-of-2s come first in ascending
+  // order.
+  SmallVector<std::pair<APInt, BasicBlock *>> Cases;
+  for (auto &C : SI->cases())
+    Cases.emplace_back(C.getCaseValue()->getValue(), C.getCaseSuccessor());
+  sort(Cases, [](const auto &A, const auto &B) {
+    const APInt &AV = A.first;
+    const APInt &BV = B.first;
+    if (AV.isPowerOf2() != BV.isPowerOf2())
+      return AV.isPowerOf2();
+    return AV.ult(BV);
+  });
+
+  // Bail out if we don't have a single power-of-2 constant, followed by zero
+  // with a common destination.
+  // TODO: could support multiple power-of-2s by just picking one.
+  BasicBlock *Dst = Cases[0].second;
+  APInt Pow2 = Cases[0].first;
+  if (Dst != Cases[1].second || !Cases[1].first.isZero() || !Pow2.isPowerOf2())
+    return false;
+
+  // Limit the transform to switches leaving loops for now.
+  if (LI->getLoopFor(Dst) == LI->getLoopFor(SI->getParent()))
+    return false;
+
+  // Check if there are case values before/after the power-of-2 that are
+  // consecutive. In that case, they can be generated as range-checks.
+  sort(Cases,
+       [](const auto &A, const auto &B) { return A.first.ult(B.first); });
+  auto Idx = find_if(Cases, [Pow2](const auto &C) { return C.first == Pow2; });
+  bool Increasing = Idx + 1 != Cases.end() && (Idx + 1)->second == Dst &&
+                    Idx->first + 1 == (Idx + 1)->first;
+  bool Decreasing = Idx != Cases.begin() && (Idx - 1)->second == Dst &&
+                    Idx->first - 1 == (Idx - 1)->first;
+  if (Increasing || Decreasing)
+    return false;
+
+  auto *OldBB = SI->getParent();
+  auto *NewBB = OldBB->splitBasicBlock(OldBB->getTerminator()->getIterator());
+  OldBB->getTerminator()->eraseFromParent();
+  IRBuilder<> B(OldBB);
+  auto *Pow2CI = ConstantInt::get(OldBB->getContext(), Pow2);
+  auto *And = B.CreateAnd(
+      SI->getCondition(),
+      B.CreateNeg(B.CreateAdd(Pow2CI, B.getIntN(Pow2.getBitWidth(), 1))));
+  auto *C = B.CreateICmpEQ(And, B.getIntN(Pow2.getBitWidth(), 0));
+  B.CreateCondBr(C, Dst, SI->getParent());
+  SI->removeCase(
+      SI->findCaseValue(ConstantInt::get(OldBB->getContext(), Cases[0].first)));
+  SI->removeCase(SI->findCaseValue(Pow2CI));
+
+  for (auto &P : Dst->phis()) {
+    P.addIncoming(P.getIncomingValueForBlock(NewBB), OldBB);
+    P.removeIncomingValue(NewBB);
+    P.removeIncomingValue(NewBB);
+  }
+
+  ModifiedDT = ModifyDT::ModifyBBDT;
+  return true;
+}
+
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI, ModifyDT &ModifiedDT) {
   bool Changed = optimizeSwitchType(SI);
   Changed |= optimizeSwitchPhiConstants(SI);
+  Changed |= optimizeSwitchPow2Constant(SI, ModifiedDT);
   return Changed;
 }
 
@@ -8815,7 +8887,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
   case Instruction::ShuffleVector:
     return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
   case Instruction::Switch:
-    return optimizeSwitchInst(cast<SwitchInst>(I));
+    return optimizeSwitchInst(cast<SwitchInst>(I), ModifiedDT);
   case Instruction::ExtractElement:
     return optimizeExtractElementInst(cast<ExtractElementInst>(I));
   case Instruction::Br:
diff --git a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
new file mode 100644
index 0000000000000..7c9ab884cd376
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
@@ -0,0 +1,204 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -O3 -mtriple=arm64-apple-macosx -o - %s | FileCheck %s
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) {
+; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    mov w8, #100 ; =0x64
+; CHECK-NEXT:    mov w9, #223 ; =0xdf
+; CHECK-NEXT:  LBB0_1: ; %loop.header
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    tst w0, w9
+; CHECK-NEXT:    b.eq LBB0_4
+; CHECK-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    and w10, w0, #0xff
+; CHECK-NEXT:    cmp w10, #124
+; CHECK-NEXT:    b.eq LBB0_5
+; CHECK-NEXT:  ; %bb.3: ; %loop.latch
+; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    subs w8, w8, #1
+; CHECK-NEXT:    b.ne LBB0_1
+; CHECK-NEXT:  LBB0_4:
+; CHECK-NEXT:    mov w0, #20 ; =0x14
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB0_5: ; %e2
+; CHECK-NEXT:    mov w0, #30 ; =0x1e
+; CHECK-NEXT:    ret
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i8 %v, label %loop.latch [
+  i8 32, label %e1
+  i8 0, label %e1
+  i8 124, label %e2
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %c = icmp eq i32 %iv.next, 100
+  br i1 %c, label %e1, label %loop.header
+
+e1:
+  ret i32 20
+
+e2:
+  ret i32 30
+}
+
+define i64 @consecutive_match_both(ptr %p, i32 %param) {
+; CHECK-LABEL: consecutive_match_both:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    mov w8, #1 ; =0x1
+; CHECK-NEXT:    mov w9, #100 ; =0x64
+; CHECK-NEXT:    mov w10, #249 ; =0xf9
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    b LBB1_2
+; CHECK-NEXT:  LBB1_1: ; %loop.latch
+; CHECK-NEXT:    ; in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    subs w9, w9, #1
+; CHECK-NEXT:    b.eq LBB1_5
+; CHECK-NEXT:  LBB1_2: ; %loop.header
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cmp w1, #7
+; CHECK-NEXT:    b.hi LBB1_1
+; CHECK-NEXT:  ; %bb.3: ; %loop.header
+; CHECK-NEXT:    ; in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    tst w8, w10
+; CHECK-NEXT:    b.eq LBB1_1
+; CHECK-NEXT:  ; %bb.4: ; %e0
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB1_5:
+; CHECK-NEXT:    mov x0, #-42 ; =0xffffffffffffffd6
+; CHECK-NEXT:    ret
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i32 %param, label %loop.latch [
+  i32 7, label %e0
+  i32 6, label %e0
+  i32 5, label %e0
+  i32 4, label %e0
+  i32 3, label %e0
+  i32 0, label %e0
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e1, label %loop.header
+
+e0:
+  %m = getelementptr i8, ptr %p, i64 20
+  br label %e1
+
+e1:
+  %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+  ret i64 %res
+}
+
+define i64 @consecutive_match_before(ptr %p, i32 %param) {
+; CHECK-LABEL: consecutive_match_before:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    mov w8, #1 ; =0x1
+; CHECK-NEXT:    mov w9, #100 ; =0x64
+; CHECK-NEXT:    mov w10, #25 ; =0x19
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    b LBB2_2
+; CHECK-NEXT:  LBB2_1: ; %loop.latch
+; CHECK-NEXT:    ; in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    subs w9, w9, #1
+; CHECK-NEXT:    b.eq LBB2_5
+; CHECK-NEXT:  LBB2_2: ; %loop.header
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cmp w1, #4
+; CHECK-NEXT:    b.hi LBB2_1
+; CHECK-NEXT:  ; %bb.3: ; %loop.header
+; CHECK-NEXT:    ; in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    tst w8, w10
+; CHECK-NEXT:    b.eq LBB2_1
+; CHECK-NEXT:  ; %bb.4: ; %e0
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB2_5:
+; CHECK-NEXT:    mov x0, #-42 ; =0xffffffffffffffd6
+; CHECK-NEXT:    ret
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i32 %param, label %loop.latch [
+  i32 4, label %e0
+  i32 3, label %e0
+  i32 0, label %e0
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e1, label %loop.header
+
+e0:
+  %m = getelementptr i8, ptr %p, i64 20
+  br label %e1
+
+e1:
+  %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+  ret i64 %res
+}
+
+define i64 @consecutive_match_after(ptr %p, i32 %param) {
+; CHECK-LABEL: consecutive_match_after:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    mov w8, #1 ; =0x1
+; CHECK-NEXT:    mov w9, #100 ; =0x64
+; CHECK-NEXT:    mov w10, #49 ; =0x31
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    b LBB3_2
+; CHECK-NEXT:  LBB3_1: ; %loop.latch
+; CHECK-NEXT:    ; in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT:    subs w9, w9, #1
+; CHECK-NEXT:    b.eq LBB3_5
+; CHECK-NEXT:  LBB3_2: ; %loop.header
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cmp w1, #5
+; CHECK-NEXT:    b.hi LBB3_1
+; CHECK-NEXT:  ; %bb.3: ; %loop.header
+; CHECK-NEXT:    ; in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT:    tst w8, w10
+; CHECK-NEXT:    b.eq LBB3_1
+; CHECK-NEXT:  ; %bb.4: ; %e0
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB3_5:
+; CHECK-NEXT:    mov x0, #-42 ; =0xffffffffffffffd6
+; CHECK-NEXT:    ret
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i32 %param, label %loop.latch [
+  i32 5, label %e0
+  i32 4, label %e0
+  i32 0, label %e0
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e1, label %loop.header
+
+e0:
+  %m = getelementptr i8, ptr %p, i64 20
+  br label %e1
+
+e1:
+  %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+  ret i64 %res
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/switch-cases-to-branch-and.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/switch-cases-to-branch-and.ll
new file mode 100644
index 0000000000000..f01b3008b018a
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/switch-cases-to-branch-and.ll
@@ -0,0 +1,523 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -codegenprepare -S -mtriple=aarch64 %s | FileCheck %s
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) {
+; CHECK-LABEL: define i32 @switch_with_matching_dests_0_and_pow2_3_cases(
+; CHECK-SAME: i8 [[V:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[V]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP3]], -33
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[E1:.*]], label %[[BB3:.*]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP3]], 124
+; CHECK-NEXT:    br i1 [[COND]], label %[[E2:.*]], label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[C]], label %[[E1]], label %[[LOOP_HEADER]]
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    ret i32 20
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    ret i32 30
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i8 %v, label %loop.latch [
+  i8 32, label %e1
+  i8 0, label %e1
+  i8 124, label %e2
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %c = icmp eq i32 %iv.next, 100
+  br i1 %c, label %e1, label %loop.header
+
+e1:
+  ret i32 20
+
+e2:
+  ret i32 30
+}
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases_swapped(i8 %v) {
+; CHECK-LABEL: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_swapped(
+; CHECK-SAME: i8 [[V:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[V]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP3]], -33
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[E1:.*]], label %[[BB3:.*]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP3]], 124
+; CHECK-NEXT:    br i1 [[COND]], label %[[E2:.*]], label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[C]], label %[[E0:.*]], label %[[LOOP_HEADER]]
+; CHECK:       [[E0]]:
+; CHECK-NEXT:    ret i32 10
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    ret i32 20
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    ret i32 30
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i8 %v, label %loop.latch [
+  i8 0, label %e1
+  i8 32, label %e1
+  i8 124, label %e2
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %c = icmp eq i32 %iv.next, 100
+  br i1 %c, label %e0, label %loop.header
+
+e0:
+  ret i32 10
+
+e1:
+  ret i32 20
+
+e2:
+  ret i32 30
+}
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases_with_phi(i8 %v, i1 %c) {
+; CHECK-LABEL: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_with_phi(
+; CHECK-SAME: i8 [[V:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[E1:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[V]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -33
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[E1]], label %[[BB3:.*]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP0]], 124
+; CHECK-NEXT:    br i1 [[COND]], label %[[E2:.*]], label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[EC]], label %[[E0:.*]], label %[[LOOP_HEADER]]
+; CHECK:       [[E0]]:
+; CHECK-NEXT:    ret i32 10
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 20, %[[LOOP_HEADER]] ]
+; CHECK-NEXT:    ret i32 [[P]]
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    ret i32 30
+;
+entry:
+  br i1 %c, label %then, label %e1
+
+then:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %then], [ %iv.next, %loop.latch ]
+  switch i8 %v, label %loop.latch [
+  i8 32, label %e1
+  i8 0, label %e1
+  i8 124, label %e2
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e0, label %loop.header
+
+e0:
+  ret i32 10
+
+e1:
+  %p = phi i32 [ 0, %entry ], [ 20, %loop.header ], [ 20, %loop.header ]
+  ret i32 %p
+
+e2:
+  ret i32 30
+}
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases_all_different_succs(i8 %v) {
+; CHECK-LABEL: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_all_different_succs(
+; CHECK-SAME: i8 [[V:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[V]] to i32
+; CHECK-NEXT:    switch i32 [[TMP0]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT:      i32 32, label %[[E1:.*]]
+; CHECK-NEXT:      i32 0, label %[[E2:.*]]
+; CHECK-NEXT:      i32 124, label %[[E3:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[EC]], label %[[E0:.*]], label %[[LOOP_HEADER]]
+; CHECK:       [[E0]]:
+; CHECK-NEXT:    ret i32 10
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    ret i32 20
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    ret i32 30
+; CHECK:       [[E3]]:
+; CHECK-NEXT:    ret i32 40
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i8 %v, label %loop.latch [
+  i8 32, label %e1
+  i8 0, label %e2
+  i8 124, label %e3
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e0, label %loop.header
+
+e0:
+  ret i32 10
+
+e1:
+  ret i32 20
+
+e2:
+  ret i32 30
+
+e3:
+  ret i32 40
+}
+
+define i32 @switch_in_loop_with_matching_dests_0_and_pow2_3_cases(ptr %start) {
+; CHECK-LABEL: define i32 @switch_in_loop_with_matching_dests_0_and_pow2_3_cases(
+; CHECK-SAME: ptr [[START:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[P:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[TMP4:.*]] ]
+; CHECK-NEXT:    [[TMP0]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1
+; CHECK-NEXT:    [[L:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[L]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP1]], -33
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[E1:.*]], label %[[TMP4]]
+; CHECK:       [[TMP4]]:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP1]], 124
+; CHECK-NEXT:    br i1 [[COND]], label %[[E2:.*]], label %[[LOOP]]
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    br label %[[E2]]
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ -1, %[[E1]] ], [ 0, %[[TMP4]] ]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  br label %loop
+
+loop:
+  %p = phi ptr [ %start, %entry ], [ %4, %loop ]
+  %4 = getelementptr inbounds nuw i8, ptr %p, i64 1
+  %l = load i8, ptr %4, align 1
+  switch i8 %l, label %loop [
+  i8 32, label %e1
+  i8 0, label %e1
+  i8 124, label %e2
+  ]
+
+e1:
+  br label %e2
+
+e2:
+  %8 = phi i32 [ -1, %e1 ], [ 0, %loop ]
+  ret i32 %8
+}
+
+define i32 @switch_in_loop_with_matching_dests_0_and_pow2_4_cases(ptr %start) {
+; CHECK-LABEL: define i32 @switch_in_loop_with_matching_dests_0_and_pow2_4_cases(
+; CHECK-SAME: ptr [[START:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[P:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[TMP4:.*]] ]
+; CHECK-NEXT:    [[TMP0]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1
+; CHECK-NEXT:    [[L:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[L]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP1]], -33
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[E1:.*]], label %[[TMP4]]
+; CHECK:       [[TMP4]]:
+; CHECK-NEXT:    switch i32 [[TMP1]], label %[[LOOP]] [
+; CHECK-NEXT:      i32 124, label %[[E2:.*]]
+; CHECK-NEXT:      i32 15, label %[[E1]]
+; CHECK-NEXT:    ]
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    br label %[[E2]]
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ -1, %[[E1]] ], [ 0, %[[TMP4]] ]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  br label %loop
+
+loop:
+  %p = phi ptr [ %start, %entry ], [ %4, %loop ]
+  %4 = getelementptr inbounds nuw i8, ptr %p, i64 1
+  %l = load i8, ptr %4, align 1
+  switch i8 %l, label %loop [
+  i8 0, label %e1
+  i8 15, label %e1
+  i8 32, label %e1
+  i8 124, label %e2
+  ]
+
+e1:
+  br label %e2
+
+e2:
+  %8...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/139321


More information about the llvm-commits mailing list