[llvm] [SwitchLowering] Support merging 0 and power-of-2 case. (PR #139736)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue May 13 07:07:09 PDT 2025


https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/139736

(No description provided.)

>From abd59ba04294d43152d8337ca75163284e72c999 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 27 Mar 2025 21:25:57 +0000
Subject: [PATCH 1/2] Add test case

---
 .../AArch64/switch-cases-to-branch-and.ll     | 210 +++++++
 .../AArch64/switch-cases-to-branch-and.ll     | 517 ++++++++++++++++++
 2 files changed, 727 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/AArch64/switch-cases-to-branch-and.ll

diff --git a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
new file mode 100644
index 0000000000000..7a28c06e75d70
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
@@ -0,0 +1,210 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -O3 -mtriple=arm64-apple-macosx -o - %s | FileCheck %s
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) {
+; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    mov w9, #100 ; =0x64
+; CHECK-NEXT:    mov w8, #20 ; =0x14
+; CHECK-NEXT:  LBB0_1: ; %loop.header
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ands w10, w0, #0xff
+; CHECK-NEXT:    b.eq LBB0_6
+; CHECK-NEXT:  ; %bb.2: ; %loop.header
+; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    cmp w10, #32
+; CHECK-NEXT:    b.eq LBB0_6
+; CHECK-NEXT:  ; %bb.3: ; %loop.header
+; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    cmp w10, #124
+; CHECK-NEXT:    b.eq LBB0_7
+; CHECK-NEXT:  ; %bb.4: ; %loop.latch
+; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    subs w9, w9, #1
+; CHECK-NEXT:    b.ne LBB0_1
+; CHECK-NEXT:  ; %bb.5:
+; CHECK-NEXT:    mov w8, #20 ; =0x14
+; CHECK-NEXT:  LBB0_6: ; %common.ret
+; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB0_7: ; %e2
+; CHECK-NEXT:    mov w0, #30 ; =0x1e
+; CHECK-NEXT:    ret
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i8 %v, label %loop.latch [
+  i8 32, label %e1
+  i8 0, label %e1
+  i8 124, label %e2
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %c = icmp eq i32 %iv.next, 100
+  br i1 %c, label %e1, label %loop.header
+
+e1:
+  ret i32 20
+
+e2:
+  ret i32 30
+}
+
+define i64 @consecutive_match_both(ptr %p, i32 %param) {
+; CHECK-LABEL: consecutive_match_both:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    mov w8, #1 ; =0x1
+; CHECK-NEXT:    mov w9, #100 ; =0x64
+; CHECK-NEXT:    mov w10, #249 ; =0xf9
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    b LBB1_2
+; CHECK-NEXT:  LBB1_1: ; %loop.latch
+; CHECK-NEXT:    ; in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    subs w9, w9, #1
+; CHECK-NEXT:    b.eq LBB1_5
+; CHECK-NEXT:  LBB1_2: ; %loop.header
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cmp w1, #7
+; CHECK-NEXT:    b.hi LBB1_1
+; CHECK-NEXT:  ; %bb.3: ; %loop.header
+; CHECK-NEXT:    ; in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    tst w8, w10
+; CHECK-NEXT:    b.eq LBB1_1
+; CHECK-NEXT:  ; %bb.4: ; %e0
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB1_5:
+; CHECK-NEXT:    mov x0, #-42 ; =0xffffffffffffffd6
+; CHECK-NEXT:    ret
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i32 %param, label %loop.latch [
+  i32 7, label %e0
+  i32 6, label %e0
+  i32 5, label %e0
+  i32 4, label %e0
+  i32 3, label %e0
+  i32 0, label %e0
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e1, label %loop.header
+
+e0:
+  %m = getelementptr i8, ptr %p, i64 20
+  br label %e1
+
+e1:
+  %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+  ret i64 %res
+}
+
+define i64 @consecutive_match_before(ptr %p, i32 %param) {
+; CHECK-LABEL: consecutive_match_before:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    mov w8, #1 ; =0x1
+; CHECK-NEXT:    mov w9, #100 ; =0x64
+; CHECK-NEXT:    mov w10, #25 ; =0x19
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    b LBB2_2
+; CHECK-NEXT:  LBB2_1: ; %loop.latch
+; CHECK-NEXT:    ; in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    subs w9, w9, #1
+; CHECK-NEXT:    b.eq LBB2_5
+; CHECK-NEXT:  LBB2_2: ; %loop.header
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cmp w1, #4
+; CHECK-NEXT:    b.hi LBB2_1
+; CHECK-NEXT:  ; %bb.3: ; %loop.header
+; CHECK-NEXT:    ; in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    tst w8, w10
+; CHECK-NEXT:    b.eq LBB2_1
+; CHECK-NEXT:  ; %bb.4: ; %e0
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB2_5:
+; CHECK-NEXT:    mov x0, #-42 ; =0xffffffffffffffd6
+; CHECK-NEXT:    ret
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i32 %param, label %loop.latch [
+  i32 4, label %e0
+  i32 3, label %e0
+  i32 0, label %e0
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e1, label %loop.header
+
+e0:
+  %m = getelementptr i8, ptr %p, i64 20
+  br label %e1
+
+e1:
+  %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+  ret i64 %res
+}
+
+define i64 @consecutive_match_after(ptr %p, i32 %param) {
+; CHECK-LABEL: consecutive_match_after:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    mov w8, #1 ; =0x1
+; CHECK-NEXT:    mov w9, #100 ; =0x64
+; CHECK-NEXT:    mov w10, #49 ; =0x31
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    b LBB3_2
+; CHECK-NEXT:  LBB3_1: ; %loop.latch
+; CHECK-NEXT:    ; in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT:    subs w9, w9, #1
+; CHECK-NEXT:    b.eq LBB3_5
+; CHECK-NEXT:  LBB3_2: ; %loop.header
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cmp w1, #5
+; CHECK-NEXT:    b.hi LBB3_1
+; CHECK-NEXT:  ; %bb.3: ; %loop.header
+; CHECK-NEXT:    ; in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT:    tst w8, w10
+; CHECK-NEXT:    b.eq LBB3_1
+; CHECK-NEXT:  ; %bb.4: ; %e0
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB3_5:
+; CHECK-NEXT:    mov x0, #-42 ; =0xffffffffffffffd6
+; CHECK-NEXT:    ret
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i32 %param, label %loop.latch [
+  i32 5, label %e0
+  i32 4, label %e0
+  i32 0, label %e0
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e1, label %loop.header
+
+e0:
+  %m = getelementptr i8, ptr %p, i64 20
+  br label %e1
+
+e1:
+  %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+  ret i64 %res
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/switch-cases-to-branch-and.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/switch-cases-to-branch-and.ll
new file mode 100644
index 0000000000000..ea4ad7fa8cef9
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/switch-cases-to-branch-and.ll
@@ -0,0 +1,517 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -codegenprepare -S -mtriple=aarch64 %s | FileCheck %s
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) {
+; CHECK-LABEL: define i32 @switch_with_matching_dests_0_and_pow2_3_cases(
+; CHECK-SAME: i8 [[V:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[V]] to i32
+; CHECK-NEXT:    switch i32 [[TMP3]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT:      i32 32, label %[[E1:.*]]
+; CHECK-NEXT:      i32 0, label %[[E1]]
+; CHECK-NEXT:      i32 124, label %[[E2:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[C]], label %[[E1]], label %[[LOOP_HEADER]]
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    ret i32 20
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    ret i32 30
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i8 %v, label %loop.latch [
+  i8 32, label %e1
+  i8 0, label %e1
+  i8 124, label %e2
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %c = icmp eq i32 %iv.next, 100
+  br i1 %c, label %e1, label %loop.header
+
+e1:
+  ret i32 20
+
+e2:
+  ret i32 30
+}
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases_swapped(i8 %v) {
+; CHECK-LABEL: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_swapped(
+; CHECK-SAME: i8 [[V:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[V]] to i32
+; CHECK-NEXT:    switch i32 [[TMP3]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT:      i32 0, label %[[E1:.*]]
+; CHECK-NEXT:      i32 32, label %[[E1]]
+; CHECK-NEXT:      i32 124, label %[[E2:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[C]], label %[[E0:.*]], label %[[LOOP_HEADER]]
+; CHECK:       [[E0]]:
+; CHECK-NEXT:    ret i32 10
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    ret i32 20
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    ret i32 30
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i8 %v, label %loop.latch [
+  i8 0, label %e1
+  i8 32, label %e1
+  i8 124, label %e2
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %c = icmp eq i32 %iv.next, 100
+  br i1 %c, label %e0, label %loop.header
+
+e0:
+  ret i32 10
+
+e1:
+  ret i32 20
+
+e2:
+  ret i32 30
+}
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases_with_phi(i8 %v, i1 %c) {
+; CHECK-LABEL: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_with_phi(
+; CHECK-SAME: i8 [[V:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[E1:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[V]] to i32
+; CHECK-NEXT:    switch i32 [[TMP0]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT:      i32 32, label %[[E1]]
+; CHECK-NEXT:      i32 0, label %[[E1]]
+; CHECK-NEXT:      i32 124, label %[[E2:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[EC]], label %[[E0:.*]], label %[[LOOP_HEADER]]
+; CHECK:       [[E0]]:
+; CHECK-NEXT:    ret i32 10
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 20, %[[LOOP_HEADER]] ], [ 20, %[[LOOP_HEADER]] ]
+; CHECK-NEXT:    ret i32 [[P]]
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    ret i32 30
+;
+entry:
+  br i1 %c, label %then, label %e1
+
+then:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %then], [ %iv.next, %loop.latch ]
+  switch i8 %v, label %loop.latch [
+  i8 32, label %e1
+  i8 0, label %e1
+  i8 124, label %e2
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e0, label %loop.header
+
+e0:
+  ret i32 10
+
+e1:
+  %p = phi i32 [ 0, %entry ], [ 20, %loop.header ], [ 20, %loop.header ]
+  ret i32 %p
+
+e2:
+  ret i32 30
+}
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases_all_different_succs(i8 %v) {
+; CHECK-LABEL: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_all_different_succs(
+; CHECK-SAME: i8 [[V:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[V]] to i32
+; CHECK-NEXT:    switch i32 [[TMP0]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT:      i32 32, label %[[E1:.*]]
+; CHECK-NEXT:      i32 0, label %[[E2:.*]]
+; CHECK-NEXT:      i32 124, label %[[E3:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[EC]], label %[[E0:.*]], label %[[LOOP_HEADER]]
+; CHECK:       [[E0]]:
+; CHECK-NEXT:    ret i32 10
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    ret i32 20
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    ret i32 30
+; CHECK:       [[E3]]:
+; CHECK-NEXT:    ret i32 40
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i8 %v, label %loop.latch [
+  i8 32, label %e1
+  i8 0, label %e2
+  i8 124, label %e3
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e0, label %loop.header
+
+e0:
+  ret i32 10
+
+e1:
+  ret i32 20
+
+e2:
+  ret i32 30
+
+e3:
+  ret i32 40
+}
+
+define i32 @switch_in_loop_with_matching_dests_0_and_pow2_3_cases(ptr %start) {
+; CHECK-LABEL: define i32 @switch_in_loop_with_matching_dests_0_and_pow2_3_cases(
+; CHECK-SAME: ptr [[START:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[P:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[TMP0]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1
+; CHECK-NEXT:    [[L:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[L]] to i32
+; CHECK-NEXT:    switch i32 [[TMP1]], label %[[LOOP]] [
+; CHECK-NEXT:      i32 32, label %[[E1:.*]]
+; CHECK-NEXT:      i32 0, label %[[E1]]
+; CHECK-NEXT:      i32 124, label %[[E2:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    br label %[[E2]]
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ -1, %[[E1]] ], [ 0, %[[LOOP]] ]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  br label %loop
+
+loop:
+  %p = phi ptr [ %start, %entry ], [ %4, %loop ]
+  %4 = getelementptr inbounds nuw i8, ptr %p, i64 1
+  %l = load i8, ptr %4, align 1
+  switch i8 %l, label %loop [
+  i8 32, label %e1
+  i8 0, label %e1
+  i8 124, label %e2
+  ]
+
+e1:
+  br label %e2
+
+e2:
+  %8 = phi i32 [ -1, %e1 ], [ 0, %loop ]
+  ret i32 %8
+}
+
+define i32 @switch_in_loop_with_matching_dests_0_and_pow2_4_cases(ptr %start) {
+; CHECK-LABEL: define i32 @switch_in_loop_with_matching_dests_0_and_pow2_4_cases(
+; CHECK-SAME: ptr [[START:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[P:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[TMP0]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1
+; CHECK-NEXT:    [[L:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[L]] to i32
+; CHECK-NEXT:    switch i32 [[TMP1]], label %[[LOOP]] [
+; CHECK-NEXT:      i32 0, label %[[E1:.*]]
+; CHECK-NEXT:      i32 15, label %[[E1]]
+; CHECK-NEXT:      i32 32, label %[[E1]]
+; CHECK-NEXT:      i32 124, label %[[E2:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    br label %[[E2]]
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ -1, %[[E1]] ], [ 0, %[[LOOP]] ]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  br label %loop
+
+loop:
+  %p = phi ptr [ %start, %entry ], [ %4, %loop ]
+  %4 = getelementptr inbounds nuw i8, ptr %p, i64 1
+  %l = load i8, ptr %4, align 1
+  switch i8 %l, label %loop [
+  i8 0, label %e1
+  i8 15, label %e1
+  i8 32, label %e1
+  i8 124, label %e2
+  ]
+
+e1:
+  br label %e2
+
+e2:
+  %8 = phi i32 [ -1, %e1 ], [ 0, %loop ]
+  ret i32 %8
+}
+
+define i32 @switch_in_loop_with_matching_dests_0_and_non_pow2(ptr %start) {
+; CHECK-LABEL: define i32 @switch_in_loop_with_matching_dests_0_and_non_pow2(
+; CHECK-SAME: ptr [[START:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[P:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[TMP0]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1
+; CHECK-NEXT:    [[L:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[L]] to i32
+; CHECK-NEXT:    switch i32 [[TMP1]], label %[[LOOP]] [
+; CHECK-NEXT:      i32 0, label %[[E1:.*]]
+; CHECK-NEXT:      i32 35, label %[[E1]]
+; CHECK-NEXT:      i32 33, label %[[E2:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    ret i32 -1
+; CHECK:       [[E2]]:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop
+
+loop:
+  %p = phi ptr [ %start, %entry ], [ %4, %loop ]
+  %4 = getelementptr inbounds nuw i8, ptr %p, i64 1
+  %l = load i8, ptr %4, align 1
+  switch i8 %l, label %loop [
+  i8 0, label %e1
+  i8 35, label %e1
+  i8 33, label %e2
+  ]
+
+e1:
+  ret i32 -1
+
+e2:
+  ret i32 10
+}
+
+define void @test_successor_with_loop_phi(ptr %A, ptr %B) {
+; CHECK-LABEL: define void @test_successor_with_loop_phi(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[P:%.*]] = phi ptr [ [[A]], %[[ENTRY]] ], [ [[B]], %[[LOOP]] ], [ [[B]], %[[LOOP]] ]
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    switch i32 [[L]], label %[[EXIT:.*]] [
+; CHECK-NEXT:      i32 4, label %[[LOOP]]
+; CHECK-NEXT:      i32 0, label %[[LOOP]]
+; CHECK-NEXT:    ]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %p = phi ptr [ %A, %entry ], [ %B , %loop ], [ %B, %loop ]
+  %l = load i32, ptr %p
+  store i32 0, ptr %p
+  switch i32 %l, label %exit [
+  i32 4, label %loop
+  i32 0, label %loop
+  ]
+
+exit:
+  ret void
+}
+
+define i64 @consecutive_match_both(ptr %p, i32 %param) {
+; CHECK-LABEL: define i64 @consecutive_match_both(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[PARAM:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    switch i32 [[PARAM]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT:      i32 7, label %[[E0:.*]]
+; CHECK-NEXT:      i32 6, label %[[E0]]
+; CHECK-NEXT:      i32 5, label %[[E0]]
+; CHECK-NEXT:      i32 4, label %[[E0]]
+; CHECK-NEXT:      i32 3, label %[[E0]]
+; CHECK-NEXT:      i32 0, label %[[E0]]
+; CHECK-NEXT:    ]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[EC]], label %[[E1:.*]], label %[[LOOP_HEADER]]
+; CHECK:       [[E0]]:
+; CHECK-NEXT:    [[M:%.*]] = getelementptr i8, ptr [[P]], i64 20
+; CHECK-NEXT:    br label %[[E1]]
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i64 [ 0, %[[E0]] ], [ -42, %[[LOOP_LATCH]] ]
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i32 %param, label %loop.latch [
+  i32 7, label %e0
+  i32 6, label %e0
+  i32 5, label %e0
+  i32 4, label %e0
+  i32 3, label %e0
+  i32 0, label %e0
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e1, label %loop.header
+
+e0:
+  %m = getelementptr i8, ptr %p, i64 20
+  br label %e1
+
+e1:
+  %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+  ret i64 %res
+}
+
+define i64 @consecutive_match_before(ptr %p, i32 %param) {
+; CHECK-LABEL: define i64 @consecutive_match_before(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[PARAM:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    switch i32 [[PARAM]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT:      i32 4, label %[[E0:.*]]
+; CHECK-NEXT:      i32 3, label %[[E0]]
+; CHECK-NEXT:      i32 0, label %[[E0]]
+; CHECK-NEXT:    ]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[EC]], label %[[E1:.*]], label %[[LOOP_HEADER]]
+; CHECK:       [[E0]]:
+; CHECK-NEXT:    [[M:%.*]] = getelementptr i8, ptr [[P]], i64 20
+; CHECK-NEXT:    br label %[[E1]]
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i64 [ 0, %[[E0]] ], [ -42, %[[LOOP_LATCH]] ]
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i32 %param, label %loop.latch [
+  i32 4, label %e0
+  i32 3, label %e0
+  i32 0, label %e0
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e1, label %loop.header
+
+e0:
+  %m = getelementptr i8, ptr %p, i64 20
+  br label %e1
+
+e1:
+  %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+  ret i64 %res
+}
+
+define i64 @consecutive_match_after(ptr %p, i32 %param) {
+; CHECK-LABEL: define i64 @consecutive_match_after(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[PARAM:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    switch i32 [[PARAM]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT:      i32 5, label %[[E0:.*]]
+; CHECK-NEXT:      i32 4, label %[[E0]]
+; CHECK-NEXT:      i32 0, label %[[E0]]
+; CHECK-NEXT:    ]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[EC]], label %[[E1:.*]], label %[[LOOP_HEADER]]
+; CHECK:       [[E0]]:
+; CHECK-NEXT:    [[M:%.*]] = getelementptr i8, ptr [[P]], i64 20
+; CHECK-NEXT:    br label %[[E1]]
+; CHECK:       [[E1]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i64 [ 0, %[[E0]] ], [ -42, %[[LOOP_LATCH]] ]
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  switch i32 %param, label %loop.latch [
+  i32 5, label %e0
+  i32 4, label %e0
+  i32 0, label %e0
+  ]
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 100
+  br i1 %ec, label %e1, label %loop.header
+
+e0:
+  %m = getelementptr i8, ptr %p, i64 20
+  br label %e1
+
+e1:
+  %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+  ret i64 %res
+}

>From b6d1aef45393717a4214f765d0608770fd75493c Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 13 May 2025 15:02:30 +0100
Subject: [PATCH 2/2] [SwitchLowering] Support merging 0 and power-of-2 case.

---
 .../llvm/CodeGen/SwitchLoweringUtils.h        |  3 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 26 +++++++++++++++
 llvm/lib/CodeGen/SwitchLoweringUtils.cpp      | 33 +++++++++++++++++++
 .../AArch64/switch-cases-to-branch-and.ll     | 27 +++++++--------
 4 files changed, 72 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
index 9f1d6f7b4f952..377b9fd8cff88 100644
--- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -35,7 +35,8 @@ enum CaseClusterKind {
   /// A cluster of cases suitable for jump table lowering.
   CC_JumpTable,
   /// A cluster of cases suitable for bit test lowering.
-  CC_BitTests
+  CC_BitTests,
+  CC_And
 };
 
 /// A cluster of case labels.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8e74a076cc013..6c2d7a0872a50 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -12238,6 +12238,32 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
 
         break;
       }
+      case CC_And: {
+        SDLoc dl = getCurSDLoc();
+
+        const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+        EVT VT = TLI.getValueType(DAG.getDataLayout(), I->Low->getType(), true);
+        SDValue C = DAG.getConstant(*I->Low, dl, VT);
+        SDValue Zero = DAG.getConstant(0, dl, VT);
+        SDValue CondLHS = getValue(Cond);
+        SDValue And = DAG.getNode(ISD::AND, dl, C.getValueType(), CondLHS, C);
+        auto CondD = DAG.getSetCC(dl, MVT::i1, And, Zero, ISD::SETEQ);
+        SDNodeFlags Flags;
+        SDValue BrCond =
+            DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), CondD,
+                        DAG.getBasicBlock(I->MBB), Flags);
+
+        // Insert the false branch. Do this even if it's a fall-through branch;
+        // this makes it easier to do DAG optimizations that require inverting
+        // the branch condition.
+        BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+                             DAG.getBasicBlock(Fallthrough));
+        addSuccessorWithProb(CurMBB, I->MBB, UnhandledProbs);
+        addSuccessorWithProb(CurMBB, Fallthrough,
+                             BranchProbability::getUnknown());
+        CurMBB->normalizeSuccProbs();
+        DAG.setRoot(BrCond);
+      }
     }
     CurMBB = Fallthrough;
   }
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 038c499fe236e..2fc545dd2ef54 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -362,6 +362,39 @@ void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters,
     }
   }
   Clusters.resize(DstIndex);
+
+  unsigned ZeroIdx = -1;
+  for (const auto &[Idx, C] : enumerate(Clusters)) {
+    if (C.Kind != CC_Range || C.Low != C.High)
+      continue;
+    if (C.Low->isZero()) {
+      ZeroIdx = Idx;
+      break;
+    }
+  }
+
+  if (ZeroIdx == -1u)
+    return;
+
+  unsigned Pow2Idx = -1;
+  for (const auto &[Idx, C] : enumerate(Clusters)) {
+    if (C.Kind != CC_Range || C.Low != C.High || C.MBB != Clusters[ZeroIdx].MBB)
+      continue;
+    if (C.Low->getValue().isPowerOf2()) {
+      Pow2Idx = Idx;
+      break;
+    }
+  }
+
+  if (Pow2Idx == -1u)
+    return;
+
+  APInt Pow2 = Clusters[Pow2Idx].Low->getValue();
+  APInt NewC = (Pow2 + 1) * -1;
+  Clusters[ZeroIdx].Low = ConstantInt::get(SI->getContext(), NewC);
+  Clusters[ZeroIdx].Low = ConstantInt::get(SI->getContext(), NewC);
+  Clusters[ZeroIdx].Kind = CC_And;
+  Clusters.erase(Clusters.begin() + Pow2Idx);
 }
 
 bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
diff --git a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
index 7a28c06e75d70..b8906ab7d7713 100644
--- a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
+++ b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
@@ -4,30 +4,25 @@
 define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) {
 ; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    mov w9, #100 ; =0x64
-; CHECK-NEXT:    mov w8, #20 ; =0x14
+; CHECK-NEXT:    mov w8, #100 ; =0x64
+; CHECK-NEXT:    mov w9, #223 ; =0xdf
 ; CHECK-NEXT:  LBB0_1: ; %loop.header
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ands w10, w0, #0xff
-; CHECK-NEXT:    b.eq LBB0_6
+; CHECK-NEXT:    tst w0, w9
+; CHECK-NEXT:    b.eq LBB0_4
 ; CHECK-NEXT:  ; %bb.2: ; %loop.header
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    cmp w10, #32
-; CHECK-NEXT:    b.eq LBB0_6
-; CHECK-NEXT:  ; %bb.3: ; %loop.header
-; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    and w10, w0, #0xff
 ; CHECK-NEXT:    cmp w10, #124
-; CHECK-NEXT:    b.eq LBB0_7
-; CHECK-NEXT:  ; %bb.4: ; %loop.latch
+; CHECK-NEXT:    b.eq LBB0_5
+; CHECK-NEXT:  ; %bb.3: ; %loop.latch
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    subs w9, w9, #1
+; CHECK-NEXT:    subs w8, w8, #1
 ; CHECK-NEXT:    b.ne LBB0_1
-; CHECK-NEXT:  ; %bb.5:
-; CHECK-NEXT:    mov w8, #20 ; =0x14
-; CHECK-NEXT:  LBB0_6: ; %common.ret
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:  LBB0_4:
+; CHECK-NEXT:    mov w0, #20 ; =0x14
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  LBB0_7: ; %e2
+; CHECK-NEXT:  LBB0_5: ; %e2
 ; CHECK-NEXT:    mov w0, #30 ; =0x1e
 ; CHECK-NEXT:    ret
 entry:



More information about the llvm-commits mailing list