[llvm] [SwitchLowering] Support merging 0 and power-of-2 case. (PR #139736)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue May 13 07:07:09 PDT 2025
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/139736
None
>From abd59ba04294d43152d8337ca75163284e72c999 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 27 Mar 2025 21:25:57 +0000
Subject: [PATCH 1/2] Add test case
---
.../AArch64/switch-cases-to-branch-and.ll | 210 +++++++
.../AArch64/switch-cases-to-branch-and.ll | 517 ++++++++++++++++++
2 files changed, 727 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
create mode 100644 llvm/test/Transforms/CodeGenPrepare/AArch64/switch-cases-to-branch-and.ll
diff --git a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
new file mode 100644
index 0000000000000..7a28c06e75d70
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
@@ -0,0 +1,210 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -O3 -mtriple=arm64-apple-macosx -o - %s | FileCheck %s
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) {
+; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: mov w9, #100 ; =0x64
+; CHECK-NEXT: mov w8, #20 ; =0x14
+; CHECK-NEXT: LBB0_1: ; %loop.header
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ands w10, w0, #0xff
+; CHECK-NEXT: b.eq LBB0_6
+; CHECK-NEXT: ; %bb.2: ; %loop.header
+; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: cmp w10, #32
+; CHECK-NEXT: b.eq LBB0_6
+; CHECK-NEXT: ; %bb.3: ; %loop.header
+; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: cmp w10, #124
+; CHECK-NEXT: b.eq LBB0_7
+; CHECK-NEXT: ; %bb.4: ; %loop.latch
+; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: subs w9, w9, #1
+; CHECK-NEXT: b.ne LBB0_1
+; CHECK-NEXT: ; %bb.5:
+; CHECK-NEXT: mov w8, #20 ; =0x14
+; CHECK-NEXT: LBB0_6: ; %common.ret
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+; CHECK-NEXT: LBB0_7: ; %e2
+; CHECK-NEXT: mov w0, #30 ; =0x1e
+; CHECK-NEXT: ret
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ switch i8 %v, label %loop.latch [
+ i8 32, label %e1
+ i8 0, label %e1
+ i8 124, label %e2
+ ]
+
+loop.latch:
+ %iv.next = add i32 %iv, 1
+ %c = icmp eq i32 %iv.next, 100
+ br i1 %c, label %e1, label %loop.header
+
+e1:
+ ret i32 20
+
+e2:
+ ret i32 30
+}
+
+define i64 @consecutive_match_both(ptr %p, i32 %param) {
+; CHECK-LABEL: consecutive_match_both:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: mov w8, #1 ; =0x1
+; CHECK-NEXT: mov w9, #100 ; =0x64
+; CHECK-NEXT: mov w10, #249 ; =0xf9
+; CHECK-NEXT: lsl w8, w8, w1
+; CHECK-NEXT: b LBB1_2
+; CHECK-NEXT: LBB1_1: ; %loop.latch
+; CHECK-NEXT: ; in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: subs w9, w9, #1
+; CHECK-NEXT: b.eq LBB1_5
+; CHECK-NEXT: LBB1_2: ; %loop.header
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmp w1, #7
+; CHECK-NEXT: b.hi LBB1_1
+; CHECK-NEXT: ; %bb.3: ; %loop.header
+; CHECK-NEXT: ; in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: tst w8, w10
+; CHECK-NEXT: b.eq LBB1_1
+; CHECK-NEXT: ; %bb.4: ; %e0
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: LBB1_5:
+; CHECK-NEXT: mov x0, #-42 ; =0xffffffffffffffd6
+; CHECK-NEXT: ret
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ switch i32 %param, label %loop.latch [
+ i32 7, label %e0
+ i32 6, label %e0
+ i32 5, label %e0
+ i32 4, label %e0
+ i32 3, label %e0
+ i32 0, label %e0
+ ]
+
+loop.latch:
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 100
+ br i1 %ec, label %e1, label %loop.header
+
+e0:
+ %m = getelementptr i8, ptr %p, i64 20
+ br label %e1
+
+e1:
+ %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+ ret i64 %res
+}
+
+define i64 @consecutive_match_before(ptr %p, i32 %param) {
+; CHECK-LABEL: consecutive_match_before:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: mov w8, #1 ; =0x1
+; CHECK-NEXT: mov w9, #100 ; =0x64
+; CHECK-NEXT: mov w10, #25 ; =0x19
+; CHECK-NEXT: lsl w8, w8, w1
+; CHECK-NEXT: b LBB2_2
+; CHECK-NEXT: LBB2_1: ; %loop.latch
+; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT: subs w9, w9, #1
+; CHECK-NEXT: b.eq LBB2_5
+; CHECK-NEXT: LBB2_2: ; %loop.header
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmp w1, #4
+; CHECK-NEXT: b.hi LBB2_1
+; CHECK-NEXT: ; %bb.3: ; %loop.header
+; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT: tst w8, w10
+; CHECK-NEXT: b.eq LBB2_1
+; CHECK-NEXT: ; %bb.4: ; %e0
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: LBB2_5:
+; CHECK-NEXT: mov x0, #-42 ; =0xffffffffffffffd6
+; CHECK-NEXT: ret
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ switch i32 %param, label %loop.latch [
+ i32 4, label %e0
+ i32 3, label %e0
+ i32 0, label %e0
+ ]
+
+loop.latch:
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 100
+ br i1 %ec, label %e1, label %loop.header
+
+e0:
+ %m = getelementptr i8, ptr %p, i64 20
+ br label %e1
+
+e1:
+ %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+ ret i64 %res
+}
+
+define i64 @consecutive_match_after(ptr %p, i32 %param) {
+; CHECK-LABEL: consecutive_match_after:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: mov w8, #1 ; =0x1
+; CHECK-NEXT: mov w9, #100 ; =0x64
+; CHECK-NEXT: mov w10, #49 ; =0x31
+; CHECK-NEXT: lsl w8, w8, w1
+; CHECK-NEXT: b LBB3_2
+; CHECK-NEXT: LBB3_1: ; %loop.latch
+; CHECK-NEXT: ; in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT: subs w9, w9, #1
+; CHECK-NEXT: b.eq LBB3_5
+; CHECK-NEXT: LBB3_2: ; %loop.header
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmp w1, #5
+; CHECK-NEXT: b.hi LBB3_1
+; CHECK-NEXT: ; %bb.3: ; %loop.header
+; CHECK-NEXT: ; in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT: tst w8, w10
+; CHECK-NEXT: b.eq LBB3_1
+; CHECK-NEXT: ; %bb.4: ; %e0
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: LBB3_5:
+; CHECK-NEXT: mov x0, #-42 ; =0xffffffffffffffd6
+; CHECK-NEXT: ret
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ switch i32 %param, label %loop.latch [
+ i32 5, label %e0
+ i32 4, label %e0
+ i32 0, label %e0
+ ]
+
+loop.latch:
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 100
+ br i1 %ec, label %e1, label %loop.header
+
+e0:
+ %m = getelementptr i8, ptr %p, i64 20
+ br label %e1
+
+e1:
+ %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+ ret i64 %res
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/switch-cases-to-branch-and.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/switch-cases-to-branch-and.ll
new file mode 100644
index 0000000000000..ea4ad7fa8cef9
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/switch-cases-to-branch-and.ll
@@ -0,0 +1,517 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -codegenprepare -S -mtriple=aarch64 %s | FileCheck %s
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) {
+; CHECK-LABEL: define i32 @switch_with_matching_dests_0_and_pow2_3_cases(
+; CHECK-SAME: i8 [[V:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[V]] to i32
+; CHECK-NEXT: switch i32 [[TMP3]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT: i32 32, label %[[E1:.*]]
+; CHECK-NEXT: i32 0, label %[[E1]]
+; CHECK-NEXT: i32 124, label %[[E2:.*]]
+; CHECK-NEXT: ]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT: br i1 [[C]], label %[[E1]], label %[[LOOP_HEADER]]
+; CHECK: [[E1]]:
+; CHECK-NEXT: ret i32 20
+; CHECK: [[E2]]:
+; CHECK-NEXT: ret i32 30
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ switch i8 %v, label %loop.latch [
+ i8 32, label %e1
+ i8 0, label %e1
+ i8 124, label %e2
+ ]
+
+loop.latch:
+ %iv.next = add i32 %iv, 1
+ %c = icmp eq i32 %iv.next, 100
+ br i1 %c, label %e1, label %loop.header
+
+e1:
+ ret i32 20
+
+e2:
+ ret i32 30
+}
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases_swapped(i8 %v) {
+; CHECK-LABEL: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_swapped(
+; CHECK-SAME: i8 [[V:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[V]] to i32
+; CHECK-NEXT: switch i32 [[TMP3]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT: i32 0, label %[[E1:.*]]
+; CHECK-NEXT: i32 32, label %[[E1]]
+; CHECK-NEXT: i32 124, label %[[E2:.*]]
+; CHECK-NEXT: ]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT: br i1 [[C]], label %[[E0:.*]], label %[[LOOP_HEADER]]
+; CHECK: [[E0]]:
+; CHECK-NEXT: ret i32 10
+; CHECK: [[E1]]:
+; CHECK-NEXT: ret i32 20
+; CHECK: [[E2]]:
+; CHECK-NEXT: ret i32 30
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ switch i8 %v, label %loop.latch [
+ i8 0, label %e1
+ i8 32, label %e1
+ i8 124, label %e2
+ ]
+
+loop.latch:
+ %iv.next = add i32 %iv, 1
+ %c = icmp eq i32 %iv.next, 100
+ br i1 %c, label %e0, label %loop.header
+
+e0:
+ ret i32 10
+
+e1:
+ ret i32 20
+
+e2:
+ ret i32 30
+}
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases_with_phi(i8 %v, i1 %c) {
+; CHECK-LABEL: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_with_phi(
+; CHECK-SAME: i8 [[V:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[E1:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[V]] to i32
+; CHECK-NEXT: switch i32 [[TMP0]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT: i32 32, label %[[E1]]
+; CHECK-NEXT: i32 0, label %[[E1]]
+; CHECK-NEXT: i32 124, label %[[E2:.*]]
+; CHECK-NEXT: ]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT: br i1 [[EC]], label %[[E0:.*]], label %[[LOOP_HEADER]]
+; CHECK: [[E0]]:
+; CHECK-NEXT: ret i32 10
+; CHECK: [[E1]]:
+; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 20, %[[LOOP_HEADER]] ], [ 20, %[[LOOP_HEADER]] ]
+; CHECK-NEXT: ret i32 [[P]]
+; CHECK: [[E2]]:
+; CHECK-NEXT: ret i32 30
+;
+entry:
+ br i1 %c, label %then, label %e1
+
+then:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %then], [ %iv.next, %loop.latch ]
+ switch i8 %v, label %loop.latch [
+ i8 32, label %e1
+ i8 0, label %e1
+ i8 124, label %e2
+ ]
+
+loop.latch:
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 100
+ br i1 %ec, label %e0, label %loop.header
+
+e0:
+ ret i32 10
+
+e1:
+ %p = phi i32 [ 0, %entry ], [ 20, %loop.header ], [ 20, %loop.header ]
+ ret i32 %p
+
+e2:
+ ret i32 30
+}
+
+define i32 @switch_with_matching_dests_0_and_pow2_3_cases_all_different_succs(i8 %v) {
+; CHECK-LABEL: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_all_different_succs(
+; CHECK-SAME: i8 [[V:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[V]] to i32
+; CHECK-NEXT: switch i32 [[TMP0]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT: i32 32, label %[[E1:.*]]
+; CHECK-NEXT: i32 0, label %[[E2:.*]]
+; CHECK-NEXT: i32 124, label %[[E3:.*]]
+; CHECK-NEXT: ]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT: br i1 [[EC]], label %[[E0:.*]], label %[[LOOP_HEADER]]
+; CHECK: [[E0]]:
+; CHECK-NEXT: ret i32 10
+; CHECK: [[E1]]:
+; CHECK-NEXT: ret i32 20
+; CHECK: [[E2]]:
+; CHECK-NEXT: ret i32 30
+; CHECK: [[E3]]:
+; CHECK-NEXT: ret i32 40
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ switch i8 %v, label %loop.latch [
+ i8 32, label %e1
+ i8 0, label %e2
+ i8 124, label %e3
+ ]
+
+loop.latch:
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 100
+ br i1 %ec, label %e0, label %loop.header
+
+e0:
+ ret i32 10
+
+e1:
+ ret i32 20
+
+e2:
+ ret i32 30
+
+e3:
+ ret i32 40
+}
+
+define i32 @switch_in_loop_with_matching_dests_0_and_pow2_3_cases(ptr %start) {
+; CHECK-LABEL: define i32 @switch_in_loop_with_matching_dests_0_and_pow2_3_cases(
+; CHECK-SAME: ptr [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[TMP0]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1
+; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[L]] to i32
+; CHECK-NEXT: switch i32 [[TMP1]], label %[[LOOP]] [
+; CHECK-NEXT: i32 32, label %[[E1:.*]]
+; CHECK-NEXT: i32 0, label %[[E1]]
+; CHECK-NEXT: i32 124, label %[[E2:.*]]
+; CHECK-NEXT: ]
+; CHECK: [[E1]]:
+; CHECK-NEXT: br label %[[E2]]
+; CHECK: [[E2]]:
+; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ -1, %[[E1]] ], [ 0, %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+entry:
+ br label %loop
+
+loop:
+ %p = phi ptr [ %start, %entry ], [ %4, %loop ]
+ %4 = getelementptr inbounds nuw i8, ptr %p, i64 1
+ %l = load i8, ptr %4, align 1
+ switch i8 %l, label %loop [
+ i8 32, label %e1
+ i8 0, label %e1
+ i8 124, label %e2
+ ]
+
+e1:
+ br label %e2
+
+e2:
+ %8 = phi i32 [ -1, %e1 ], [ 0, %loop ]
+ ret i32 %8
+}
+
+define i32 @switch_in_loop_with_matching_dests_0_and_pow2_4_cases(ptr %start) {
+; CHECK-LABEL: define i32 @switch_in_loop_with_matching_dests_0_and_pow2_4_cases(
+; CHECK-SAME: ptr [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[TMP0]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1
+; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[L]] to i32
+; CHECK-NEXT: switch i32 [[TMP1]], label %[[LOOP]] [
+; CHECK-NEXT: i32 0, label %[[E1:.*]]
+; CHECK-NEXT: i32 15, label %[[E1]]
+; CHECK-NEXT: i32 32, label %[[E1]]
+; CHECK-NEXT: i32 124, label %[[E2:.*]]
+; CHECK-NEXT: ]
+; CHECK: [[E1]]:
+; CHECK-NEXT: br label %[[E2]]
+; CHECK: [[E2]]:
+; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ -1, %[[E1]] ], [ 0, %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+entry:
+ br label %loop
+
+loop:
+ %p = phi ptr [ %start, %entry ], [ %4, %loop ]
+ %4 = getelementptr inbounds nuw i8, ptr %p, i64 1
+ %l = load i8, ptr %4, align 1
+ switch i8 %l, label %loop [
+ i8 0, label %e1
+ i8 15, label %e1
+ i8 32, label %e1
+ i8 124, label %e2
+ ]
+
+e1:
+ br label %e2
+
+e2:
+ %8 = phi i32 [ -1, %e1 ], [ 0, %loop ]
+ ret i32 %8
+}
+
+define i32 @switch_in_loop_with_matching_dests_0_and_non_pow2(ptr %start) {
+; CHECK-LABEL: define i32 @switch_in_loop_with_matching_dests_0_and_non_pow2(
+; CHECK-SAME: ptr [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[TMP0]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1
+; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[L]] to i32
+; CHECK-NEXT: switch i32 [[TMP1]], label %[[LOOP]] [
+; CHECK-NEXT: i32 0, label %[[E1:.*]]
+; CHECK-NEXT: i32 35, label %[[E1]]
+; CHECK-NEXT: i32 33, label %[[E2:.*]]
+; CHECK-NEXT: ]
+; CHECK: [[E1]]:
+; CHECK-NEXT: ret i32 -1
+; CHECK: [[E2]]:
+; CHECK-NEXT: ret i32 10
+;
+entry:
+ br label %loop
+
+loop:
+ %p = phi ptr [ %start, %entry ], [ %4, %loop ]
+ %4 = getelementptr inbounds nuw i8, ptr %p, i64 1
+ %l = load i8, ptr %4, align 1
+ switch i8 %l, label %loop [
+ i8 0, label %e1
+ i8 35, label %e1
+ i8 33, label %e2
+ ]
+
+e1:
+ ret i32 -1
+
+e2:
+ ret i32 10
+}
+
+define void @test_successor_with_loop_phi(ptr %A, ptr %B) {
+; CHECK-LABEL: define void @test_successor_with_loop_phi(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[A]], %[[ENTRY]] ], [ [[B]], %[[LOOP]] ], [ [[B]], %[[LOOP]] ]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: store i32 0, ptr [[P]], align 4
+; CHECK-NEXT: switch i32 [[L]], label %[[EXIT:.*]] [
+; CHECK-NEXT: i32 4, label %[[LOOP]]
+; CHECK-NEXT: i32 0, label %[[LOOP]]
+; CHECK-NEXT: ]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %p = phi ptr [ %A, %entry ], [ %B , %loop ], [ %B, %loop ]
+ %l = load i32, ptr %p
+ store i32 0, ptr %p
+ switch i32 %l, label %exit [
+ i32 4, label %loop
+ i32 0, label %loop
+ ]
+
+exit:
+ ret void
+}
+
+define i64 @consecutive_match_both(ptr %p, i32 %param) {
+; CHECK-LABEL: define i64 @consecutive_match_both(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[PARAM:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: switch i32 [[PARAM]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT: i32 7, label %[[E0:.*]]
+; CHECK-NEXT: i32 6, label %[[E0]]
+; CHECK-NEXT: i32 5, label %[[E0]]
+; CHECK-NEXT: i32 4, label %[[E0]]
+; CHECK-NEXT: i32 3, label %[[E0]]
+; CHECK-NEXT: i32 0, label %[[E0]]
+; CHECK-NEXT: ]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT: br i1 [[EC]], label %[[E1:.*]], label %[[LOOP_HEADER]]
+; CHECK: [[E0]]:
+; CHECK-NEXT: [[M:%.*]] = getelementptr i8, ptr [[P]], i64 20
+; CHECK-NEXT: br label %[[E1]]
+; CHECK: [[E1]]:
+; CHECK-NEXT: [[RES:%.*]] = phi i64 [ 0, %[[E0]] ], [ -42, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: ret i64 [[RES]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ switch i32 %param, label %loop.latch [
+ i32 7, label %e0
+ i32 6, label %e0
+ i32 5, label %e0
+ i32 4, label %e0
+ i32 3, label %e0
+ i32 0, label %e0
+ ]
+
+loop.latch:
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 100
+ br i1 %ec, label %e1, label %loop.header
+
+e0:
+ %m = getelementptr i8, ptr %p, i64 20
+ br label %e1
+
+e1:
+ %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+ ret i64 %res
+}
+
+define i64 @consecutive_match_before(ptr %p, i32 %param) {
+; CHECK-LABEL: define i64 @consecutive_match_before(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[PARAM:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: switch i32 [[PARAM]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT: i32 4, label %[[E0:.*]]
+; CHECK-NEXT: i32 3, label %[[E0]]
+; CHECK-NEXT: i32 0, label %[[E0]]
+; CHECK-NEXT: ]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT: br i1 [[EC]], label %[[E1:.*]], label %[[LOOP_HEADER]]
+; CHECK: [[E0]]:
+; CHECK-NEXT: [[M:%.*]] = getelementptr i8, ptr [[P]], i64 20
+; CHECK-NEXT: br label %[[E1]]
+; CHECK: [[E1]]:
+; CHECK-NEXT: [[RES:%.*]] = phi i64 [ 0, %[[E0]] ], [ -42, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: ret i64 [[RES]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ switch i32 %param, label %loop.latch [
+ i32 4, label %e0
+ i32 3, label %e0
+ i32 0, label %e0
+ ]
+
+loop.latch:
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 100
+ br i1 %ec, label %e1, label %loop.header
+
+e0:
+ %m = getelementptr i8, ptr %p, i64 20
+ br label %e1
+
+e1:
+ %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+ ret i64 %res
+}
+
+define i64 @consecutive_match_after(ptr %p, i32 %param) {
+; CHECK-LABEL: define i64 @consecutive_match_after(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[PARAM:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: switch i32 [[PARAM]], label %[[LOOP_LATCH]] [
+; CHECK-NEXT: i32 5, label %[[E0:.*]]
+; CHECK-NEXT: i32 4, label %[[E0]]
+; CHECK-NEXT: i32 0, label %[[E0]]
+; CHECK-NEXT: ]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT: br i1 [[EC]], label %[[E1:.*]], label %[[LOOP_HEADER]]
+; CHECK: [[E0]]:
+; CHECK-NEXT: [[M:%.*]] = getelementptr i8, ptr [[P]], i64 20
+; CHECK-NEXT: br label %[[E1]]
+; CHECK: [[E1]]:
+; CHECK-NEXT: [[RES:%.*]] = phi i64 [ 0, %[[E0]] ], [ -42, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: ret i64 [[RES]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ switch i32 %param, label %loop.latch [
+ i32 5, label %e0
+ i32 4, label %e0
+ i32 0, label %e0
+ ]
+
+loop.latch:
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 100
+ br i1 %ec, label %e1, label %loop.header
+
+e0:
+ %m = getelementptr i8, ptr %p, i64 20
+ br label %e1
+
+e1:
+ %res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
+ ret i64 %res
+}
>From b6d1aef45393717a4214f765d0608770fd75493c Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 13 May 2025 15:02:30 +0100
Subject: [PATCH 2/2] [SwitchLowering] Support merging 0 and power-of-2 case.
---
.../llvm/CodeGen/SwitchLoweringUtils.h | 3 +-
.../SelectionDAG/SelectionDAGBuilder.cpp | 26 +++++++++++++++
llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 33 +++++++++++++++++++
.../AArch64/switch-cases-to-branch-and.ll | 27 +++++++--------
4 files changed, 72 insertions(+), 17 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
index 9f1d6f7b4f952..377b9fd8cff88 100644
--- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -35,7 +35,8 @@ enum CaseClusterKind {
/// A cluster of cases suitable for jump table lowering.
CC_JumpTable,
/// A cluster of cases suitable for bit test lowering.
- CC_BitTests
+ CC_BitTests,
+ CC_And
};
/// A cluster of case labels.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8e74a076cc013..6c2d7a0872a50 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -12238,6 +12238,32 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
break;
}
+ case CC_And: {
+ SDLoc dl = getCurSDLoc();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I->Low->getType(), true);
+ SDValue C = DAG.getConstant(*I->Low, dl, VT);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue CondLHS = getValue(Cond);
+ SDValue And = DAG.getNode(ISD::AND, dl, C.getValueType(), CondLHS, C);
+ auto CondD = DAG.getSetCC(dl, MVT::i1, And, Zero, ISD::SETEQ);
+ SDNodeFlags Flags;
+ SDValue BrCond =
+ DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), CondD,
+ DAG.getBasicBlock(I->MBB), Flags);
+
+ // Insert the false branch. Do this even if it's a fall through branch,
+ // this makes it easier to do DAG optimizations which require inverting
+ // the branch condition.
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(Fallthrough));
+ addSuccessorWithProb(CurMBB, I->MBB, UnhandledProbs);
+ addSuccessorWithProb(CurMBB, Fallthrough,
+ BranchProbability::getUnknown());
+ CurMBB->normalizeSuccProbs();
+ DAG.setRoot(BrCond);
+ }
}
CurMBB = Fallthrough;
}
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 038c499fe236e..2fc545dd2ef54 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -362,6 +362,39 @@ void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters,
}
}
Clusters.resize(DstIndex);
+
+ unsigned ZeroIdx = -1;
+ for (const auto &[Idx, C] : enumerate(Clusters)) {
+ if (C.Kind != CC_Range || C.Low != C.High)
+ continue;
+ if (C.Low->isZero()) {
+ ZeroIdx = Idx;
+ break;
+ }
+ }
+
+ if (ZeroIdx == -1u)
+ return;
+
+ unsigned Pow2Idx = -1;
+ for (const auto &[Idx, C] : enumerate(Clusters)) {
+ if (C.Kind != CC_Range || C.Low != C.High || C.MBB != Clusters[ZeroIdx].MBB)
+ continue;
+ if (C.Low->getValue().isPowerOf2()) {
+ Pow2Idx = Idx;
+ break;
+ }
+ }
+
+ if (Pow2Idx == -1u)
+ return;
+
+  // (V & ~Pow2) == 0 holds exactly for V == 0 and V == Pow2; store the mask.
+  APInt NewC = ~Clusters[Pow2Idx].Low->getValue();
+  Clusters[ZeroIdx].Low = ConstantInt::get(SI->getContext(), NewC);
+  Clusters[ZeroIdx].High = Clusters[ZeroIdx].Low;
+  Clusters[ZeroIdx].Kind = CC_And;
+  Clusters.erase(Clusters.begin() + Pow2Idx);
}
bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
diff --git a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
index 7a28c06e75d70..b8906ab7d7713 100644
--- a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
+++ b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
@@ -4,30 +4,25 @@
define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) {
; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov w9, #100 ; =0x64
-; CHECK-NEXT: mov w8, #20 ; =0x14
+; CHECK-NEXT: mov w8, #100 ; =0x64
+; CHECK-NEXT: mov w9, #223 ; =0xdf
; CHECK-NEXT: LBB0_1: ; %loop.header
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ands w10, w0, #0xff
-; CHECK-NEXT: b.eq LBB0_6
+; CHECK-NEXT: tst w0, w9
+; CHECK-NEXT: b.eq LBB0_4
; CHECK-NEXT: ; %bb.2: ; %loop.header
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: cmp w10, #32
-; CHECK-NEXT: b.eq LBB0_6
-; CHECK-NEXT: ; %bb.3: ; %loop.header
-; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: and w10, w0, #0xff
; CHECK-NEXT: cmp w10, #124
-; CHECK-NEXT: b.eq LBB0_7
-; CHECK-NEXT: ; %bb.4: ; %loop.latch
+; CHECK-NEXT: b.eq LBB0_5
+; CHECK-NEXT: ; %bb.3: ; %loop.latch
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: subs w9, w9, #1
+; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: b.ne LBB0_1
-; CHECK-NEXT: ; %bb.5:
-; CHECK-NEXT: mov w8, #20 ; =0x14
-; CHECK-NEXT: LBB0_6: ; %common.ret
-; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: LBB0_4:
+; CHECK-NEXT: mov w0, #20 ; =0x14
; CHECK-NEXT: ret
-; CHECK-NEXT: LBB0_7: ; %e2
+; CHECK-NEXT: LBB0_5: ; %e2
; CHECK-NEXT: mov w0, #30 ; =0x1e
; CHECK-NEXT: ret
entry:
More information about the llvm-commits
mailing list