[llvm] [GISel] funnel shift combiner port from SelectionDAG ISel to GlobalISel (PR #135132)
Axel Sorenson via llvm-commits
llvm-commits at lists.llvm.org
Sat May 3 05:30:22 PDT 2025
https://github.com/axelcool1234 updated https://github.com/llvm/llvm-project/pull/135132
>From 733135ea6efc0cb336a4f06bd8d07262a25590ef Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson at gmail.com>
Date: Thu, 27 Feb 2025 17:41:41 -0800
Subject: [PATCH 1/2] funnel shift combiner port from SelectionDAG ISel to
GlobalISel
---
.../include/llvm/Target/GlobalISel/Combine.td | 22 +++++++-
llvm/test/CodeGen/AArch64/funnel-shift.ll | 12 ++---
llvm/test/CodeGen/RISCV/GlobalISel/shift.ll | 50 +++++++++++++++++++
3 files changed, 76 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 5309d5952f087..9d0b1cab559be 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1033,6 +1033,24 @@ def funnel_shift_overshift: GICombineRule<
(apply [{ Helper.applyFunnelShiftConstantModulo(*${root}); }])
>;
+// Transform: fshl x, z, y | shl x, y -> fshl x, z, y
+def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule<
+ (defs root:$root),
+ (match (G_FSHL $out1, $x, $z, $y),
+ (G_SHL $out2, $x, $y),
+ (G_OR $root, $out1, $out2)),
+ (apply (G_FSHL $root, $x, $z, $y))
+>;
+
+// Transform: fshr z, x, y | srl x, y -> fshr z, x, y
+def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule<
+ (defs root:$root),
+ (match (G_FSHR $out1, $z, $x, $y),
+ (G_LSHR $out2, $x, $y),
+ (G_OR $root, $out1, $out2)),
+ (apply (G_FSHR $root, $z, $x, $y))
+>;
+
def rotate_out_of_range : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_ROTR, G_ROTL):$root,
@@ -1105,7 +1123,9 @@ def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
funnel_shift_to_rotate,
funnel_shift_right_zero,
funnel_shift_left_zero,
- funnel_shift_overshift]>;
+ funnel_shift_overshift,
+ funnel_shift_or_shift_to_funnel_shift_left,
+ funnel_shift_or_shift_to_funnel_shift_right]>;
def bitfield_extract_from_sext_inreg : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index e5aa360f804c1..254bbc934f10f 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -674,14 +674,12 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
; CHECK-GI-LABEL: or_shl_fshl_simplify:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
-; CHECK-GI-NEXT: and w9, w2, #0x1f
-; CHECK-GI-NEXT: lsr w10, w0, #1
-; CHECK-GI-NEXT: lsl w11, w1, w2
+; CHECK-GI-NEXT: lsr w9, w0, #1
+; CHECK-GI-NEXT: and w10, w2, #0x1f
; CHECK-GI-NEXT: bic w8, w8, w2
-; CHECK-GI-NEXT: lsl w9, w1, w9
-; CHECK-GI-NEXT: lsr w8, w10, w8
-; CHECK-GI-NEXT: orr w9, w9, w11
-; CHECK-GI-NEXT: orr w0, w9, w8
+; CHECK-GI-NEXT: lsl w10, w1, w10
+; CHECK-GI-NEXT: lsr w8, w9, w8
+; CHECK-GI-NEXT: orr w0, w10, w8
; CHECK-GI-NEXT: ret
%shy = shl i32 %y, %s
%fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
index 75e318a58fd45..f564abf3d3184 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
@@ -105,3 +105,53 @@ define i16 @test_shl_i48_2(i48 %x, i48 %y) {
%trunc = trunc i48 %shl to i16
ret i16 %trunc
}
+
+define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
+; RV32-LABEL: test_fshl_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: not a3, a2
+; RV32-NEXT: sll a0, a0, a2
+; RV32-NEXT: srli a1, a1, 1
+; RV32-NEXT: srl a1, a1, a3
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test_fshl_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: not a3, a2
+; RV64-NEXT: sllw a0, a0, a2
+; RV64-NEXT: srliw a1, a1, 1
+; RV64-NEXT: srlw a1, a1, a3
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: ret
+ %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
+ %shl = shl i32 %x, %y
+ %or = or i32 %fshl, %shl
+ %trunc = trunc i32 %or to i16
+ ret i16 %trunc
+}
+
+define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
+; RV32-LABEL: test_fshr_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: not a3, a2
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: sll a0, a0, a3
+; RV32-NEXT: srl a1, a1, a2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test_fshr_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: not a3, a2
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: sllw a0, a0, a3
+; RV64-NEXT: srlw a1, a1, a2
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: ret
+ %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
+ %lshr = lshr i32 %x, %y
+ %or = or i32 %fshr, %lshr
+ %trunc = trunc i32 %or to i16
+ ret i16 %trunc
+}
>From 2fc366c432a669d00017b591deff3be3cdd83f07 Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson at gmail.com>
Date: Sat, 3 May 2025 05:30:12 -0700
Subject: [PATCH 2/2] pre-commit test
---
.../include/llvm/Target/GlobalISel/Combine.td | 16 +--
llvm/test/CodeGen/AArch64/funnel-shift.ll | 12 +-
llvm/test/CodeGen/RISCV/GlobalISel/shift.ll | 115 ++++++++++++------
3 files changed, 94 insertions(+), 49 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9d0b1cab559be..2cbf0c794628a 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1038,8 +1038,9 @@ def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule<
(defs root:$root),
(match (G_FSHL $out1, $x, $z, $y),
(G_SHL $out2, $x, $y),
- (G_OR $root, $out1, $out2)),
- (apply (G_FSHL $root, $x, $z, $y))
+ (G_OR $root, $out1, $out2),
+ [{ return MRI.hasOneUse(${out2}.getReg()); }]),
+ (apply (GIReplaceReg $root, $out1))
>;
// Transform: fshr z, x, y | srl x, y -> fshr z, x, y
@@ -1047,8 +1048,9 @@ def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule<
(defs root:$root),
(match (G_FSHR $out1, $z, $x, $y),
(G_LSHR $out2, $x, $y),
- (G_OR $root, $out1, $out2)),
- (apply (G_FSHR $root, $z, $x, $y))
+ (G_OR $root, $out1, $out2),
+ [{ return MRI.hasOneUse(${out2}.getReg()); }]),
+ (apply (GIReplaceReg $root, $out1))
>;
def rotate_out_of_range : GICombineRule<
@@ -1123,9 +1125,9 @@ def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
funnel_shift_to_rotate,
funnel_shift_right_zero,
funnel_shift_left_zero,
- funnel_shift_overshift,
- funnel_shift_or_shift_to_funnel_shift_left,
- funnel_shift_or_shift_to_funnel_shift_right]>;
+ funnel_shift_overshift]>;
+ //funnel_shift_or_shift_to_funnel_shift_left,
+ //funnel_shift_or_shift_to_funnel_shift_right]>;
def bitfield_extract_from_sext_inreg : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index 254bbc934f10f..e5aa360f804c1 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -674,12 +674,14 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
; CHECK-GI-LABEL: or_shl_fshl_simplify:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
-; CHECK-GI-NEXT: lsr w9, w0, #1
-; CHECK-GI-NEXT: and w10, w2, #0x1f
+; CHECK-GI-NEXT: and w9, w2, #0x1f
+; CHECK-GI-NEXT: lsr w10, w0, #1
+; CHECK-GI-NEXT: lsl w11, w1, w2
; CHECK-GI-NEXT: bic w8, w8, w2
-; CHECK-GI-NEXT: lsl w10, w1, w10
-; CHECK-GI-NEXT: lsr w8, w9, w8
-; CHECK-GI-NEXT: orr w0, w10, w8
+; CHECK-GI-NEXT: lsl w9, w1, w9
+; CHECK-GI-NEXT: lsr w8, w10, w8
+; CHECK-GI-NEXT: orr w9, w9, w11
+; CHECK-GI-NEXT: orr w0, w9, w8
; CHECK-GI-NEXT: ret
%shy = shl i32 %y, %s
%fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
index f564abf3d3184..36000e3496276 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
@@ -2,60 +2,91 @@
; RUN: llc -mtriple=riscv32 -global-isel -global-isel-abort=1 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -global-isel -global-isel-abort=1 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=RV64
-define i16 @test_lshr_i48(i48 %x) {
-; RV32-LABEL: test_lshr_i48:
+define i16 @test_lshr_i32(i32 %_, i32 %x, i32 %y) {
+; RV32-LABEL: test_lshr_i32:
; RV32: # %bb.0:
-; RV32-NEXT: srli a0, a0, 16
+; RV32-NEXT: srl a1, a1, a2
+; RV32-NEXT: not a2, a2
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: sll a0, a0, a2
+; RV32-NEXT: or a2, a1, a1
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: ret
;
-; RV64-LABEL: test_lshr_i48:
+; RV64-LABEL: test_lshr_i32:
; RV64: # %bb.0:
-; RV64-NEXT: srliw a0, a0, 16
+; RV64-NEXT: srlw a1, a1, a2
+; RV64-NEXT: not a2, a2
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: sllw a0, a0, a2
+; RV64-NEXT: or a2, a1, a1
+; RV64-NEXT: or a0, a0, a2
+; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: ret
- %lshr = lshr i48 %x, 16
- %trunc = trunc i48 %lshr to i16
+ %lshr = lshr i32 %x, %y
+ %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
+ %or = or i32 %fshr, %lshr
+ %and = and i32 %or, %lshr
+ %trunc = trunc i32 %and to i16
ret i16 %trunc
}
-define i16 @test_ashr_i48(i48 %x) {
-; RV32-LABEL: test_ashr_i48:
+define i16 @test_ashr_i32(i32 %x) {
+; RV32-LABEL: test_ashr_i32:
; RV32: # %bb.0:
; RV32-NEXT: srai a0, a0, 16
; RV32-NEXT: ret
;
-; RV64-LABEL: test_ashr_i48:
+; RV64-LABEL: test_ashr_i32:
; RV64: # %bb.0:
; RV64-NEXT: sraiw a0, a0, 16
; RV64-NEXT: ret
- %ashr = ashr i48 %x, 16
- %trunc = trunc i48 %ashr to i16
+ %ashr = ashr i32 %x, 16
+ %trunc = trunc i32 %ashr to i16
ret i16 %trunc
}
-define i16 @test_shl_i48(i48 %x) {
-; RV32-LABEL: test_shl_i48:
+define i16 @test_shl_i32(i32 %_, i32 %x, i32 %y) {
+; RV32-LABEL: test_shl_i32:
; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 8
+; RV32-NEXT: not a3, a2
+; RV32-NEXT: sll a1, a1, a2
+; RV32-NEXT: srli a0, a0, 1
+; RV32-NEXT: srl a0, a0, a3
+; RV32-NEXT: or a2, a1, a1
+; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: ret
;
-; RV64-LABEL: test_shl_i48:
+; RV64-LABEL: test_shl_i32:
; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 8
+; RV64-NEXT: not a3, a2
+; RV64-NEXT: sllw a1, a1, a2
+; RV64-NEXT: srliw a0, a0, 1
+; RV64-NEXT: srlw a0, a0, a3
+; RV64-NEXT: or a2, a1, a1
+; RV64-NEXT: or a0, a2, a0
+; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: ret
- %shl = shl i48 %x, 8
- %trunc = trunc i48 %shl to i16
+ %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
+ %shl = shl i32 %x, %y
+ %or = or i32 %fshl, %shl
+ %and = and i32 %or, %shl
+ %trunc = trunc i32 %and to i16
ret i16 %trunc
+
}
; FIXME: Could use srlw to remove slli+srli.
-define i16 @test_lshr_i48_2(i48 %x, i48 %y) {
-; RV32-LABEL: test_lshr_i48_2:
+define i16 @test_lshr_i48(i48 %x, i48 %y) {
+; RV32-LABEL: test_lshr_i48:
; RV32: # %bb.0:
; RV32-NEXT: andi a2, a2, 15
; RV32-NEXT: srl a0, a0, a2
; RV32-NEXT: ret
;
-; RV64-LABEL: test_lshr_i48_2:
+; RV64-LABEL: test_lshr_i48:
; RV64: # %bb.0:
; RV64-NEXT: andi a1, a1, 15
; RV64-NEXT: slli a0, a0, 32
@@ -69,14 +100,14 @@ define i16 @test_lshr_i48_2(i48 %x, i48 %y) {
}
; FIXME: Could use sraw to remove the sext.w.
-define i16 @test_ashr_i48_2(i48 %x, i48 %y) {
-; RV32-LABEL: test_ashr_i48_2:
+define i16 @test_ashr_i48(i48 %x, i48 %y) {
+; RV32-LABEL: test_ashr_i48:
; RV32: # %bb.0:
; RV32-NEXT: andi a2, a2, 15
; RV32-NEXT: sra a0, a0, a2
; RV32-NEXT: ret
;
-; RV64-LABEL: test_ashr_i48_2:
+; RV64-LABEL: test_ashr_i48:
; RV64: # %bb.0:
; RV64-NEXT: andi a1, a1, 15
; RV64-NEXT: sext.w a0, a0
@@ -88,21 +119,21 @@ define i16 @test_ashr_i48_2(i48 %x, i48 %y) {
ret i16 %trunc
}
-define i16 @test_shl_i48_2(i48 %x, i48 %y) {
-; RV32-LABEL: test_shl_i48_2:
+define i16 @test_shl_i32_2(i32 %x, i32 %y) {
+; RV32-LABEL: test_shl_i32_2:
; RV32: # %bb.0:
-; RV32-NEXT: andi a2, a2, 15
-; RV32-NEXT: sll a0, a0, a2
+; RV32-NEXT: andi a1, a1, 15
+; RV32-NEXT: sll a0, a0, a1
; RV32-NEXT: ret
;
-; RV64-LABEL: test_shl_i48_2:
+; RV64-LABEL: test_shl_i32_2:
; RV64: # %bb.0:
; RV64-NEXT: andi a1, a1, 15
; RV64-NEXT: sll a0, a0, a1
; RV64-NEXT: ret
- %and = and i48 %y, 15
- %shl = shl i48 %x, %and
- %trunc = trunc i48 %shl to i16
+ %and = and i32 %y, 15
+ %shl = shl i32 %x, %and
+ %trunc = trunc i32 %shl to i16
ret i16 %trunc
}
@@ -113,7 +144,9 @@ define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
; RV32-NEXT: sll a0, a0, a2
; RV32-NEXT: srli a1, a1, 1
; RV32-NEXT: srl a1, a1, a3
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: or a1, a0, a1
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: ret
;
; RV64-LABEL: test_fshl_i32:
@@ -122,12 +155,15 @@ define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
; RV64-NEXT: sllw a0, a0, a2
; RV64-NEXT: srliw a1, a1, 1
; RV64-NEXT: srlw a1, a1, a3
-; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: or a1, a0, a1
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: ret
%fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
%shl = shl i32 %x, %y
%or = or i32 %fshl, %shl
- %trunc = trunc i32 %or to i16
+ %and = and i32 %or, %fshl
+ %trunc = trunc i32 %and to i16
ret i16 %trunc
}
@@ -139,6 +175,8 @@ define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
; RV32-NEXT: sll a0, a0, a3
; RV32-NEXT: srl a1, a1, a2
; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: or a1, a0, a1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: test_fshr_i32:
@@ -148,10 +186,13 @@ define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
; RV64-NEXT: sllw a0, a0, a3
; RV64-NEXT: srlw a1, a1, a2
; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: or a1, a0, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ret
%fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
%lshr = lshr i32 %x, %y
%or = or i32 %fshr, %lshr
- %trunc = trunc i32 %or to i16
+ %and = and i32 %or, %fshr
+ %trunc = trunc i32 %and to i16
ret i16 %trunc
}
More information about the llvm-commits mailing list