[llvm] [GISel] funnel shift combiner port from SelectionDAG ISel to GlobalISel (PR #135132)

Sat Apr 12 00:03:31 PDT 2025

https://github.com/axelcool1234 updated https://github.com/llvm/llvm-project/pull/135132

>From 17c74e0b4e6afc898dd1b4dcf868927c8a6f07d4 Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson at gmail.com>
Date: Thu, 27 Feb 2025 17:41:41 -0800
Subject: [PATCH] funnel shift combiner port from SelectionDAG ISel to
 GlobalISel

---
 .../include/llvm/Target/GlobalISel/Combine.td | 22 +++++++-
 llvm/test/CodeGen/AArch64/funnel-shift.ll     | 12 ++---
 llvm/test/CodeGen/RISCV/GlobalISel/shift.ll   | 50 +++++++++++++++++++
 3 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 5309d5952f087..615617d4ca01b 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1033,6 +1033,24 @@ def funnel_shift_overshift: GICombineRule<
   (apply [{ Helper.applyFunnelShiftConstantModulo(*${root}); }])
 >;
 
+// Transform: fshl x, ?, y | shl x, y -> fshl x, ?, y
+def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule<
+  (defs root:$root), 
+  (match (G_FSHL $out1, $x, $_, $y),
+         (G_SHL $out2, $x, $y),
+         (G_OR $root, $out1, $out2)),
+  (apply (G_FSHL $root, $x, $_, $y))
+>;
+
+// Transform: fshr ?, x, y | srl x, y -> fshr ?, x, y
+def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule<
+  (defs root:$root), 
+  (match (G_FSHR $out1, $_, $x, $y),
+         (G_LSHR $out2, $x, $y),
+         (G_OR $root, $out1, $out2)),
+  (apply (G_FSHR $root, $_, $x, $y))
+>;
+
 def rotate_out_of_range : GICombineRule<
   (defs root:$root),
   (match (wip_match_opcode G_ROTR, G_ROTL):$root,
@@ -1105,7 +1123,9 @@ def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
                                             funnel_shift_to_rotate,
                                             funnel_shift_right_zero,
                                             funnel_shift_left_zero,
-                                            funnel_shift_overshift]>;
+                                            funnel_shift_overshift,
+                                            funnel_shift_or_shift_to_funnel_shift_left,
+                                            funnel_shift_or_shift_to_funnel_shift_right]>;
 
 def bitfield_extract_from_sext_inreg : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index e5aa360f804c1..254bbc934f10f 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -674,14 +674,12 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-GI-LABEL: or_shl_fshl_simplify:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
-; CHECK-GI-NEXT:    and w9, w2, #0x1f
-; CHECK-GI-NEXT:    lsr w10, w0, #1
-; CHECK-GI-NEXT:    lsl w11, w1, w2
+; CHECK-GI-NEXT:    lsr w9, w0, #1
+; CHECK-GI-NEXT:    and w10, w2, #0x1f
 ; CHECK-GI-NEXT:    bic w8, w8, w2
-; CHECK-GI-NEXT:    lsl w9, w1, w9
-; CHECK-GI-NEXT:    lsr w8, w10, w8
-; CHECK-GI-NEXT:    orr w9, w9, w11
-; CHECK-GI-NEXT:    orr w0, w9, w8
+; CHECK-GI-NEXT:    lsl w10, w1, w10
+; CHECK-GI-NEXT:    lsr w8, w9, w8
+; CHECK-GI-NEXT:    orr w0, w10, w8
 ; CHECK-GI-NEXT:    ret
   %shy = shl i32 %y, %s
   %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
index 75e318a58fd45..f564abf3d3184 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
@@ -105,3 +105,53 @@ define i16 @test_shl_i48_2(i48 %x, i48 %y) {
   %trunc = trunc i48 %shl to i16
   ret i16 %trunc
 }
+
+define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
+; RV32-LABEL: test_fshl_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    not a3, a2
+; RV32-NEXT:    sll a0, a0, a2
+; RV32-NEXT:    srli a1, a1, 1
+; RV32-NEXT:    srl a1, a1, a3
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_fshl_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    not a3, a2
+; RV64-NEXT:    sllw a0, a0, a2
+; RV64-NEXT:    srliw a1, a1, 1
+; RV64-NEXT:    srlw a1, a1, a3
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
+  %shl = shl i32 %x, %y
+  %or = or i32 %fshl, %shl
+  %trunc = trunc i32 %or to i16
+  ret i16 %trunc
+}
+
+define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
+; RV32-LABEL: test_fshr_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    not a3, a2
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    sll a0, a0, a3
+; RV32-NEXT:    srl a1, a1, a2
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_fshr_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    not a3, a2
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    sllw a0, a0, a3
+; RV64-NEXT:    srlw a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
+  %lshr = lshr i32 %x, %y
+  %or = or i32 %fshr, %lshr
+  %trunc = trunc i32 %or to i16
+  ret i16 %trunc
+}