[llvm] [GISel] funnel shift combiner port from SelectionDAG ISel to GlobalISel (PR #135132)

Sat May 3 05:30:22 PDT 2025

https://github.com/axelcool1234 updated https://github.com/llvm/llvm-project/pull/135132

>From 733135ea6efc0cb336a4f06bd8d07262a25590ef Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson at gmail.com>
Date: Thu, 27 Feb 2025 17:41:41 -0800
Subject: [PATCH 1/2] funnel shift combiner port from SelectionDAG ISel to
 GlobalISel

---
 .../include/llvm/Target/GlobalISel/Combine.td | 22 +++++++-
 llvm/test/CodeGen/AArch64/funnel-shift.ll     | 12 ++---
 llvm/test/CodeGen/RISCV/GlobalISel/shift.ll   | 50 +++++++++++++++++++
 3 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 5309d5952f087..9d0b1cab559be 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1033,6 +1033,24 @@ def funnel_shift_overshift: GICombineRule<
   (apply [{ Helper.applyFunnelShiftConstantModulo(*${root}); }])
 >;
 
+// Transform: fshl x, z, y | shl x, y -> fshl x, z, y
+def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule<
+  (defs root:$root), 
+  (match (G_FSHL $out1, $x, $z, $y),
+         (G_SHL $out2, $x, $y),
+         (G_OR $root, $out1, $out2)),
+  (apply (G_FSHL $root, $x, $z, $y))
+>;
+
+// Transform: fshr z, x, y | srl x, y -> fshr z, x, y
+def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule<
+  (defs root:$root), 
+  (match (G_FSHR $out1, $z, $x, $y),
+         (G_LSHR $out2, $x, $y),
+         (G_OR $root, $out1, $out2)),
+  (apply (G_FSHR $root, $z, $x, $y))
+>;
+
 def rotate_out_of_range : GICombineRule<
   (defs root:$root),
   (match (wip_match_opcode G_ROTR, G_ROTL):$root,
@@ -1105,7 +1123,9 @@ def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
                                             funnel_shift_to_rotate,
                                             funnel_shift_right_zero,
                                             funnel_shift_left_zero,
-                                            funnel_shift_overshift]>;
+                                            funnel_shift_overshift,
+                                            funnel_shift_or_shift_to_funnel_shift_left,
+                                            funnel_shift_or_shift_to_funnel_shift_right]>;
 
 def bitfield_extract_from_sext_inreg : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index e5aa360f804c1..254bbc934f10f 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -674,14 +674,12 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-GI-LABEL: or_shl_fshl_simplify:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
-; CHECK-GI-NEXT:    and w9, w2, #0x1f
-; CHECK-GI-NEXT:    lsr w10, w0, #1
-; CHECK-GI-NEXT:    lsl w11, w1, w2
+; CHECK-GI-NEXT:    lsr w9, w0, #1
+; CHECK-GI-NEXT:    and w10, w2, #0x1f
 ; CHECK-GI-NEXT:    bic w8, w8, w2
-; CHECK-GI-NEXT:    lsl w9, w1, w9
-; CHECK-GI-NEXT:    lsr w8, w10, w8
-; CHECK-GI-NEXT:    orr w9, w9, w11
-; CHECK-GI-NEXT:    orr w0, w9, w8
+; CHECK-GI-NEXT:    lsl w10, w1, w10
+; CHECK-GI-NEXT:    lsr w8, w9, w8
+; CHECK-GI-NEXT:    orr w0, w10, w8
 ; CHECK-GI-NEXT:    ret
   %shy = shl i32 %y, %s
   %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
index 75e318a58fd45..f564abf3d3184 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
@@ -105,3 +105,53 @@ define i16 @test_shl_i48_2(i48 %x, i48 %y) {
   %trunc = trunc i48 %shl to i16
   ret i16 %trunc
 }
+
+define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
+; RV32-LABEL: test_fshl_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    not a3, a2
+; RV32-NEXT:    sll a0, a0, a2
+; RV32-NEXT:    srli a1, a1, 1
+; RV32-NEXT:    srl a1, a1, a3
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_fshl_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    not a3, a2
+; RV64-NEXT:    sllw a0, a0, a2
+; RV64-NEXT:    srliw a1, a1, 1
+; RV64-NEXT:    srlw a1, a1, a3
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
+  %shl = shl i32 %x, %y
+  %or = or i32 %fshl, %shl
+  %trunc = trunc i32 %or to i16
+  ret i16 %trunc
+}
+
+define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
+; RV32-LABEL: test_fshr_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    not a3, a2
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    sll a0, a0, a3
+; RV32-NEXT:    srl a1, a1, a2
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_fshr_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    not a3, a2
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    sllw a0, a0, a3
+; RV64-NEXT:    srlw a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
+  %lshr = lshr i32 %x, %y
+  %or = or i32 %fshr, %lshr
+  %trunc = trunc i32 %or to i16
+  ret i16 %trunc
+}

>From 2fc366c432a669d00017b591deff3be3cdd83f07 Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson at gmail.com>
Date: Sat, 3 May 2025 05:30:12 -0700
Subject: [PATCH 2/2] pre-commit test

---
 .../include/llvm/Target/GlobalISel/Combine.td |  16 +--
 llvm/test/CodeGen/AArch64/funnel-shift.ll     |  12 +-
 llvm/test/CodeGen/RISCV/GlobalISel/shift.ll   | 115 ++++++++++++------
 3 files changed, 94 insertions(+), 49 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9d0b1cab559be..2cbf0c794628a 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1038,8 +1038,9 @@ def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule<
   (defs root:$root), 
   (match (G_FSHL $out1, $x, $z, $y),
          (G_SHL $out2, $x, $y),
-         (G_OR $root, $out1, $out2)),
-  (apply (G_FSHL $root, $x, $z, $y))
+         (G_OR $root, $out1, $out2),
+         [{ return MRI.hasOneUse(${out2}.getReg()); }]),
+  (apply (GIReplaceReg $root, $out1))
 >;
 
 // Transform: fshr z, x, y | srl x, y -> fshr z, x, y
@@ -1047,8 +1048,9 @@ def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule<
   (defs root:$root), 
   (match (G_FSHR $out1, $z, $x, $y),
          (G_LSHR $out2, $x, $y),
-         (G_OR $root, $out1, $out2)),
-  (apply (G_FSHR $root, $z, $x, $y))
+         (G_OR $root, $out1, $out2),
+         [{ return MRI.hasOneUse(${out2}.getReg()); }]),
+  (apply (GIReplaceReg $root, $out1))
 >;
 
 def rotate_out_of_range : GICombineRule<
@@ -1123,9 +1125,9 @@ def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
                                             funnel_shift_to_rotate,
                                             funnel_shift_right_zero,
                                             funnel_shift_left_zero,
-                                            funnel_shift_overshift,
-                                            funnel_shift_or_shift_to_funnel_shift_left,
-                                            funnel_shift_or_shift_to_funnel_shift_right]>;
+                                            funnel_shift_overshift]>;
+                                            //funnel_shift_or_shift_to_funnel_shift_left,
+                                            //funnel_shift_or_shift_to_funnel_shift_right]>;
 
 def bitfield_extract_from_sext_inreg : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index 254bbc934f10f..e5aa360f804c1 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -674,12 +674,14 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-GI-LABEL: or_shl_fshl_simplify:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
-; CHECK-GI-NEXT:    lsr w9, w0, #1
-; CHECK-GI-NEXT:    and w10, w2, #0x1f
+; CHECK-GI-NEXT:    and w9, w2, #0x1f
+; CHECK-GI-NEXT:    lsr w10, w0, #1
+; CHECK-GI-NEXT:    lsl w11, w1, w2
 ; CHECK-GI-NEXT:    bic w8, w8, w2
-; CHECK-GI-NEXT:    lsl w10, w1, w10
-; CHECK-GI-NEXT:    lsr w8, w9, w8
-; CHECK-GI-NEXT:    orr w0, w10, w8
+; CHECK-GI-NEXT:    lsl w9, w1, w9
+; CHECK-GI-NEXT:    lsr w8, w10, w8
+; CHECK-GI-NEXT:    orr w9, w9, w11
+; CHECK-GI-NEXT:    orr w0, w9, w8
 ; CHECK-GI-NEXT:    ret
   %shy = shl i32 %y, %s
   %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
index f564abf3d3184..36000e3496276 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
@@ -2,60 +2,91 @@
 ; RUN: llc -mtriple=riscv32 -global-isel -global-isel-abort=1 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=RV32
 ; RUN: llc -mtriple=riscv64 -global-isel -global-isel-abort=1 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=RV64
 
-define i16 @test_lshr_i48(i48 %x) {
-; RV32-LABEL: test_lshr_i48:
+define i16 @test_lshr_i32(i32 %_, i32 %x, i32 %y) {
+; RV32-LABEL: test_lshr_i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    srli a0, a0, 16
+; RV32-NEXT:    srl a1, a1, a2
+; RV32-NEXT:    not a2, a2
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    sll a0, a0, a2
+; RV32-NEXT:    or a2, a1, a1
+; RV32-NEXT:    or a0, a0, a2
+; RV32-NEXT:    and a0, a0, a1
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_lshr_i48:
+; RV64-LABEL: test_lshr_i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    srliw a0, a0, 16
+; RV64-NEXT:    srlw a1, a1, a2
+; RV64-NEXT:    not a2, a2
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    sllw a0, a0, a2
+; RV64-NEXT:    or a2, a1, a1
+; RV64-NEXT:    or a0, a0, a2
+; RV64-NEXT:    and a0, a0, a1
 ; RV64-NEXT:    ret
-  %lshr = lshr i48 %x, 16
-  %trunc = trunc i48 %lshr to i16
+  %lshr = lshr i32 %x, %y
+  %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
+  %or = or i32 %fshr, %lshr
+  %and = and i32 %or, %lshr
+  %trunc = trunc i32 %and to i16
   ret i16 %trunc
 }
 
-define i16 @test_ashr_i48(i48 %x) {
-; RV32-LABEL: test_ashr_i48:
+define i16 @test_ashr_i32(i32 %x) {
+; RV32-LABEL: test_ashr_i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    srai a0, a0, 16
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_ashr_i48:
+; RV64-LABEL: test_ashr_i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    sraiw a0, a0, 16
 ; RV64-NEXT:    ret
-  %ashr = ashr i48 %x, 16
-  %trunc = trunc i48 %ashr to i16
+  %ashr = ashr i32 %x, 16
+  %trunc = trunc i32 %ashr to i16
   ret i16 %trunc
 }
 
-define i16 @test_shl_i48(i48 %x) {
-; RV32-LABEL: test_shl_i48:
+define i16 @test_shl_i32(i32 %_, i32 %x, i32 %y) {
+; RV32-LABEL: test_shl_i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a0, a0, 8
+; RV32-NEXT:    not a3, a2
+; RV32-NEXT:    sll a1, a1, a2
+; RV32-NEXT:    srli a0, a0, 1
+; RV32-NEXT:    srl a0, a0, a3
+; RV32-NEXT:    or a2, a1, a1
+; RV32-NEXT:    or a0, a2, a0
+; RV32-NEXT:    and a0, a0, a1
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_shl_i48:
+; RV64-LABEL: test_shl_i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a0, a0, 8
+; RV64-NEXT:    not a3, a2
+; RV64-NEXT:    sllw a1, a1, a2
+; RV64-NEXT:    srliw a0, a0, 1
+; RV64-NEXT:    srlw a0, a0, a3
+; RV64-NEXT:    or a2, a1, a1
+; RV64-NEXT:    or a0, a2, a0
+; RV64-NEXT:    and a0, a0, a1
 ; RV64-NEXT:    ret
-  %shl = shl i48 %x, 8
-  %trunc = trunc i48 %shl to i16
+  %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
+  %shl = shl i32 %x, %y
+  %or = or i32 %fshl, %shl
+  %and = and i32 %or, %shl
+  %trunc = trunc i32 %and to i16
   ret i16 %trunc
+
 }
 
 ; FIXME: Could use srlw to remove slli+srli.
-define i16 @test_lshr_i48_2(i48 %x, i48 %y) {
-; RV32-LABEL: test_lshr_i48_2:
+define i16 @test_lshr_i48(i48 %x, i48 %y) {
+; RV32-LABEL: test_lshr_i48:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    andi a2, a2, 15
 ; RV32-NEXT:    srl a0, a0, a2
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_lshr_i48_2:
+; RV64-LABEL: test_lshr_i48:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    andi a1, a1, 15
 ; RV64-NEXT:    slli a0, a0, 32
@@ -69,14 +100,14 @@ define i16 @test_lshr_i48_2(i48 %x, i48 %y) {
 }
 
 ; FIXME: Could use sraw to remove the sext.w.
-define i16 @test_ashr_i48_2(i48 %x, i48 %y) {
-; RV32-LABEL: test_ashr_i48_2:
+define i16 @test_ashr_i48(i48 %x, i48 %y) {
+; RV32-LABEL: test_ashr_i48:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    andi a2, a2, 15
 ; RV32-NEXT:    sra a0, a0, a2
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_ashr_i48_2:
+; RV64-LABEL: test_ashr_i48:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    andi a1, a1, 15
 ; RV64-NEXT:    sext.w a0, a0
@@ -88,21 +119,21 @@ define i16 @test_ashr_i48_2(i48 %x, i48 %y) {
   ret i16 %trunc
 }
 
-define i16 @test_shl_i48_2(i48 %x, i48 %y) {
-; RV32-LABEL: test_shl_i48_2:
+define i16 @test_shl_i32_2(i32 %x, i32 %y) {
+; RV32-LABEL: test_shl_i32_2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    andi a2, a2, 15
-; RV32-NEXT:    sll a0, a0, a2
+; RV32-NEXT:    andi a1, a1, 15
+; RV32-NEXT:    sll a0, a0, a1
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_shl_i48_2:
+; RV64-LABEL: test_shl_i32_2:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    andi a1, a1, 15
 ; RV64-NEXT:    sll a0, a0, a1
 ; RV64-NEXT:    ret
-  %and = and i48 %y, 15
-  %shl = shl i48 %x, %and
-  %trunc = trunc i48 %shl to i16
+  %and = and i32 %y, 15
+  %shl = shl i32 %x, %and
+  %trunc = trunc i32 %shl to i16
   ret i16 %trunc
 }
 
@@ -113,7 +144,9 @@ define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
 ; RV32-NEXT:    sll a0, a0, a2
 ; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    srl a1, a1, a3
-; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    or a1, a0, a1
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    and a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: test_fshl_i32:
@@ -122,12 +155,15 @@ define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
 ; RV64-NEXT:    sllw a0, a0, a2
 ; RV64-NEXT:    srliw a1, a1, 1
 ; RV64-NEXT:    srlw a1, a1, a3
-; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    or a1, a0, a1
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    and a0, a0, a1
 ; RV64-NEXT:    ret
   %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
   %shl = shl i32 %x, %y
   %or = or i32 %fshl, %shl
-  %trunc = trunc i32 %or to i16
+  %and = and i32 %or, %fshl
+  %trunc = trunc i32 %and to i16
   ret i16 %trunc
 }
 
@@ -139,6 +175,8 @@ define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
 ; RV32-NEXT:    sll a0, a0, a3
 ; RV32-NEXT:    srl a1, a1, a2
 ; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    or a1, a0, a1
+; RV32-NEXT:    and a0, a1, a0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: test_fshr_i32:
@@ -148,10 +186,13 @@ define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
 ; RV64-NEXT:    sllw a0, a0, a3
 ; RV64-NEXT:    srlw a1, a1, a2
 ; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    or a1, a0, a1
+; RV64-NEXT:    and a0, a1, a0
 ; RV64-NEXT:    ret
   %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
   %lshr = lshr i32 %x, %y
   %or = or i32 %fshr, %lshr
-  %trunc = trunc i32 %or to i16
+  %and = and i32 %or, %fshr
+  %trunc = trunc i32 %and to i16
   ret i16 %trunc
 }