[llvm] [GISel][RISCV][AMDGPU] Add G_SHL, G_LSHR, G_ASHR to binop_left_to_zero. (PR #115089)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 6 12:29:50 PST 2024
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/115089
From db052d2ab1489b798ad7b8e9f9a73dd3f67b31ff Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 5 Nov 2024 15:21:51 -0800
Subject: [PATCH] [GISel][RISCV][AMDGPU] Add G_SHL, G_LSHR, G_ASHR to
binop_left_to_zero.
Shifting 0 by any amount is still zero.
---
.../include/llvm/Target/GlobalISel/Combine.td | 3 +-
...mbine-shl-from-extend-narrow.postlegal.mir | 8 +-
llvm/test/CodeGen/AMDGPU/fptoi.i128.ll | 76 ++++++++-----------
.../test/CodeGen/RISCV/GlobalISel/rv32zbkb.ll | 2 -
4 files changed, 38 insertions(+), 51 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 80a22c35ebceff..3b04d7fed3bc0e 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -574,7 +574,8 @@ def binop_same_val: GICombineRule<
// Fold (0 op x) - > 0
def binop_left_to_zero: GICombineRule<
(defs root:$root),
- (match (wip_match_opcode G_SDIV, G_UDIV, G_SREM, G_UREM):$root,
+ (match (wip_match_opcode G_SHL, G_LSHR, G_ASHR, G_SDIV, G_UDIV, G_SREM,
+ G_UREM):$root,
[{ return Helper.matchOperandIsZero(*${root}, 1); }]),
(apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
index 6a291510fe66c1..6ae8895322d6f9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
@@ -348,14 +348,14 @@ body: |
; GFX6-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: %shl:_(s32) = G_CONSTANT i32 0
- ; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
+ ; GFX6-NEXT: %extend:_(s32) = G_CONSTANT i32 0
+ ; GFX6-NEXT: $vgpr0 = COPY %extend(s32)
;
; GFX9-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: %shl:_(s32) = G_CONSTANT i32 0
- ; GFX9-NEXT: $vgpr0 = COPY %shl(s32)
+ ; GFX9-NEXT: %extend:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: $vgpr0 = COPY %extend(s32)
%zero:_(s16) = G_CONSTANT i16 0
%extend:_(s32) = G_ZEXT %zero:_(s16)
%shiftamt:_(s16) = G_CONSTANT i16 16
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index 6f4f7c27a51473..a0fe9d88e31cf9 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -270,14 +270,12 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[16:17]
; GISEL-NEXT: s_cbranch_execz .LBB0_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12
-; GISEL-NEXT: v_sub_co_u32_e32 v6, vcc, 0x433, v6
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v6
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v6, v[4:5]
-; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, 0
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, 0x433, v6
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], v2, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[6:7], v4, v10, 0
; GISEL-NEXT: v_cndmask_b32_e32 v5, v1, v5, vcc
@@ -640,14 +638,12 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[16:17]
; GISEL-NEXT: s_cbranch_execz .LBB1_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12
-; GISEL-NEXT: v_sub_co_u32_e32 v6, vcc, 0x433, v6
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v6
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v6, v[4:5]
-; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, 0
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, 0x433, v6
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], v2, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[6:7], v4, v10, 0
; GISEL-NEXT: v_cndmask_b32_e32 v5, v1, v5, vcc
@@ -1003,13 +999,11 @@ define i128 @fptosi_f32_to_i128(float %x) {
; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[16:17]
; GISEL-NEXT: s_cbranch_execz .LBB2_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12
-; GISEL-NEXT: v_sub_co_u32_e32 v3, vcc, 0x96, v6
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v3, v[4:5]
-; GISEL-NEXT: v_lshrrev_b64 v[1:2], v2, 0
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, 0x96, v6
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], v2, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v4, v9, 0
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[8:9], v4, v10, 0
@@ -1360,13 +1354,11 @@ define i128 @fptoui_f32_to_i128(float %x) {
; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[16:17]
; GISEL-NEXT: s_cbranch_execz .LBB3_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12
-; GISEL-NEXT: v_sub_co_u32_e32 v3, vcc, 0x96, v6
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v3, v[4:5]
-; GISEL-NEXT: v_lshrrev_b64 v[1:2], v2, 0
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, 0x96, v6
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], v2, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v4, v9, 0
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[8:9], v4, v10, 0
@@ -1740,13 +1732,11 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[16:17]
; GISEL-NEXT: s_cbranch_execz .LBB6_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12
-; GISEL-NEXT: v_sub_co_u32_e32 v3, vcc, 0x86, v5
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v3, v[7:8]
-; GISEL-NEXT: v_lshrrev_b64 v[1:2], v2, 0
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, 0x86, v5
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], v2, v[7:8]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; GISEL-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v9
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v1
@@ -2091,13 +2081,11 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[16:17]
; GISEL-NEXT: s_cbranch_execz .LBB7_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12
-; GISEL-NEXT: v_sub_co_u32_e32 v3, vcc, 0x86, v5
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v3, v[7:8]
-; GISEL-NEXT: v_lshrrev_b64 v[1:2], v2, 0
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, 0x86, v5
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], v2, v[7:8]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; GISEL-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v9
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v1
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbkb.ll
index b6f13aa7227a77..b75fa7395d2110 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbkb.ll
@@ -153,9 +153,7 @@ define i64 @packh_i64_2(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: andi a1, a2, 255
; CHECK-NEXT: slli a2, a1, 8
-; CHECK-NEXT: slli a3, zero, 8
; CHECK-NEXT: srli a1, a1, 24
-; CHECK-NEXT: or a1, a3, a1
; CHECK-NEXT: or a0, a2, a0
; CHECK-NEXT: ret
%and = and i64 %a, 255
More information about the llvm-commits
mailing list