[llvm] 3d9e226 - [AMDGPU] Use s_cmp instead of s_cmpk
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 30 10:03:55 PST 2022
Author: Jay Foad
Date: 2022-11-30T18:02:39Z
New Revision: 3d9e226081cf6a908c27cedf4c0dc72380b62bb2
URL: https://github.com/llvm/llvm-project/commit/3d9e226081cf6a908c27cedf4c0dc72380b62bb2
DIFF: https://github.com/llvm/llvm-project/commit/3d9e226081cf6a908c27cedf4c0dc72380b62bb2.diff
LOG: [AMDGPU] Use s_cmp instead of s_cmpk
Don't bother pre-shrinking "s_cmp_lg_u32 reg, 0" to s_cmpk_lg_u32,
because 0 is already an inline constant, so the s_cmpk form is no
smaller.
This is just for consistency with the surrounding code and to simplify a
downstream patch.
Differential Revision: https://reviews.llvm.org/D138993
Added:
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
llvm/test/CodeGen/AMDGPU/udiv.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d55b73037b7c4..0fe6ffc0efaab 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4202,7 +4202,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
.addImm(0);
}
} else {
- BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMPK_LG_U32))
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
.addReg(Src2.getReg())
.addImm(0);
}
diff --git a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
index 776de7db20657..e3072396ee96f 100644
--- a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
+++ b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
@@ -50,7 +50,7 @@ define i32 @s_add_co_select_user() {
; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s5, s4, s4
-; GFX10-NEXT: s_cmpk_lg_u32 s5, 0x0
+; GFX10-NEXT: s_cmp_lg_u32 s5, 0
; GFX10-NEXT: s_addc_u32 s5, s4, 0
; GFX10-NEXT: s_cselect_b32 s6, -1, 0
; GFX10-NEXT: s_and_b32 s6, s6, exec_lo
@@ -69,7 +69,7 @@ define i32 @s_add_co_select_user() {
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_add_co_u32 v0, s1, s0, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_cmpk_lg_u32 s1, 0x0
+; GFX11-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-NEXT: s_addc_u32 s1, s0, 0
; GFX11-NEXT: s_cselect_b32 s2, -1, 0
; GFX11-NEXT: s_and_b32 s2, s2, exec_lo
@@ -154,7 +154,7 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
; GFX10-NEXT: s_cmp_lt_u32 s1, s0
; GFX10-NEXT: s_cselect_b32 s1, -1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
-; GFX10-NEXT: s_cmpk_lg_u32 s1, 0x0
+; GFX10-NEXT: s_cmp_lg_u32 s1, 0
; GFX10-NEXT: s_addc_u32 s0, s0, 0
; GFX10-NEXT: v_cmp_ge_u32_e32 vcc_lo, s0, v0
; GFX10-NEXT: s_cbranch_vccnz .LBB1_2
@@ -181,7 +181,7 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
; GFX11-NEXT: s_cmp_lt_u32 s1, s0
; GFX11-NEXT: s_cselect_b32 s1, -1, 0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
-; GFX11-NEXT: s_cmpk_lg_u32 s1, 0x0
+; GFX11-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-NEXT: s_addc_u32 s0, s0, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_cmp_ge_u32_e32 vcc_lo, s0, v0
diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index 889c3fc8b9d0c..3cbb8fad54136 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -2807,7 +2807,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
; GFX1030-NEXT: s_add_u32 s4, 0x4237, s4
; GFX1030-NEXT: s_addc_u32 s5, 0, 0
; GFX1030-NEXT: v_add_co_u32 v2, s4, 0xa9000000, s4
-; GFX1030-NEXT: s_cmpk_lg_u32 s4, 0x0
+; GFX1030-NEXT: s_cmp_lg_u32 s4, 0
; GFX1030-NEXT: s_addc_u32 s5, s5, 0xa7c5
; GFX1030-NEXT: v_readfirstlane_b32 s4, v2
; GFX1030-NEXT: s_mul_i32 s6, s5, 0xfffe7960
@@ -2830,7 +2830,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
; GFX1030-NEXT: s_add_u32 s4, s4, s7
; GFX1030-NEXT: s_addc_u32 s6, 0, s6
; GFX1030-NEXT: v_add_co_u32 v4, s4, v2, s4
-; GFX1030-NEXT: s_cmpk_lg_u32 s4, 0x0
+; GFX1030-NEXT: s_cmp_lg_u32 s4, 0
; GFX1030-NEXT: s_addc_u32 s4, s5, s6
; GFX1030-NEXT: v_mul_hi_u32 v8, v0, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, s4, 0
More information about the llvm-commits mailing list