[llvm] [AMDGPU] Add patterns for V_CMP_O/U (PR #69157)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 16 02:31:52 PDT 2023
================
@@ -494,6 +494,121 @@ define amdgpu_kernel void @v_fcmp_f32_ole(ptr addrspace(1) %out, float %src) {
ret void
}
+define amdgpu_kernel void @v_fcmp_f32_o(ptr addrspace(1) %out, float %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f32_o:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_o_f32_e64 s2, 0x42c80000, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f32_o:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_o_f32_e64 s0, 0x42c80000, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f32_o:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_o_f32_e64 s2, 0x42c80000, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f32_o:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_o_f32_e64 s0, 0x42c80000, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 7)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f32_uo(ptr addrspace(1) %out, float %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f32_uo:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_u_f32_e64 s2, 0x42c80000, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f32_uo:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_u_f32_e64 s0, 0x42c80000, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f32_uo:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_u_f32_e64 s2, 0x42c80000, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f32_uo:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_u_f32_e64 s0, 0x42c80000, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 8)
----------------
Pierre-vh wrote:
I'm not really sure. I observed this because an fcmp was generated by InstCombineIntrinsics when combining an icmp + fcmp.
In this case the `setcc setuo` is emitted from FCMP lowering. Maybe the actual fcmp instruction goes through a different path and the o/uo gets legalized away?
https://github.com/llvm/llvm-project/pull/69157
More information about the llvm-commits
mailing list