[llvm] [AMDGPU] Add patterns for V_CMP_O/U (PR #69157)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 16 02:37:11 PDT 2023
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/69157
>From 9dd652ae2f19674f478ba311ca95b3a1dc9710a9 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 16 Oct 2023 10:21:25 +0200
Subject: [PATCH 1/2] [AMDGPU] Add patterns for V_CMP_O/U
Fixes SWDEV-427162
---
llvm/lib/Target/AMDGPU/VOPCInstructions.td | 8 +
.../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll | 346 +++++++++++++
.../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll | 460 ++++++++++++++++++
3 files changed, 814 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 6fc3d0957dce191..63e75a2382366e1 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1081,6 +1081,8 @@ multiclass FCMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
}
}
+defm : FCMP_Pattern <COND_O, V_CMP_O_F32_e64, f32>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F32_e64, f32>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F32_e64, f32>;
defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F32_e64, f32>;
@@ -1088,6 +1090,8 @@ defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F32_e64, f32>;
defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F32_e64, f32>;
defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F32_e64, f32>;
+defm : FCMP_Pattern <COND_O, V_CMP_O_F64_e64, f64>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F64_e64, f64>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F64_e64, f64>;
defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F64_e64, f64>;
defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F64_e64, f64>;
@@ -1110,6 +1114,8 @@ defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
let OtherPredicates = [HasTrue16BitInsts] in {
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
@@ -1126,6 +1132,8 @@ defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
} // End OtherPredicates = [HasTrue16BitInsts]
let OtherPredicates = [NotHasTrue16BitInsts] in {
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_e64, f16>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_e64, f16>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
index 007b52fa3a0c6f2..16f90296b98908b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
@@ -494,6 +494,121 @@ define amdgpu_kernel void @v_fcmp_f32_ole(ptr addrspace(1) %out, float %src) {
ret void
}
+define amdgpu_kernel void @v_fcmp_f32_o(ptr addrspace(1) %out, float %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f32_o:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_o_f32_e64 s2, 0x42c80000, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f32_o:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_o_f32_e64 s0, 0x42c80000, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f32_o:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_o_f32_e64 s2, 0x42c80000, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f32_o:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_o_f32_e64 s0, 0x42c80000, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 7)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f32_uo(ptr addrspace(1) %out, float %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f32_uo:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_u_f32_e64 s2, 0x42c80000, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f32_uo:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_u_f32_e64 s0, 0x42c80000, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f32_uo:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_u_f32_e64 s2, 0x42c80000, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f32_uo:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_u_f32_e64 s0, 0x42c80000, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 8)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
define amdgpu_kernel void @v_fcmp_f32_ueq(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_ueq:
@@ -1249,6 +1364,122 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) {
ret void
}
+define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f64_o:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_mov_b32 s4, 0
+; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_o_f64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f64_o:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: s_mov_b32 s4, 0
+; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_o_f64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f64_o:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_mov_b32 s4, 0
+; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_o_f64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f64_o:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: s_mov_b32 s4, 0
+; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_o_f64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 7)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f64_uo:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_mov_b32 s4, 0
+; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f64_uo:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: s_mov_b32 s4, 0
+; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f64_uo:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_mov_b32 s4, 0
+; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f64_uo:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: s_mov_b32 s4, 0
+; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 8)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) {
; SDAG-GFX11-LABEL: v_fcmp_f64_une:
; SDAG-GFX11: ; %bb.0:
@@ -2348,6 +2579,121 @@ define amdgpu_kernel void @v_fcmp_f16_ult(ptr addrspace(1) %out, half %src) {
ret void
}
+define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f16_o:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_o_f16_e64 s2, 0x5640, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f16_o:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_o_f16_e64 s0, 0x5640, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f16_o:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_o_f16_e64 s2, 0x5640, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f16_o:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_o_f16_e64 s0, 0x5640, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 7)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f16_uo(ptr addrspace(1) %out, half %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f16_uo:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_u_f16_e64 s2, 0x5640, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f16_uo:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_u_f16_e64 s0, 0x5640, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f16_uo:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_u_f16_e64 s2, 0x5640, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f16_uo:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_u_f16_e64 s0, 0x5640, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 8)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) {
; SDAG-GFX11-LABEL: v_fcmp_f16_ule:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
index eeff0c57bb46152..b62512ae05e6e3f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
@@ -546,6 +546,129 @@ define amdgpu_kernel void @v_fcmp_f32_ole(ptr addrspace(1) %out, float %src) {
ret void
}
+define amdgpu_kernel void @v_fcmp_f32_o(ptr addrspace(1) %out, float %src) {
+; GFX11-LABEL: v_fcmp_f32_o:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_o_f32_e64 s[2:3], 0x42c80000, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_fcmp_f32_o:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_o_f32_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: v_fcmp_f32_o:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_cmp_o_f32_e64 s[2:3], s2, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: v_fcmp_f32_o:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cmp_o_f32_e64 s[2:3], s2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-GISEL-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 7)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f32_uo(ptr addrspace(1) %out, float %src) {
+; GFX11-LABEL: v_fcmp_f32_uo:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_u_f32_e64 s[2:3], 0x42c80000, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_fcmp_f32_uo:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_u_f32_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: v_fcmp_f32_uo:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_cmp_u_f32_e64 s[2:3], s2, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: v_fcmp_f32_uo:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cmp_u_f32_e64 s[2:3], s2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-GISEL-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 8)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
define amdgpu_kernel void @v_fcmp_f32_ueq(ptr addrspace(1) %out, float %src) {
; GFX11-LABEL: v_fcmp_f32_ueq:
@@ -1465,6 +1588,162 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) {
ret void
}
+define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) {
+; GFX11-LABEL: v_fcmp_f64_o:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_mov_b32 s5, 0x40590000
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; GFX9-SDAG-LABEL: v_fcmp_f64_o:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1]
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3
+; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f64_o:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1]
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: v_fcmp_f64_o:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1]
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: v_fcmp_f64_o:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s4, 0
+; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1]
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-GISEL-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 7)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) {
+; GFX11-LABEL: v_fcmp_f64_uo:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_mov_b32 s5, 0x40590000
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; GFX9-SDAG-LABEL: v_fcmp_f64_uo:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1]
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3
+; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f64_uo:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1]
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: v_fcmp_f64_uo:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1]
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: v_fcmp_f64_uo:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s4, 0
+; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1]
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-GISEL-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 8)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) {
; GFX11-LABEL: v_fcmp_f64_une:
; GFX11: ; %bb.0:
@@ -2731,6 +3010,187 @@ define amdgpu_kernel void @v_fcmp_f16_ult(ptr addrspace(1) %out, half %src) {
ret void
}
+define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) {
+; GFX11-SDAG-LABEL: v_fcmp_f16_o:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cmp_u_f16_e64 s[2:3], 0x5640, s2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
+; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: v_fcmp_f16_o:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_o_f16_e64 s[2:3], 0x5640, s2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
+; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX9-SDAG-LABEL: v_fcmp_f16_o:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_u_f16_e64 s[0:1], s4, v0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f16_o:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_o_f16_e64 s[0:1], s4, v0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: v_fcmp_f16_o:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_cmp_u_f16_e64 s[2:3], s2, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: v_fcmp_f16_o:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cmp_o_f16_e64 s[2:3], s2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-GISEL-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 7)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f16_uo(ptr addrspace(1) %out, half %src) {
+; GFX11-SDAG-LABEL: v_fcmp_f16_uo:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cmp_o_f16_e64 s[2:3], 0x5640, s2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
+; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: v_fcmp_f16_uo:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_u_f16_e64 s[2:3], 0x5640, s2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
+; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX9-SDAG-LABEL: v_fcmp_f16_uo:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_o_f16_e64 s[0:1], s4, v0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f16_uo:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_u_f16_e64 s[0:1], s4, v0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: v_fcmp_f16_uo:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_cmp_o_f16_e64 s[2:3], s2, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: v_fcmp_f16_uo:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cmp_u_f16_e64 s[2:3], s2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-GISEL-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 8)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) {
; GFX11-LABEL: v_fcmp_f16_ule:
>From 25009f63387ea12d258fe53630c89c4a70978aba Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 16 Oct 2023 11:36:54 +0200
Subject: [PATCH 2/2] Fix backwards fp16 patterns
---
llvm/lib/Target/AMDGPU/VOPCInstructions.td | 8 +-
.../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll | 8 +-
.../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll | 170 ++++++------------
3 files changed, 64 insertions(+), 122 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 63e75a2382366e1..cbea380ab28c0a4 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1114,8 +1114,8 @@ defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
let OtherPredicates = [HasTrue16BitInsts] in {
-defm : FCMP_Pattern <COND_O, V_CMP_U_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UO, V_CMP_O_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
@@ -1132,8 +1132,8 @@ defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
} // End OtherPredicates = [HasTrue16BitInsts]
let OtherPredicates = [NotHasTrue16BitInsts] in {
-defm : FCMP_Pattern <COND_O, V_CMP_U_F16_e64, f16>;
-defm : FCMP_Pattern <COND_UO, V_CMP_O_F16_e64, f16>;
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_e64, f16>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_e64, f16>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
index 16f90296b98908b..5a950d803e9c5d1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
@@ -2586,7 +2586,7 @@ define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) {
; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT: v_cmp_u_f16_e64 s2, 0x5640, s2
+; SDAG-GFX11-NEXT: v_cmp_o_f16_e64 s2, 0x5640, s2
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -2601,7 +2601,7 @@ define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) {
; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT: v_cmp_u_f16_e64 s0, 0x5640, s4
+; SDAG-GFX10-NEXT: v_cmp_o_f16_e64 s0, 0x5640, s4
; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
; SDAG-GFX10-NEXT: s_endpgm
@@ -2644,7 +2644,7 @@ define amdgpu_kernel void @v_fcmp_f16_uo(ptr addrspace(1) %out, half %src) {
; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT: v_cmp_o_f16_e64 s2, 0x5640, s2
+; SDAG-GFX11-NEXT: v_cmp_u_f16_e64 s2, 0x5640, s2
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -2659,7 +2659,7 @@ define amdgpu_kernel void @v_fcmp_f16_uo(ptr addrspace(1) %out, half %src) {
; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT: v_cmp_o_f16_e64 s0, 0x5640, s4
+; SDAG-GFX10-NEXT: v_cmp_u_f16_e64 s0, 0x5640, s4
; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
; SDAG-GFX10-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
index b62512ae05e6e3f..e2bdcfa6bbddc87 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
@@ -3011,63 +3011,34 @@ define amdgpu_kernel void @v_fcmp_f16_ult(ptr addrspace(1) %out, half %src) {
}
define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) {
-; GFX11-SDAG-LABEL: v_fcmp_f16_o:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_clause 0x1
-; GFX11-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c
-; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_cmp_u_f16_e64 s[2:3], 0x5640, s2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s3
-; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: v_fcmp_f16_o:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_clause 0x1
-; GFX11-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
-; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cmp_o_f16_e64 s[2:3], 0x5640, s2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
-;
-; GFX9-SDAG-LABEL: v_fcmp_f16_o:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c
-; GFX9-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640
-; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_cmp_u_f16_e64 s[0:1], s4, v0
-; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
-; GFX9-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
-; GFX9-SDAG-NEXT: s_endpgm
+; GFX11-LABEL: v_fcmp_f16_o:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_o_f16_e64 s[2:3], 0x5640, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
;
-; GFX9-GISEL-LABEL: v_fcmp_f16_o:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c
-; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_cmp_o_f16_e64 s[0:1], s4, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
-; GFX9-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_fcmp_f16_o:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x5640
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_o_f16_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f16_o:
; VI-SDAG: ; %bb.0:
@@ -3075,7 +3046,7 @@ define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) {
; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_cmp_u_f16_e64 s[2:3], s2, v0
+; VI-SDAG-NEXT: v_cmp_o_f16_e64 s[2:3], s2, v0
; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2
; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
@@ -3102,63 +3073,34 @@ define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) {
}
define amdgpu_kernel void @v_fcmp_f16_uo(ptr addrspace(1) %out, half %src) {
-; GFX11-SDAG-LABEL: v_fcmp_f16_uo:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_clause 0x1
-; GFX11-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c
-; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_cmp_o_f16_e64 s[2:3], 0x5640, s2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, s2
-; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s3
-; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: v_fcmp_f16_uo:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_clause 0x1
-; GFX11-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
-; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cmp_u_f16_e64 s[2:3], 0x5640, s2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
-;
-; GFX9-SDAG-LABEL: v_fcmp_f16_uo:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c
-; GFX9-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640
-; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_cmp_o_f16_e64 s[0:1], s4, v0
-; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
-; GFX9-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
-; GFX9-SDAG-NEXT: s_endpgm
+; GFX11-LABEL: v_fcmp_f16_uo:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_u_f16_e64 s[2:3], 0x5640, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
;
-; GFX9-GISEL-LABEL: v_fcmp_f16_uo:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c
-; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_cmp_u_f16_e64 s[0:1], s4, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
-; GFX9-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_fcmp_f16_uo:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x5640
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_u_f16_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f16_uo:
; VI-SDAG: ; %bb.0:
@@ -3166,7 +3108,7 @@ define amdgpu_kernel void @v_fcmp_f16_uo(ptr addrspace(1) %out, half %src) {
; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_cmp_o_f16_e64 s[2:3], s2, v0
+; VI-SDAG-NEXT: v_cmp_u_f16_e64 s[2:3], s2, v0
; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2
; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
More information about the llvm-commits
mailing list