[llvm] [AMDGPU] Add patterns for V_CMP_O/U (PR #69157)

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 16 01:23:23 PDT 2023


https://github.com/Pierre-vh created https://github.com/llvm/llvm-project/pull/69157

Fixes SWDEV-427162

>From 9dd652ae2f19674f478ba311ca95b3a1dc9710a9 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 16 Oct 2023 10:21:25 +0200
Subject: [PATCH] [AMDGPU] Add patterns for V_CMP_O/U

Fixes SWDEV-427162
---
 llvm/lib/Target/AMDGPU/VOPCInstructions.td    |   8 +
 .../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll    | 346 +++++++++++++
 .../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll    | 460 ++++++++++++++++++
 3 files changed, 814 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 6fc3d0957dce191..63e75a2382366e1 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1081,6 +1081,8 @@ multiclass FCMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
   }
 }
 
+defm : FCMP_Pattern <COND_O, V_CMP_O_F32_e64, f32>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F32_e64, f32>;
 defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
 defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F32_e64, f32>;
 defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F32_e64, f32>;
@@ -1088,6 +1090,8 @@ defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F32_e64, f32>;
 defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F32_e64, f32>;
 defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F32_e64, f32>;
 
+defm : FCMP_Pattern <COND_O, V_CMP_O_F64_e64, f64>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F64_e64, f64>;
 defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F64_e64, f64>;
 defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F64_e64, f64>;
 defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F64_e64, f64>;
@@ -1110,6 +1114,8 @@ defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
 defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
 
 let OtherPredicates = [HasTrue16BitInsts] in {
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;  // ordered: neither operand is NaN
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>; // unordered: at least one operand is NaN
 defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
 defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
 defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
@@ -1126,6 +1132,8 @@ defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
 } // End OtherPredicates = [HasTrue16BitInsts]
 
 let OtherPredicates = [NotHasTrue16BitInsts] in {
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_e64, f16>;  // ordered: neither operand is NaN
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_e64, f16>; // unordered: at least one operand is NaN
 defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
 defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
 defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
index 007b52fa3a0c6f2..16f90296b98908b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
@@ -494,6 +494,121 @@ define amdgpu_kernel void @v_fcmp_f32_ole(ptr addrspace(1) %out, float %src) {
   ret void
 }
 
+define amdgpu_kernel void @v_fcmp_f32_o(ptr addrspace(1) %out, float %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f32_o:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_o_f32_e64 s2, 0x42c80000, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_nop 0
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f32_o:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_o_f32_e64 s0, 0x42c80000, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f32_o:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_o_f32_e64 s2, 0x42c80000, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_nop 0
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f32_o:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_o_f32_e64 s0, 0x42c80000, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 7)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f32_uo(ptr addrspace(1) %out, float %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f32_uo:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_u_f32_e64 s2, 0x42c80000, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_nop 0
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f32_uo:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_u_f32_e64 s0, 0x42c80000, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f32_uo:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_u_f32_e64 s2, 0x42c80000, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_nop 0
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f32_uo:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_u_f32_e64 s0, 0x42c80000, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 8)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
 
 define amdgpu_kernel void @v_fcmp_f32_ueq(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_ueq:
@@ -1249,6 +1364,122 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) {
   ret void
 }
 
+define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f64_o:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_mov_b32 s4, 0
+; SDAG-GFX11-NEXT:    s_mov_b32 s5, 0x40590000
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_o_f64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_nop 0
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f64_o:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    s_mov_b32 s4, 0
+; SDAG-GFX10-NEXT:    s_mov_b32 s5, 0x40590000
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_o_f64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f64_o:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    s_mov_b32 s4, 0
+; GISEL-GFX11-NEXT:    s_mov_b32 s5, 0x40590000
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_o_f64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_nop 0
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f64_o:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    s_mov_b32 s4, 0
+; GISEL-GFX10-NEXT:    s_mov_b32 s5, 0x40590000
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_o_f64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 7)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f64_uo:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_mov_b32 s4, 0
+; SDAG-GFX11-NEXT:    s_mov_b32 s5, 0x40590000
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_u_f64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_nop 0
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f64_uo:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    s_mov_b32 s4, 0
+; SDAG-GFX10-NEXT:    s_mov_b32 s5, 0x40590000
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_u_f64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f64_uo:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    s_mov_b32 s4, 0
+; GISEL-GFX11-NEXT:    s_mov_b32 s5, 0x40590000
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_u_f64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_nop 0
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f64_uo:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    s_mov_b32 s4, 0
+; GISEL-GFX10-NEXT:    s_mov_b32 s5, 0x40590000
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_u_f64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 8)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
 define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_une:
 ; SDAG-GFX11:       ; %bb.0:
@@ -2348,6 +2579,121 @@ define amdgpu_kernel void @v_fcmp_f16_ult(ptr addrspace(1) %out, half %src) {
   ret void
 }
 
+define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f16_o:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_o_f16_e64 s2, 0x5640, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_nop 0
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f16_o:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_o_f16_e64 s0, 0x5640, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f16_o:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_o_f16_e64 s2, 0x5640, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_nop 0
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f16_o:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_o_f16_e64 s0, 0x5640, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 7)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f16_uo(ptr addrspace(1) %out, half %src) {
+; SDAG-GFX11-LABEL: v_fcmp_f16_uo:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_u_f16_e64 s2, 0x5640, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_nop 0
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_fcmp_f16_uo:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_u_f16_e64 s0, 0x5640, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_fcmp_f16_uo:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_u_f16_e64 s2, 0x5640, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_nop 0
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_fcmp_f16_uo:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_u_f16_e64 s0, 0x5640, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 8)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
 
 define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_ule:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
index eeff0c57bb46152..b62512ae05e6e3f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
@@ -546,6 +546,129 @@ define amdgpu_kernel void @v_fcmp_f32_ole(ptr addrspace(1) %out, float %src) {
   ret void
 }
 
+define amdgpu_kernel void @v_fcmp_f32_o(ptr addrspace(1) %out, float %src) {
+; GFX11-LABEL: v_fcmp_f32_o:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_o_f32_e64 s[2:3], 0x42c80000, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_fcmp_f32_o:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x42c80000
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_o_f32_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; VI-SDAG-LABEL: v_fcmp_f32_o:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x42c80000
+; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT:    v_cmp_o_f32_e64 s[2:3], s2, v0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-SDAG-NEXT:    s_endpgm
+;
+; VI-GISEL-LABEL: v_fcmp_f32_o:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x42c80000
+; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT:    v_cmp_o_f32_e64 s[2:3], s2, v0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-GISEL-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 7)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f32_uo(ptr addrspace(1) %out, float %src) {
+; GFX11-LABEL: v_fcmp_f32_uo:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_u_f32_e64 s[2:3], 0x42c80000, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_fcmp_f32_uo:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x42c80000
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_u_f32_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; VI-SDAG-LABEL: v_fcmp_f32_uo:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x42c80000
+; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT:    v_cmp_u_f32_e64 s[2:3], s2, v0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-SDAG-NEXT:    s_endpgm
+;
+; VI-GISEL-LABEL: v_fcmp_f32_uo:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x42c80000
+; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT:    v_cmp_u_f32_e64 s[2:3], s2, v0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-GISEL-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 8)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
 
 define amdgpu_kernel void @v_fcmp_f32_ueq(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_ueq:
@@ -1465,6 +1588,162 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) {
   ret void
 }
 
+define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) {
+; GFX11-LABEL: v_fcmp_f64_o:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_mov_b32 s5, 0x40590000
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_o_f64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX9-SDAG-LABEL: v_fcmp_f64_o:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 0x40590000
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1]
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s3
+; GFX9-SDAG-NEXT:    global_store_dwordx2 v0, v[1:2], s[0:1]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f64_o:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT:    s_mov_b32 s5, 0x40590000
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1]
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; VI-SDAG-LABEL: v_fcmp_f64_o:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x40590000
+; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT:    v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1]
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-SDAG-NEXT:    s_endpgm
+;
+; VI-GISEL-LABEL: v_fcmp_f64_o:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-GISEL-NEXT:    s_mov_b32 s4, 0
+; VI-GISEL-NEXT:    s_mov_b32 s5, 0x40590000
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s5
+; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT:    v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1]
+; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-GISEL-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 7)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) {
+; GFX11-LABEL: v_fcmp_f64_uo:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_mov_b32 s5, 0x40590000
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_u_f64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX9-SDAG-LABEL: v_fcmp_f64_uo:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 0x40590000
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1]
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s3
+; GFX9-SDAG-NEXT:    global_store_dwordx2 v0, v[1:2], s[0:1]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f64_uo:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT:    s_mov_b32 s5, 0x40590000
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1]
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; VI-SDAG-LABEL: v_fcmp_f64_uo:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x40590000
+; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT:    v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1]
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-SDAG-NEXT:    s_endpgm
+;
+; VI-GISEL-LABEL: v_fcmp_f64_uo:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-GISEL-NEXT:    s_mov_b32 s4, 0
+; VI-GISEL-NEXT:    s_mov_b32 s5, 0x40590000
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s5
+; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT:    v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1]
+; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-GISEL-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 8)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
 define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_une:
 ; GFX11:       ; %bb.0:
@@ -2731,6 +3010,187 @@ define amdgpu_kernel void @v_fcmp_f16_ult(ptr addrspace(1) %out, half %src) {
   ret void
 }
 
+define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) {
+; GFX11-SDAG-LABEL: v_fcmp_f16_o:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cmp_o_f16_e64 s[2:3], 0x5640, s2
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-SDAG-NEXT:    s_nop 0
+; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-SDAG-NEXT:    s_endpgm
+; NOTE(review) - predicate 7 below is 'ord'; both selectors are expected to emit v_cmp_o (never v_cmp_u) for this kernel.
+; GFX11-GISEL-LABEL: v_fcmp_f16_o:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_cmp_o_f16_e64 s[2:3], 0x5640, s2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-GISEL-NEXT:    s_nop 0
+; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-GISEL-NEXT:    s_endpgm
+;
+; GFX9-SDAG-LABEL: v_fcmp_f16_o:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x5640
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_cmp_o_f16_e64 s[0:1], s4, v0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f16_o:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0x5640
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_cmp_o_f16_e64 s[0:1], s4, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; VI-SDAG-LABEL: v_fcmp_f16_o:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x5640
+; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT:    v_cmp_o_f16_e64 s[2:3], s2, v0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-SDAG-NEXT:    s_endpgm
+;
+; VI-GISEL-LABEL: v_fcmp_f16_o:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x5640
+; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT:    v_cmp_o_f16_e64 s[2:3], s2, v0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-GISEL-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 7)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @v_fcmp_f16_uo(ptr addrspace(1) %out, half %src) {
+; GFX11-SDAG-LABEL: v_fcmp_f16_uo:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cmp_u_f16_e64 s[2:3], 0x5640, s2
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-SDAG-NEXT:    s_nop 0
+; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-SDAG-NEXT:    s_endpgm
+; NOTE(review) - predicate 8 below is 'uno'; both selectors are expected to emit v_cmp_u (never v_cmp_o) for this kernel.
+; GFX11-GISEL-LABEL: v_fcmp_f16_uo:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_cmp_u_f16_e64 s[2:3], 0x5640, s2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-GISEL-NEXT:    s_nop 0
+; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-GISEL-NEXT:    s_endpgm
+;
+; GFX9-SDAG-LABEL: v_fcmp_f16_uo:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x5640
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_cmp_u_f16_e64 s[0:1], s4, v0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: v_fcmp_f16_uo:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0x5640
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_cmp_u_f16_e64 s[0:1], s4, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; VI-SDAG-LABEL: v_fcmp_f16_uo:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x5640
+; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT:    v_cmp_u_f16_e64 s[2:3], s2, v0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-SDAG-NEXT:    s_endpgm
+;
+; VI-GISEL-LABEL: v_fcmp_f16_uo:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x5640
+; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT:    v_cmp_u_f16_e64 s[2:3], s2, v0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-GISEL-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 8)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
 
 define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_ule:



More information about the llvm-commits mailing list