[llvm] [AMDGPU][True16][CodeGen] true16 codegen for v_cmp (PR #124757)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 28 06:40:29 PST 2025
https://github.com/broxigarchen created https://github.com/llvm/llvm-project/pull/124757
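(No pull-request description was provided.) Summary of the patch below: for 16-bit compares, getV_CMPOpcode now returns the true16 opcode when real true16 instructions are enabled; VOPCInstructions.td gains _t16 class, integer-compare and floating-point-compare selection patterns; and the affected GlobalISel MIR tests and the fcmp.f16.ll / icmp.i16.ll tests are updated.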
>From 23076fdb510ca059a1aac91fe87d96fe851fb8d0 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 27 Jan 2025 17:05:47 -0500
Subject: [PATCH] true16 codegen for v_cmp
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 3 +-
llvm/lib/Target/AMDGPU/VOPCInstructions.td | 57 +-
.../inst-select-amdgcn.fcmp.constants.w32.mir | 12 +-
.../inst-select-amdgcn.fcmp.constants.w64.mir | 12 +-
llvm/test/CodeGen/AMDGPU/fcmp.f16.ll | 2362 +++++++++++------
llvm/test/CodeGen/AMDGPU/icmp.i16.ll | 44 +
6 files changed, 1700 insertions(+), 790 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 40eaba2c09209d..3bbbbcf71d8aec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1207,9 +1207,8 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
unsigned FakeS16Opc, unsigned S32Opc,
unsigned S64Opc) {
if (Size == 16)
- // FIXME-TRUE16 use TrueS16Opc when realtrue16 is supported for CMP code
return ST.hasTrue16BitInsts()
- ? ST.useRealTrue16Insts() ? FakeS16Opc : FakeS16Opc
+ ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
: S16Opc;
if (Size == 32)
return S32Opc;
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index e16ac4423265ec..00a3381b3fd49e 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1035,6 +1035,20 @@ multiclass VOPCClassPat64<string inst_name> {
>;
}
+multiclass VOPCClassPat64_t16<string inst_name> {
+ defvar inst = !cast<VOP_Pseudo>(inst_name#"_t16_e64");
+ defvar P = inst.Pfl;
+ def : GCNPat <
+ (i1:$sdst
+ (AMDGPUfp_class
+ (P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
+ i32:$src1)),
+ (inst i32:$src0_modifiers, VSrcT_f16:$src0,
+ 0 /* src1_modifiers */, (f16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
+ 0) /* op_sel */
+ >;
+}
+
multiclass VOPCClassPat64_fake16<string inst_name> {
defvar inst = !cast<VOP_Pseudo>(inst_name#"_fake16_e64");
defvar P = inst.Pfl;
@@ -1158,6 +1172,7 @@ multiclass VOPC_CLASS_F16 <string opName> {
}
let True16Predicate = UseRealTrue16Insts in {
defm _t16 : VOPC_Class_Pseudos <opName#"_t16", VOPC_I1_F16_I16_t16, 0>;
+ defm : VOPCClassPat64_t16<NAME>;
}
let True16Predicate = UseFakeTrue16Insts in {
defm _fake16 : VOPC_Class_Pseudos <opName#"_fake16", VOPC_I1_F16_I16_fake16, 0>;
@@ -1207,27 +1222,30 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
// We need to use COPY_TO_REGCLASS to w/a the problem when ReplaceAllUsesWith()
// complaints it cannot replace i1 <-> i64/i32 if node was not morphed in place.
-multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
+multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt, dag dstInst = (inst $src0, $src1)> {
let WaveSizePredicate = isWave64 in
def : GCNPat <
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
- (i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
+ (i64 (COPY_TO_REGCLASS dstInst, SReg_64))
>;
let WaveSizePredicate = isWave32 in {
def : GCNPat <
(i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
- (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
+ (i32 (COPY_TO_REGCLASS dstInst, SReg_32))
>;
// Support codegen of i64 setcc in wave32 mode.
def : GCNPat <
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
- (i64 (REG_SEQUENCE SReg_64, (inst $src0, $src1), sub0, (S_MOV_B32 (i32 0)), sub1))
+ (i64 (REG_SEQUENCE SReg_64, dstInst, sub0, (S_MOV_B32 (i32 0)), sub1))
>;
}
}
+multiclass ICMP_Pattern_t16<PatFrags cond, Instruction inst, ValueType vt>
+ : ICMP_Pattern<cond, inst, vt, (inst 0, $src0, 0, $src1)>;
+
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
defm : ICMP_Pattern <COND_NE, V_CMP_NE_U32_e64, i32>;
defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
@@ -1250,6 +1268,19 @@ defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
+let True16Predicate = UseRealTrue16Insts in {
+defm : ICMP_Pattern_t16 <COND_EQ, V_CMP_EQ_U16_t16_e64, i16>;
+defm : ICMP_Pattern_t16 <COND_NE, V_CMP_NE_U16_t16_e64, i16>;
+defm : ICMP_Pattern_t16 <COND_UGT, V_CMP_GT_U16_t16_e64, i16>;
+defm : ICMP_Pattern_t16 <COND_UGE, V_CMP_GE_U16_t16_e64, i16>;
+defm : ICMP_Pattern_t16 <COND_ULT, V_CMP_LT_U16_t16_e64, i16>;
+defm : ICMP_Pattern_t16 <COND_ULE, V_CMP_LE_U16_t16_e64, i16>;
+defm : ICMP_Pattern_t16 <COND_SGT, V_CMP_GT_I16_t16_e64, i16>;
+defm : ICMP_Pattern_t16 <COND_SGE, V_CMP_GE_I16_t16_e64, i16>;
+defm : ICMP_Pattern_t16 <COND_SLT, V_CMP_LT_I16_t16_e64, i16>;
+defm : ICMP_Pattern_t16 <COND_SLE, V_CMP_LE_I16_t16_e64, i16>;
+} // End True16Predicate = UseRealTrue16Insts
+
let True16Predicate = UseFakeTrue16Insts in {
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_fake16_e64, i16>;
defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_fake16_e64, i16>;
@@ -1335,6 +1366,24 @@ defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
+let True16Predicate = UseRealTrue16Insts in {
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_t16_e64, f16>;
+
+defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
+} // End True16Predicate = UseRealTrue16Insts
+
let True16Predicate = UseFakeTrue16Insts in {
defm : FCMP_Pattern <COND_O, V_CMP_O_F16_fake16_e64, f16>;
defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_fake16_e64, f16>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
index cdb67caea12cf0..49383135ab0c58 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
@@ -17,11 +17,9 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
- ; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
+ ; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_false_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
@@ -55,11 +53,9 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
- ; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
+ ; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_true_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
index ed811d37c3d0fc..828eb5d3fb40ac 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
@@ -17,11 +17,9 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
- ; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
+ ; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_false_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
@@ -55,11 +53,9 @@ body: |
; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
- ; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
+ ; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
;
; GFX11-FAKE16-LABEL: name: fcmp_true_f16
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
index 23b54c6741e512..a25c183dca0a1f 100644
--- a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=VI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GFX11 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GFX11-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GFX11-FAKE16 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GFX12 %s
define amdgpu_kernel void @fcmp_f16_lt(
@@ -55,30 +56,57 @@ define amdgpu_kernel void @fcmp_f16_lt(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_lt:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_lt_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_lt:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_lt:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_lt:
; GFX12: ; %bb.0: ; %entry
@@ -167,31 +195,58 @@ define amdgpu_kernel void @fcmp_f16_lt_abs(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_lt_abs:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_lt_f16_e64 s2, |v0|, |v1|
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, s2
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_lt_abs:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e64 s2, |v0.l|, |v0.h|
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, s2
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_lt_abs:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_lt_f16_e64 s2, |v0|, |v1|
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, s2
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_lt_abs:
; GFX12: ; %bb.0: ; %entry
@@ -285,30 +340,57 @@ define amdgpu_kernel void @fcmp_f16_eq(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_eq:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_eq_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_eq:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_eq_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_eq:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_eq_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_eq:
; GFX12: ; %bb.0: ; %entry
@@ -397,30 +479,57 @@ define amdgpu_kernel void @fcmp_f16_le(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_le:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_le_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_le:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_le_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_le:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_le_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_le:
; GFX12: ; %bb.0: ; %entry
@@ -509,30 +618,57 @@ define amdgpu_kernel void @fcmp_f16_gt(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_gt:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_gt_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_gt:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_gt:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_gt:
; GFX12: ; %bb.0: ; %entry
@@ -621,30 +757,57 @@ define amdgpu_kernel void @fcmp_f16_lg(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_lg:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_lg_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_lg:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_lg:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_lg:
; GFX12: ; %bb.0: ; %entry
@@ -733,30 +896,57 @@ define amdgpu_kernel void @fcmp_f16_ge(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_ge:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_ge:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_ge:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_ge:
; GFX12: ; %bb.0: ; %entry
@@ -845,30 +1035,57 @@ define amdgpu_kernel void @fcmp_f16_o(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_o:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_o:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_o:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_o:
; GFX12: ; %bb.0: ; %entry
@@ -957,30 +1174,57 @@ define amdgpu_kernel void @fcmp_f16_u(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_u:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_u:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_u_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_u:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_u:
; GFX12: ; %bb.0: ; %entry
@@ -1069,30 +1313,57 @@ define amdgpu_kernel void @fcmp_f16_nge(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_nge:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_nge_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_nge:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_nge_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_nge:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_nge_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_nge:
; GFX12: ; %bb.0: ; %entry
@@ -1181,30 +1452,57 @@ define amdgpu_kernel void @fcmp_f16_nlg(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_nlg:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_nlg_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_nlg:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_nlg_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_nlg:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_nlg_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_nlg:
; GFX12: ; %bb.0: ; %entry
@@ -1293,30 +1591,57 @@ define amdgpu_kernel void @fcmp_f16_ngt(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_ngt:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_ngt:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_ngt:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_ngt:
; GFX12: ; %bb.0: ; %entry
@@ -1405,30 +1730,57 @@ define amdgpu_kernel void @fcmp_f16_nle(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_nle:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_nle_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_nle:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_nle:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_nle_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_nle:
; GFX12: ; %bb.0: ; %entry
@@ -1517,30 +1869,57 @@ define amdgpu_kernel void @fcmp_f16_neq(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_neq:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_neq_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_neq:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_neq_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_neq:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_neq_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_neq:
; GFX12: ; %bb.0: ; %entry
@@ -1629,30 +2008,57 @@ define amdgpu_kernel void @fcmp_f16_nlt(
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_f16_nlt:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_f16_nlt:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_f16_nlt:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_f16_nlt:
; GFX12: ; %bb.0: ; %entry
@@ -1751,35 +2157,65 @@ define amdgpu_kernel void @fcmp_v2f16_lt(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_lt:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_lt_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_lt_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_lt:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_lt:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_lt:
; GFX12: ; %bb.0: ; %entry
@@ -1884,35 +2320,65 @@ define amdgpu_kernel void @fcmp_v2f16_eq(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_eq:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_eq_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_eq_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_eq:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_eq_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_eq:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_eq_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_eq_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_eq:
; GFX12: ; %bb.0: ; %entry
@@ -2016,35 +2482,65 @@ define amdgpu_kernel void @fcmp_v2f16_le(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_le:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_le_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_le_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_le:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_le_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_le_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_le:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_le_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_le_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_le:
; GFX12: ; %bb.0: ; %entry
@@ -2148,35 +2644,65 @@ define amdgpu_kernel void @fcmp_v2f16_gt(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_gt:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_gt_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_gt_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_gt:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_gt:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_gt:
; GFX12: ; %bb.0: ; %entry
@@ -2281,35 +2807,65 @@ define amdgpu_kernel void @fcmp_v2f16_lg(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_lg:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_lg_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_lg_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_lg:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_lg:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_lg:
; GFX12: ; %bb.0: ; %entry
@@ -2414,35 +2970,65 @@ define amdgpu_kernel void @fcmp_v2f16_ge(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_ge:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_ge:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_ge:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_ge:
; GFX12: ; %bb.0: ; %entry
@@ -2547,35 +3133,65 @@ define amdgpu_kernel void @fcmp_v2f16_o(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_o:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_o:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_o_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_o_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_o:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_o_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_o:
; GFX12: ; %bb.0: ; %entry
@@ -2680,35 +3296,65 @@ define amdgpu_kernel void @fcmp_v2f16_u(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_u:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_u_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_u:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_u_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_u_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_u:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_u_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_u:
; GFX12: ; %bb.0: ; %entry
@@ -2812,35 +3458,65 @@ define amdgpu_kernel void @fcmp_v2f16_nge(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_nge:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_nge_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_nge_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_nge:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_nge_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_nge_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_nge:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_nge_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_nge_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_nge:
; GFX12: ; %bb.0: ; %entry
@@ -2944,35 +3620,65 @@ define amdgpu_kernel void @fcmp_v2f16_nlg(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_nlg:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_nlg_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_nlg_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_nlg:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_nlg_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_nlg_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_nlg:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_nlg_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_nlg_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_nlg:
; GFX12: ; %bb.0: ; %entry
@@ -3077,35 +3783,65 @@ define amdgpu_kernel void @fcmp_v2f16_ngt(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_ngt:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_ngt:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_ngt:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_ngt:
; GFX12: ; %bb.0: ; %entry
@@ -3209,35 +3945,65 @@ define amdgpu_kernel void @fcmp_v2f16_nle(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_nle:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_nle_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_nle_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_nle:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_nle:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_nle_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_nle_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_nle:
; GFX12: ; %bb.0: ; %entry
@@ -3341,35 +4107,65 @@ define amdgpu_kernel void @fcmp_v2f16_neq(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_neq:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_neq_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_neq_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_neq:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_neq_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_neq_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_neq:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_neq_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_neq_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_neq:
; GFX12: ; %bb.0: ; %entry
@@ -3473,35 +4269,65 @@ define amdgpu_kernel void @fcmp_v2f16_nlt(
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: fcmp_v2f16_nlt:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s10, -1
-; GFX11-NEXT: s_mov_b32 s11, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, s10
-; GFX11-NEXT: s_mov_b32 s7, s11
-; GFX11-NEXT: s_mov_b32 s14, s10
-; GFX11-NEXT: s_mov_b32 s15, s11
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s12, s2
-; GFX11-NEXT: s_mov_b32 s13, s3
-; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT: s_mov_b32 s8, s0
-; GFX11-NEXT: s_mov_b32 s9, s1
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: fcmp_v2f16_nlt:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3.l, v2.l
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-TRUE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fcmp_v2f16_nlt:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT: buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-FAKE16-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-FAKE16-NEXT: s_endpgm
;
; GFX12-LABEL: fcmp_v2f16_nlt:
; GFX12: ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/icmp.i16.ll b/llvm/test/CodeGen/AMDGPU/icmp.i16.ll
index c1a074a81b2aa2..790a457c2b3371 100644
--- a/llvm/test/CodeGen/AMDGPU/icmp.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/icmp.i16.ll
@@ -1,5 +1,9 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s| FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s| FileCheck -check-prefixes=GCN,GFX11-FAKE16 %s
+; FIXME-TRUE16. In the true16 flow, codegen introduces an additional s2v copy and mov, and reverses the operand order, thus picking different cmp instructions.
+; This should be corrected after the additional mov/copy is removed.
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s| FileCheck -check-prefixes=GCN,GFX11-TRUE16 %s
;;;==========================================================================;;;
;; 16-bit integer comparisons
@@ -8,6 +12,8 @@
; GCN-LABEL: {{^}}i16_eq:
; VI: v_cmp_eq_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_eq_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_eq_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_eq(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -26,6 +32,8 @@ entry:
; GCN-LABEL: {{^}}i16_ne:
; VI: v_cmp_ne_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ne_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_ne_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_ne_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_ne(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -44,6 +52,8 @@ entry:
; GCN-LABEL: {{^}}i16_ugt:
; VI: v_cmp_gt_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_gt_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_gt_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_ugt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -62,6 +72,8 @@ entry:
; GCN-LABEL: {{^}}i16_uge:
; VI: v_cmp_ge_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_ge_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_ge_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_uge(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -80,6 +92,8 @@ entry:
; GCN-LABEL: {{^}}i16_ult:
; VI: v_cmp_lt_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_lt_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_lt_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_ult(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -98,6 +112,8 @@ entry:
; GCN-LABEL: {{^}}i16_ule:
; VI: v_cmp_le_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_le_u16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_le_u16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_ule(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -117,6 +133,8 @@ entry:
; GCN-LABEL: {{^}}i16_sgt:
; VI: v_cmp_gt_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_gt_i16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_gt_i16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_sgt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -135,6 +153,8 @@ entry:
; GCN-LABEL: {{^}}i16_sge:
; VI: v_cmp_ge_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_ge_i16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_ge_i16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_sge(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -153,6 +173,8 @@ entry:
; GCN-LABEL: {{^}}i16_slt:
; VI: v_cmp_lt_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_lt_i16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_lt_i16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_slt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -171,6 +193,8 @@ entry:
; GCN-LABEL: {{^}}i16_sle:
; VI: v_cmp_le_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_le_i16_e32 vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_le_i16_e32 vcc_lo, v{{[0-9]+}}.l, v{{[0-9]+}}.h
define amdgpu_kernel void @i16_sle(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -190,6 +214,8 @@ entry:
; GCN-LABEL: {{^}}i16_eq_v_s:
; VI: v_cmp_eq_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_eq_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_eq_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_eq_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_eq_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -206,6 +232,8 @@ entry:
; GCN-LABEL: {{^}}i16_ne_v_s:
; VI: v_cmp_ne_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ne_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_ne_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_ne_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_ne_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -222,6 +250,8 @@ entry:
; GCN-LABEL: {{^}}i16_ugt_v_s:
; VI: v_cmp_lt_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_lt_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_gt_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_ugt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -238,6 +268,8 @@ entry:
; GCN-LABEL: {{^}}i16_uge_v_s:
; VI: v_cmp_le_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_le_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_ge_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_uge_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -254,6 +286,8 @@ entry:
; GCN-LABEL: {{^}}i16_ult_v_s:
; VI: v_cmp_gt_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_gt_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_lt_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_ult_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -270,6 +304,8 @@ entry:
; GCN-LABEL: {{^}}i16_ule_v_s:
; VI: v_cmp_ge_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_ge_u16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_le_u16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_ule_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -286,6 +322,8 @@ entry:
; GCN-LABEL: {{^}}i16_sgt_v_s:
; VI: v_cmp_lt_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_lt_i16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_gt_i16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_sgt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -302,6 +340,8 @@ entry:
; GCN-LABEL: {{^}}i16_sge_v_s:
; VI: v_cmp_le_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_le_i16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_ge_i16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_sge_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -318,6 +358,8 @@ entry:
; GCN-LABEL: {{^}}i16_slt_v_s:
; VI: v_cmp_gt_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_gt_i16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_lt_i16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_slt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -334,6 +376,8 @@ entry:
; GCN-LABEL: {{^}}i16_sle_v_s:
; VI: v_cmp_ge_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-FAKE16: v_cmp_ge_i16_e32 vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX11-TRUE16: v_cmp_le_i16_e32 vcc_lo, v{{[0-9]+}}.h, v{{[0-9]+}}.l
define amdgpu_kernel void @i16_sle_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
More information about the llvm-commits mailing list