[llvm] Salu float codegen (PR #66885)
Mirko Brkušanin via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 20 03:36:35 PDT 2023
https://github.com/mbrkusanin created https://github.com/llvm/llvm-project/pull/66885
None
>From ddd6474a2cf4320cfdeca9872d2591878a7a8265 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Wed, 20 Sep 2023 11:44:03 +0200
Subject: [PATCH 1/2] [AMDGPU] Add gfx1150 SALU Float instructions
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 14 +-
.../Disassembler/AMDGPUDisassembler.cpp | 2 +
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 +
llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 9 +
llvm/lib/Target/AMDGPU/SISchedule.td | 12 +
llvm/lib/Target/AMDGPU/SOPInstructions.td | 262 +-
llvm/test/MC/AMDGPU/gfx1150_asm_salu_float.s | 2527 +++++++++++++++++
llvm/test/MC/AMDGPU/gfx11_unsupported.s | 174 ++
.../AMDGPU/gfx1150_asm_salu_float.txt | 2527 +++++++++++++++++
9 files changed, 5519 insertions(+), 11 deletions(-)
create mode 100644 llvm/test/MC/AMDGPU/gfx1150_asm_salu_float.s
create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1150_asm_salu_float.txt
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index c9d3b00caa8739b..037a8aa104f0b32 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -773,6 +773,12 @@ def FeatureForceStoreSC0SC1 : SubtargetFeature<"force-store-sc0-sc1",
"Has SC0 and SC1 on stores"
>;
+def FeatureSALUFloatInsts : SubtargetFeature<"salu-float",
+ "HasSALUFloatInsts",
+ "true",
+ "Has SALU floating point instructions"
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -1364,11 +1370,12 @@ def FeatureISAVersion11_0_3 : FeatureSet<
def FeatureISAVersion11_5_0 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
- [])>;
+ [FeatureSALUFloatInsts])>;
def FeatureISAVersion11_5_1 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
- [FeatureGFX11FullVGPRs])>;
+ [FeatureSALUFloatInsts,
+ FeatureGFX11FullVGPRs])>;
//===----------------------------------------------------------------------===//
@@ -1869,6 +1876,9 @@ def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">;
def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;
+def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
+ AssemblerPredicate<(all_of FeatureSALUFloatInsts)>;
+
def HasGDS : Predicate<"Subtarget->hasGDS()">;
def HasGWS : Predicate<"Subtarget->hasGWS()">;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 561ed697df6ba94..4c76e19abc2a8f2 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -238,6 +238,7 @@ DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128)
DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64)
DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16)
@@ -259,6 +260,7 @@ DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(SReg_32, OPW32, 32)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
uint64_t Addr,
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index bf46dd381048c06..970ce48de9f47c2 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -192,6 +192,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool UnalignedDSAccess = false;
bool HasPackedTID = false;
bool ScalarizeGlobal = false;
+ bool HasSALUFloatInsts = false;
bool HasVcmpxPermlaneHazard = false;
bool HasVMEMtoScalarWriteHazard = false;
@@ -1136,6 +1137,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// hasGFX90AInsts is also true.
bool hasGFX940Insts() const { return GFX940Insts; }
+ bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
+
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index b2b1b458a63af6c..4f355409f88e56d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1153,12 +1153,21 @@ class RegOrF16_Lo128_Deferred <string RegisterClass,
// SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
+def SSrc_b16 : RegOrB16 <"SReg_32", "OPERAND_REG_IMM">;
+def SSrc_f16 : RegOrF16 <"SReg_32", "OPERAND_REG_IMM">;
def SSrc_b32 : RegOrB32 <"SReg_32", "OPERAND_REG_IMM">;
def SSrc_f32 : RegOrF32 <"SReg_32", "OPERAND_REG_IMM">;
def SSrc_b64 : RegOrB64 <"SReg_64", "OPERAND_REG_IMM">;
def SSrcOrLds_b32 : RegOrB32 <"SRegOrLds_32", "OPERAND_REG_IMM">;
+//===----------------------------------------------------------------------===//
+// SSrc_32_Deferred Operands with an SGPR or a 32-bit immediate for use with
+// FMAMK/FMAAK
+//===----------------------------------------------------------------------===//
+
+def SSrc_f32_Deferred : RegOrF32_Deferred<"SReg_32", "OPERAND_REG_IMM">;
+
//===----------------------------------------------------------------------===//
// SCSrc_* Operands with an SGPR or a inline constant
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 3d1631fc0461950..c67e647a7e7c704 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -65,6 +65,9 @@ def Write16PassMAI : SchedWrite;
def Write4PassDGEMM : SchedWrite;
def Write8PassDGEMM : SchedWrite;
+// Scalar float instructions
+def WriteSFPU : SchedWrite;
+
// FIXME: Should there be a class for instructions which are VALU
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)
@@ -128,6 +131,10 @@ class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
class HWVALUWriteRes<SchedWrite write, int latency> :
HWWriteRes<write, [HWVALU], latency>;
+class UnsupportedWriteRes<SchedWrite write> : WriteRes<write, []> {
+ let Unsupported = 1;
+}
+
def PredMIReadVGPR : SchedPredicate<[{TII->hasVGPRUses(*MI)}]>;
def MIReadVGPR : SchedReadVariant<[
@@ -165,6 +172,8 @@ multiclass SICommonWriteRes {
def : HWWriteRes<Write8PassMAI, [HWXDL], 8>;
let ReleaseAtCycles = [16] in
def : HWWriteRes<Write16PassMAI, [HWXDL], 16>;
+
+ def : UnsupportedWriteRes<WriteSFPU>;
} // End RetireOOO = 1
def : ReadAdvance<MIVGPRRead, -2>;
@@ -307,6 +316,8 @@ def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
+
+def : UnsupportedWriteRes<WriteSFPU>;
} // End RetireOOO = 1
def : InstRW<[WriteCopy], (instrs COPY)>;
@@ -334,6 +345,7 @@ def : HWWriteRes<WriteBranch, [HWBranch], 32>;
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
+def : HWWriteRes<WriteSFPU, [HWSALU, HWRC], 4>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 229aa9c75d16d2d..08ab81848710d5e 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -401,6 +401,33 @@ let SubtargetPredicate = isGFX11Plus in {
}
} // End SubtargetPredicate = isGFX11Plus
+let SubtargetPredicate = HasSALUFloatInsts, Uses = [MODE],
+ SchedRW = [WriteSFPU], isReMaterializable = 1 in {
+ def S_CVT_F32_I32 : SOP1_32<"s_cvt_f32_i32">;
+ def S_CVT_F32_U32 : SOP1_32<"s_cvt_f32_u32">;
+
+ let mayRaiseFPException = 1 in {
+ def S_CVT_I32_F32 : SOP1_32<"s_cvt_i32_f32">;
+ def S_CVT_U32_F32 : SOP1_32<"s_cvt_u32_f32">;
+ def S_CVT_F32_F16 : SOP1_32<"s_cvt_f32_f16">;
+ def S_CVT_HI_F32_F16 : SOP1_32<"s_cvt_hi_f32_f16">;
+
+ def S_CEIL_F32 : SOP1_32<"s_ceil_f32">;
+ def S_FLOOR_F32 : SOP1_32<"s_floor_f32">;
+ def S_TRUNC_F32 : SOP1_32<"s_trunc_f32">;
+ def S_RNDNE_F32 : SOP1_32<"s_rndne_f32">;
+
+ let FPDPRounding = 1 in
+ def S_CVT_F16_F32 : SOP1_32<"s_cvt_f16_f32">;
+
+ def S_CEIL_F16 : SOP1_32<"s_ceil_f16">;
+ def S_FLOOR_F16 : SOP1_32<"s_floor_f16">;
+ def S_TRUNC_F16 : SOP1_32<"s_trunc_f16">;
+ def S_RNDNE_F16 : SOP1_32<"s_rndne_f16">;
+ } // End mayRaiseFPException = 1
+} // End SubtargetPredicate = HasSALUFloatInsts, Uses = [MODE]
+ // SchedRW = [WriteSFPU], isReMaterializable = 1
+
//===----------------------------------------------------------------------===//
// SOP2 Instructions
//===----------------------------------------------------------------------===//
@@ -427,10 +454,9 @@ class SOP2_Pseudo<string opName, dag outs, dag ins,
// let Size = 4; // Do we need size here?
}
-class SOP2_Real<bits<7> op, SOP_Pseudo ps, string real_name = ps.Mnemonic> :
+class SOP2_Real<SOP_Pseudo ps, string real_name = ps.Mnemonic> :
InstSI <ps.OutOperandList, ps.InOperandList,
- real_name # ps.AsmOperands>,
- Enc32 {
+ real_name # ps.AsmOperands> {
let SALU = 1;
let SOP2 = 1;
let isPseudo = 0;
@@ -444,12 +470,18 @@ class SOP2_Real<bits<7> op, SOP_Pseudo ps, string real_name = ps.Mnemonic> :
let SchedRW = ps.SchedRW;
let mayLoad = ps.mayLoad;
let mayStore = ps.mayStore;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
// encoding
bits<7> sdst;
bits<8> src0;
bits<8> src1;
+ bits<32> imm;
+}
+class SOP2_Real32<bits<7> op, SOP_Pseudo ps, string real_name = ps.Mnemonic> :
+ SOP2_Real<ps, real_name>, Enc32 {
let Inst{7-0} = src0;
let Inst{15-8} = src1;
let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
@@ -457,12 +489,31 @@ class SOP2_Real<bits<7> op, SOP_Pseudo ps, string real_name = ps.Mnemonic> :
let Inst{31-30} = 0x2; // encoding
}
+class SOP2_Real64<bits<7> op, SOP_Pseudo ps, string real_name = ps.Mnemonic> :
+ SOP2_Real<ps, real_name>, Enc64 {
+ let Inst{7-0} = src0;
+ let Inst{15-8} = src1;
+ let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
+ let Inst{29-23} = op;
+ let Inst{31-30} = 0x2; // encoding
+ let Inst{63-32} = imm;
+}
+
+class SOP2_F16 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
+ opName, (outs SReg_32:$sdst), (ins SSrc_f16:$src0, SSrc_f16:$src1),
+ "$sdst, $src0, $src1", pattern
+>;
class SOP2_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
opName, (outs SReg_32:$sdst), (ins SSrc_b32:$src0, SSrc_b32:$src1),
"$sdst, $src0, $src1", pattern
>;
+class SOP2_F32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
+ opName, (outs SReg_32:$sdst), (ins SSrc_f32:$src0, SSrc_f32:$src1),
+ "$sdst, $src0, $src1", pattern
+>;
+
class SOP2_64 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1),
"$sdst, $src0, $src1", pattern
@@ -705,6 +756,63 @@ let SubtargetPredicate = isGFX11Plus in {
def S_PACK_HL_B32_B16 : SOP2_32<"s_pack_hl_b32_b16">;
} // End SubtargetPredicate = isGFX11Plus
+let SubtargetPredicate = HasSALUFloatInsts, mayRaiseFPException = 1,
+ Uses = [MODE], SchedRW = [WriteSFPU] in {
+ let isReMaterializable = 1 in {
+ let isCommutable = 1 in {
+ def S_ADD_F32 : SOP2_F32<"s_add_f32">;
+ def S_MIN_F32 : SOP2_F32<"s_min_f32">;
+ def S_MAX_F32 : SOP2_F32<"s_max_f32">;
+ def S_MUL_F32 : SOP2_F32<"s_mul_f32">;
+
+ let FixedSize = 1 in
+ def S_FMAAK_F32 : SOP2_Pseudo<
+ "s_fmaak_f32", (outs SReg_32:$sdst),
+ (ins SSrc_f32_Deferred:$src0, SSrc_f32_Deferred:$src1, KImmFP32:$imm),
+ "$sdst, $src0, $src1, $imm"
+ >;
+
+ let FPDPRounding = 1 in {
+ def S_ADD_F16 : SOP2_F16<"s_add_f16">;
+ def S_MUL_F16 : SOP2_F16<"s_mul_f16">;
+ } // End FPDPRounding
+
+ def S_MIN_F16 : SOP2_F16<"s_min_f16">;
+ def S_MAX_F16 : SOP2_F16<"s_max_f16">;
+ } // End isCommutable = 1
+
+ let FPDPRounding = 1 in
+ def S_SUB_F16 : SOP2_F16<"s_sub_f16">;
+
+ def S_SUB_F32 : SOP2_F32<"s_sub_f32">;
+ def S_CVT_PK_RTZ_F16_F32 : SOP2_F32<"s_cvt_pk_rtz_f16_f32">;
+
+ let FixedSize = 1 in
+ def S_FMAMK_F32 : SOP2_Pseudo<
+ "s_fmamk_f32", (outs SReg_32:$sdst),
+ (ins SSrc_f32_Deferred:$src0, KImmFP32:$imm, SSrc_f32_Deferred:$src1),
+ "$sdst, $src0, $imm, $src1"
+ >;
+ } // End isReMaterializable = 1
+
+ let Constraints = "$sdst = $src2", DisableEncoding="$src2",
+ isCommutable = 1 in {
+ def S_FMAC_F32 : SOP2_Pseudo<
+ "s_fmac_f32", (outs SReg_32:$sdst),
+ (ins SSrc_f32:$src0, SSrc_f32:$src1, SReg_32:$src2),
+ "$sdst, $src0, $src1"
+ >;
+
+ def S_FMAC_F16 : SOP2_Pseudo<
+ "s_fmac_f16", (outs SReg_32:$sdst),
+ (ins SSrc_f16:$src0, SSrc_f16:$src1, SReg_32:$src2),
+ "$sdst, $src0, $src1"
+ >;
+ } // End Constraints = "$sdst = $src2", DisableEncoding="$src2",
+ // isCommutable = 1
+} // End SubtargetPredicate = HasSALUFloatInsts, mayRaiseFPException = 1,
+ // Uses = [MODE], SchedRW = [WriteSFPU]
+
//===----------------------------------------------------------------------===//
// SOPK Instructions
//===----------------------------------------------------------------------===//
@@ -1035,6 +1143,30 @@ class SOPC_CMP_32<string opName,
let isCommutable = 1;
}
+class SOPC_CMP_F32<string opName,
+ SDPatternOperator cond = COND_NULL, string revOp = opName>
+ : SOPC_Helper<SSrc_b32, f32, opName, cond>,
+ Commutable_REV<revOp, !eq(revOp, opName)>,
+ SOPKInstTable<0, opName> {
+ let isCompare = 1;
+ let isCommutable = 1;
+ let mayRaiseFPException = 1;
+ let Uses = [MODE];
+ let SchedRW = [WriteSFPU];
+}
+
+class SOPC_CMP_F16<string opName,
+ SDPatternOperator cond = COND_NULL, string revOp = opName>
+ : SOPC_Helper<SSrc_b16, f16, opName, cond>,
+ Commutable_REV<revOp, !eq(revOp, opName)>,
+ SOPKInstTable<0, opName> {
+ let isCompare = 1;
+ let isCommutable = 1;
+ let mayRaiseFPException = 1;
+ let Uses = [MODE];
+ let SchedRW = [WriteSFPU];
+}
+
class SOPC_CMP_64<string opName,
SDPatternOperator cond = COND_NULL, string revOp = opName>
: SOPC_Helper<SSrc_b64, i64, opName, cond>,
@@ -1091,6 +1223,40 @@ def S_SET_GPR_IDX_ON : SOPC_Pseudo <
}
}
+let SubtargetPredicate = HasSALUFloatInsts in {
+
+def S_CMP_LT_F32 : SOPC_CMP_F32<"s_cmp_lt_f32", COND_OLT, "s_cmp_gt_f32">;
+def S_CMP_EQ_F32 : SOPC_CMP_F32<"s_cmp_eq_f32", COND_OEQ>;
+def S_CMP_LE_F32 : SOPC_CMP_F32<"s_cmp_le_f32", COND_OLE, "s_cmp_ge_f32">;
+def S_CMP_GT_F32 : SOPC_CMP_F32<"s_cmp_gt_f32", COND_OGT>;
+def S_CMP_LG_F32 : SOPC_CMP_F32<"s_cmp_lg_f32", COND_ONE>;
+def S_CMP_GE_F32 : SOPC_CMP_F32<"s_cmp_ge_f32", COND_OGE>;
+def S_CMP_O_F32 : SOPC_CMP_F32<"s_cmp_o_f32", COND_O>;
+def S_CMP_U_F32 : SOPC_CMP_F32<"s_cmp_u_f32", COND_UO>;
+def S_CMP_NGE_F32 : SOPC_CMP_F32<"s_cmp_nge_f32", COND_ULT, "s_cmp_nle_f32">;
+def S_CMP_NLG_F32 : SOPC_CMP_F32<"s_cmp_nlg_f32", COND_UEQ>;
+def S_CMP_NGT_F32 : SOPC_CMP_F32<"s_cmp_ngt_f32", COND_ULE, "s_cmp_nlt_f32">;
+def S_CMP_NLE_F32 : SOPC_CMP_F32<"s_cmp_nle_f32", COND_UGT>;
+def S_CMP_NEQ_F32 : SOPC_CMP_F32<"s_cmp_neq_f32", COND_UNE>;
+def S_CMP_NLT_F32 : SOPC_CMP_F32<"s_cmp_nlt_f32", COND_UGE>;
+
+def S_CMP_LT_F16 : SOPC_CMP_F16<"s_cmp_lt_f16", COND_OLT, "s_cmp_gt_f16">;
+def S_CMP_EQ_F16 : SOPC_CMP_F16<"s_cmp_eq_f16", COND_OEQ>;
+def S_CMP_LE_F16 : SOPC_CMP_F16<"s_cmp_le_f16", COND_OLE, "s_cmp_ge_f16">;
+def S_CMP_GT_F16 : SOPC_CMP_F16<"s_cmp_gt_f16", COND_OGT>;
+def S_CMP_LG_F16 : SOPC_CMP_F16<"s_cmp_lg_f16", COND_ONE>;
+def S_CMP_GE_F16 : SOPC_CMP_F16<"s_cmp_ge_f16", COND_OGE>;
+def S_CMP_O_F16 : SOPC_CMP_F16<"s_cmp_o_f16", COND_O>;
+def S_CMP_U_F16 : SOPC_CMP_F16<"s_cmp_u_f16", COND_UO>;
+def S_CMP_NGE_F16 : SOPC_CMP_F16<"s_cmp_nge_f16", COND_ULT, "s_cmp_nle_f16">;
+def S_CMP_NLG_F16 : SOPC_CMP_F16<"s_cmp_nlg_f16", COND_UEQ>;
+def S_CMP_NGT_F16 : SOPC_CMP_F16<"s_cmp_ngt_f16", COND_ULE, "s_cmp_nlt_f16">;
+def S_CMP_NLE_F16 : SOPC_CMP_F16<"s_cmp_nle_f16", COND_UGT>;
+def S_CMP_NEQ_F16 : SOPC_CMP_F16<"s_cmp_neq_f16", COND_UNE>;
+def S_CMP_NLT_F16 : SOPC_CMP_F16<"s_cmp_nlt_f16", COND_UGE>;
+
+} // End SubtargetPredicate = HasSALUFloatInsts
+
//===----------------------------------------------------------------------===//
// SOPP Instructions
//===----------------------------------------------------------------------===//
@@ -1642,6 +1808,26 @@ defm S_RFE_B64 : SOP1_Real_gfx11<0x04a>;
defm S_SENDMSG_RTN_B32 : SOP1_Real_gfx11<0x04c>;
defm S_SENDMSG_RTN_B64 : SOP1_Real_gfx11<0x04d>;
+//===----------------------------------------------------------------------===//
+// SOP1 - GFX1150
+//===----------------------------------------------------------------------===//
+
+defm S_CEIL_F32 : SOP1_Real_gfx11<0x060>;
+defm S_FLOOR_F32 : SOP1_Real_gfx11<0x061>;
+defm S_TRUNC_F32 : SOP1_Real_gfx11<0x062>;
+defm S_RNDNE_F32 : SOP1_Real_gfx11<0x063>;
+defm S_CVT_F32_I32 : SOP1_Real_gfx11<0x064>;
+defm S_CVT_F32_U32 : SOP1_Real_gfx11<0x065>;
+defm S_CVT_I32_F32 : SOP1_Real_gfx11<0x066>;
+defm S_CVT_U32_F32 : SOP1_Real_gfx11<0x067>;
+defm S_CVT_F16_F32 : SOP1_Real_gfx11<0x068>;
+defm S_CVT_F32_F16 : SOP1_Real_gfx11<0x069>;
+defm S_CVT_HI_F32_F16 : SOP1_Real_gfx11<0x06a>;
+defm S_CEIL_F16 : SOP1_Real_gfx11<0x06b>;
+defm S_FLOOR_F16 : SOP1_Real_gfx11<0x06c>;
+defm S_TRUNC_F16 : SOP1_Real_gfx11<0x06d>;
+defm S_RNDNE_F16 : SOP1_Real_gfx11<0x06e>;
+
//===----------------------------------------------------------------------===//
// SOP1 - GFX10.
//===----------------------------------------------------------------------===//
@@ -1746,12 +1932,12 @@ defm S_ABS_I32 : SOP1_Real_gfx6_gfx7_gfx10<0x034>;
//===----------------------------------------------------------------------===//
multiclass SOP2_Real_gfx11<bits<7> op> {
- def _gfx11 : SOP2_Real<op, !cast<SOP2_Pseudo>(NAME)>,
+ def _gfx11 : SOP2_Real32<op, !cast<SOP2_Pseudo>(NAME)>,
Select_gfx11<!cast<SOP2_Pseudo>(NAME).Mnemonic>;
}
multiclass SOP2_Real_Renamed_gfx11<bits<7> op, SOP2_Pseudo backing_pseudo, string real_name> {
- def _gfx11 : SOP2_Real<op, backing_pseudo, real_name>,
+ def _gfx11 : SOP2_Real32<op, backing_pseudo, real_name>,
Select_gfx11<backing_pseudo.Mnemonic>,
MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
}
@@ -1800,13 +1986,38 @@ defm S_CSELECT_B32 : SOP2_Real_gfx11<0x030>;
defm S_CSELECT_B64 : SOP2_Real_gfx11<0x031>;
defm S_PACK_HL_B32_B16 : SOP2_Real_gfx11<0x035>;
+//===----------------------------------------------------------------------===//
+// SOP2 - GFX1150
+//===----------------------------------------------------------------------===//
+
+multiclass SOP2_Real_FMAK_gfx11<bits<7> op> {
+ def _gfx11 : SOP2_Real64<op, !cast<SOP2_Pseudo>(NAME)>,
+ Select_gfx11<!cast<SOP2_Pseudo>(NAME).Mnemonic>;
+}
+
+defm S_ADD_F32 : SOP2_Real_gfx11<0x040>;
+defm S_SUB_F32 : SOP2_Real_gfx11<0x041>;
+defm S_MIN_F32 : SOP2_Real_gfx11<0x042>;
+defm S_MAX_F32 : SOP2_Real_gfx11<0x043>;
+defm S_MUL_F32 : SOP2_Real_gfx11<0x044>;
+defm S_FMAAK_F32 : SOP2_Real_FMAK_gfx11<0x045>;
+defm S_FMAMK_F32 : SOP2_Real_FMAK_gfx11<0x046>;
+defm S_FMAC_F32 : SOP2_Real_gfx11<0x047>;
+defm S_CVT_PK_RTZ_F16_F32 : SOP2_Real_gfx11<0x048>;
+defm S_ADD_F16 : SOP2_Real_gfx11<0x049>;
+defm S_SUB_F16 : SOP2_Real_gfx11<0x04a>;
+defm S_MIN_F16 : SOP2_Real_gfx11<0x04b>;
+defm S_MAX_F16 : SOP2_Real_gfx11<0x04c>;
+defm S_MUL_F16 : SOP2_Real_gfx11<0x04d>;
+defm S_FMAC_F16 : SOP2_Real_gfx11<0x04e>;
+
//===----------------------------------------------------------------------===//
// SOP2 - GFX10.
//===----------------------------------------------------------------------===//
multiclass SOP2_Real_gfx10<bits<7> op> {
defvar ps = !cast<SOP2_Pseudo>(NAME);
- def _gfx10 : SOP2_Real<op, ps>,
+ def _gfx10 : SOP2_Real32<op, ps>,
Select_gfx10<ps.Mnemonic>;
}
@@ -1829,7 +2040,7 @@ defm S_MUL_HI_I32 : SOP2_Real_gfx10<0x036>;
multiclass SOP2_Real_gfx6_gfx7<bits<7> op> {
defvar ps = !cast<SOP_Pseudo>(NAME);
- def _gfx6_gfx7 : SOP2_Real<op, ps>,
+ def _gfx6_gfx7 : SOP2_Real32<op, ps>,
Select_gfx6_gfx7<ps.Mnemonic>;
}
@@ -2199,6 +2410,40 @@ defm S_BITCMP1_B64 : SOPC_Real_gfx11<0x0f>;
defm S_CMP_EQ_U64 : SOPC_Real_gfx11<0x10>;
defm S_CMP_LG_U64 : SOPC_Real_gfx11<0x11>;
+//===----------------------------------------------------------------------===//
+// SOPC - GFX1150
+//===----------------------------------------------------------------------===//
+
+defm S_CMP_LT_F32 : SOPC_Real_gfx11<0x41>;
+defm S_CMP_EQ_F32 : SOPC_Real_gfx11<0x42>;
+defm S_CMP_LE_F32 : SOPC_Real_gfx11<0x43>;
+defm S_CMP_GT_F32 : SOPC_Real_gfx11<0x44>;
+defm S_CMP_LG_F32 : SOPC_Real_gfx11<0x45>;
+defm S_CMP_GE_F32 : SOPC_Real_gfx11<0x46>;
+defm S_CMP_O_F32 : SOPC_Real_gfx11<0x47>;
+defm S_CMP_U_F32 : SOPC_Real_gfx11<0x48>;
+defm S_CMP_NGE_F32 : SOPC_Real_gfx11<0x49>;
+defm S_CMP_NLG_F32 : SOPC_Real_gfx11<0x4a>;
+defm S_CMP_NGT_F32 : SOPC_Real_gfx11<0x4b>;
+defm S_CMP_NLE_F32 : SOPC_Real_gfx11<0x4c>;
+defm S_CMP_NEQ_F32 : SOPC_Real_gfx11<0x4d>;
+defm S_CMP_NLT_F32 : SOPC_Real_gfx11<0x4e>;
+
+defm S_CMP_LT_F16 : SOPC_Real_gfx11<0x51>;
+defm S_CMP_EQ_F16 : SOPC_Real_gfx11<0x52>;
+defm S_CMP_LE_F16 : SOPC_Real_gfx11<0x53>;
+defm S_CMP_GT_F16 : SOPC_Real_gfx11<0x54>;
+defm S_CMP_LG_F16 : SOPC_Real_gfx11<0x55>;
+defm S_CMP_GE_F16 : SOPC_Real_gfx11<0x56>;
+defm S_CMP_O_F16 : SOPC_Real_gfx11<0x57>;
+defm S_CMP_U_F16 : SOPC_Real_gfx11<0x58>;
+defm S_CMP_NGE_F16 : SOPC_Real_gfx11<0x59>;
+defm S_CMP_NLG_F16 : SOPC_Real_gfx11<0x5a>;
+defm S_CMP_NGT_F16 : SOPC_Real_gfx11<0x5b>;
+defm S_CMP_NLE_F16 : SOPC_Real_gfx11<0x5c>;
+defm S_CMP_NEQ_F16 : SOPC_Real_gfx11<0x5d>;
+defm S_CMP_NLT_F16 : SOPC_Real_gfx11<0x5e>;
+
//===----------------------------------------------------------------------===//
// SOPC - GFX6, GFX7, GFX8, GFX9, GFX10
//===----------------------------------------------------------------------===//
@@ -2259,9 +2504,8 @@ class SOP1_Real_vi<bits<8> op, SOP1_Pseudo ps> :
SOP1_Real<op, ps>,
Select_vi<ps.Mnemonic>;
-
class SOP2_Real_vi<bits<7> op, SOP2_Pseudo ps> :
- SOP2_Real<op, ps>,
+ SOP2_Real32<op, ps>,
Select_vi<ps.Mnemonic>;
class SOPK_Real_vi<bits<5> op, SOPK_Pseudo ps> :
diff --git a/llvm/test/MC/AMDGPU/gfx1150_asm_salu_float.s b/llvm/test/MC/AMDGPU/gfx1150_asm_salu_float.s
new file mode 100644
index 000000000000000..0c097bf24a14cc7
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1150_asm_salu_float.s
@@ -0,0 +1,2527 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1150 -show-encoding %s | FileCheck --check-prefixes=GFX1150 %s
+
+s_cvt_f32_i32 s5, s1
+// GFX1150: encoding: [0x01,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s105, s1
+// GFX1150: encoding: [0x01,0x64,0xe9,0xbe]
+
+s_cvt_f32_i32 s5, s105
+// GFX1150: encoding: [0x69,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s5, s103
+// GFX1150: encoding: [0x67,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s5, ttmp11
+// GFX1150: encoding: [0x77,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s5, m0
+// GFX1150: encoding: [0x7d,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s5, 0
+// GFX1150: encoding: [0x80,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s5, -1
+// GFX1150: encoding: [0xc1,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s5, 0.5
+// GFX1150: encoding: [0xf0,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s5, -4.0
+// GFX1150: encoding: [0xf7,0x64,0x85,0xbe]
+
+s_cvt_f32_i32 s5, 0xaf123456
+// GFX1150: encoding: [0xff,0x64,0x85,0xbe,0x56,0x34,0x12,0xaf]
+
+s_cvt_f32_i32 s5, 0x3f717273
+// GFX1150: encoding: [0xff,0x64,0x85,0xbe,0x73,0x72,0x71,0x3f]
+
+s_cvt_f32_u32 s5, s1
+// GFX1150: encoding: [0x01,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s105, s1
+// GFX1150: encoding: [0x01,0x65,0xe9,0xbe]
+
+s_cvt_f32_u32 s5, s105
+// GFX1150: encoding: [0x69,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s5, s103
+// GFX1150: encoding: [0x67,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s5, ttmp11
+// GFX1150: encoding: [0x77,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s5, m0
+// GFX1150: encoding: [0x7d,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s5, 0
+// GFX1150: encoding: [0x80,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s5, -1
+// GFX1150: encoding: [0xc1,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s5, 0.5
+// GFX1150: encoding: [0xf0,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s5, -4.0
+// GFX1150: encoding: [0xf7,0x65,0x85,0xbe]
+
+s_cvt_f32_u32 s5, 0xaf123456
+// GFX1150: encoding: [0xff,0x65,0x85,0xbe,0x56,0x34,0x12,0xaf]
+
+s_cvt_f32_u32 s5, 0x3f717273
+// GFX1150: encoding: [0xff,0x65,0x85,0xbe,0x73,0x72,0x71,0x3f]
+
+s_cvt_u32_f32 s5, s1
+// GFX1150: encoding: [0x01,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s105, s1
+// GFX1150: encoding: [0x01,0x67,0xe9,0xbe]
+
+s_cvt_u32_f32 s5, s105
+// GFX1150: encoding: [0x69,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s5, s103
+// GFX1150: encoding: [0x67,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s5, ttmp11
+// GFX1150: encoding: [0x77,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s5, m0
+// GFX1150: encoding: [0x7d,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s5, 0
+// GFX1150: encoding: [0x80,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s5, -1
+// GFX1150: encoding: [0xc1,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s5, 0.5
+// GFX1150: encoding: [0xf0,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s5, -4.0
+// GFX1150: encoding: [0xf7,0x67,0x85,0xbe]
+
+s_cvt_u32_f32 s5, 0xaf123456
+// GFX1150: encoding: [0xff,0x67,0x85,0xbe,0x56,0x34,0x12,0xaf]
+
+s_cvt_u32_f32 s5, 0x3f717273
+// GFX1150: encoding: [0xff,0x67,0x85,0xbe,0x73,0x72,0x71,0x3f]
+
+s_cvt_i32_f32 s5, s1
+// GFX1150: encoding: [0x01,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s105, s1
+// GFX1150: encoding: [0x01,0x66,0xe9,0xbe]
+
+s_cvt_i32_f32 s5, s105
+// GFX1150: encoding: [0x69,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s5, s103
+// GFX1150: encoding: [0x67,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s5, ttmp11
+// GFX1150: encoding: [0x77,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s5, m0
+// GFX1150: encoding: [0x7d,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s5, 0
+// GFX1150: encoding: [0x80,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s5, -1
+// GFX1150: encoding: [0xc1,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s5, 0.5
+// GFX1150: encoding: [0xf0,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s5, -4.0
+// GFX1150: encoding: [0xf7,0x66,0x85,0xbe]
+
+s_cvt_i32_f32 s5, 0xaf123456
+// GFX1150: encoding: [0xff,0x66,0x85,0xbe,0x56,0x34,0x12,0xaf]
+
+s_cvt_i32_f32 s5, 0x3f717273
+// GFX1150: encoding: [0xff,0x66,0x85,0xbe,0x73,0x72,0x71,0x3f]
+
+s_cvt_f16_f32 s5, s1
+// GFX1150: encoding: [0x01,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s105, s1
+// GFX1150: encoding: [0x01,0x68,0xe9,0xbe]
+
+s_cvt_f16_f32 s5, s105
+// GFX1150: encoding: [0x69,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s5, s103
+// GFX1150: encoding: [0x67,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s5, ttmp11
+// GFX1150: encoding: [0x77,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s5, m0
+// GFX1150: encoding: [0x7d,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s5, 0
+// GFX1150: encoding: [0x80,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s5, -1
+// GFX1150: encoding: [0xc1,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s5, 0.5
+// GFX1150: encoding: [0xf0,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s5, -4.0
+// GFX1150: encoding: [0xf7,0x68,0x85,0xbe]
+
+s_cvt_f16_f32 s5, 0xaf123456
+// GFX1150: encoding: [0xff,0x68,0x85,0xbe,0x56,0x34,0x12,0xaf]
+
+s_cvt_f16_f32 s5, 0x3f717273
+// GFX1150: encoding: [0xff,0x68,0x85,0xbe,0x73,0x72,0x71,0x3f]
+
+s_cvt_f32_f16 s5, s1
+// GFX1150: encoding: [0x01,0x69,0x85,0xbe]
+
+s_cvt_f32_f16 s105, s1
+// GFX1150: encoding: [0x01,0x69,0xe9,0xbe]
+
+s_cvt_f32_f16 s5, s105
+// GFX1150: encoding: [0x69,0x69,0x85,0xbe]
+
+s_cvt_f32_f16 s5, s103
+// GFX1150: encoding: [0x67,0x69,0x85,0xbe]
+
+s_cvt_f32_f16 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x69,0x85,0xbe]
+
+s_cvt_f32_f16 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x69,0x85,0xbe]
+
+s_cvt_f32_f16 s5, ttmp11
+// GFX1150: encoding: [0x77,0x69,0x85,0xbe]
+
+s_cvt_f32_f16 s5, m0
+// GFX1150: encoding: [0x7d,0x69,0x85,0xbe]
+
+s_cvt_f32_f16 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x69,0x85,0xbe]
+
+s_cvt_f32_f16 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x69,0x85,0xbe]
+
+s_cvt_f32_f16 s5, 0
+// GFX1150: encoding: [0x80,0x69,0x85,0xbe]
+
+s_cvt_f32_f16 s5, -1
+// GFX1150: encoding: [0xc1,0x69,0x85,0xbe]
+
+s_cvt_hi_f32_f16 s5, s1
+// GFX1150: encoding: [0x01,0x6a,0x85,0xbe]
+
+s_cvt_hi_f32_f16 s105, s1
+// GFX1150: encoding: [0x01,0x6a,0xe9,0xbe]
+
+s_cvt_hi_f32_f16 s5, s105
+// GFX1150: encoding: [0x69,0x6a,0x85,0xbe]
+
+s_cvt_hi_f32_f16 s5, s103
+// GFX1150: encoding: [0x67,0x6a,0x85,0xbe]
+
+s_cvt_hi_f32_f16 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x6a,0x85,0xbe]
+
+s_cvt_hi_f32_f16 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x6a,0x85,0xbe]
+
+s_cvt_hi_f32_f16 s5, ttmp11
+// GFX1150: encoding: [0x77,0x6a,0x85,0xbe]
+
+s_cvt_hi_f32_f16 s5, m0
+// GFX1150: encoding: [0x7d,0x6a,0x85,0xbe]
+
+s_cvt_hi_f32_f16 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x6a,0x85,0xbe]
+
+s_cvt_hi_f32_f16 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x6a,0x85,0xbe]
+
+s_cvt_hi_f32_f16 s5, 0
+// GFX1150: encoding: [0x80,0x6a,0x85,0xbe]
+
+s_cvt_hi_f32_f16 s5, -1
+// GFX1150: encoding: [0xc1,0x6a,0x85,0xbe]
+
+s_trunc_f32 s5, s1
+// GFX1150: encoding: [0x01,0x62,0x85,0xbe]
+
+s_trunc_f32 s105, s1
+// GFX1150: encoding: [0x01,0x62,0xe9,0xbe]
+
+s_trunc_f32 s5, s105
+// GFX1150: encoding: [0x69,0x62,0x85,0xbe]
+
+s_trunc_f32 s5, s103
+// GFX1150: encoding: [0x67,0x62,0x85,0xbe]
+
+s_trunc_f32 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x62,0x85,0xbe]
+
+s_trunc_f32 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x62,0x85,0xbe]
+
+s_trunc_f32 s5, ttmp11
+// GFX1150: encoding: [0x77,0x62,0x85,0xbe]
+
+s_trunc_f32 s5, m0
+// GFX1150: encoding: [0x7d,0x62,0x85,0xbe]
+
+s_trunc_f32 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x62,0x85,0xbe]
+
+s_trunc_f32 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x62,0x85,0xbe]
+
+s_trunc_f32 s5, 0
+// GFX1150: encoding: [0x80,0x62,0x85,0xbe]
+
+s_trunc_f32 s5, -1
+// GFX1150: encoding: [0xc1,0x62,0x85,0xbe]
+
+s_trunc_f32 s5, 0.5
+// GFX1150: encoding: [0xf0,0x62,0x85,0xbe]
+
+s_trunc_f32 s5, -4.0
+// GFX1150: encoding: [0xf7,0x62,0x85,0xbe]
+
+s_trunc_f32 s5, 0xaf123456
+// GFX1150: encoding: [0xff,0x62,0x85,0xbe,0x56,0x34,0x12,0xaf]
+
+s_trunc_f32 s5, 0x3f717273
+// GFX1150: encoding: [0xff,0x62,0x85,0xbe,0x73,0x72,0x71,0x3f]
+
+s_ceil_f32 s5, s1
+// GFX1150: encoding: [0x01,0x60,0x85,0xbe]
+
+s_ceil_f32 s105, s1
+// GFX1150: encoding: [0x01,0x60,0xe9,0xbe]
+
+s_ceil_f32 s5, s105
+// GFX1150: encoding: [0x69,0x60,0x85,0xbe]
+
+s_ceil_f32 s5, s103
+// GFX1150: encoding: [0x67,0x60,0x85,0xbe]
+
+s_ceil_f32 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x60,0x85,0xbe]
+
+s_ceil_f32 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x60,0x85,0xbe]
+
+s_ceil_f32 s5, ttmp11
+// GFX1150: encoding: [0x77,0x60,0x85,0xbe]
+
+s_ceil_f32 s5, m0
+// GFX1150: encoding: [0x7d,0x60,0x85,0xbe]
+
+s_ceil_f32 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x60,0x85,0xbe]
+
+s_ceil_f32 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x60,0x85,0xbe]
+
+s_ceil_f32 s5, 0
+// GFX1150: encoding: [0x80,0x60,0x85,0xbe]
+
+s_ceil_f32 s5, -1
+// GFX1150: encoding: [0xc1,0x60,0x85,0xbe]
+
+s_ceil_f32 s5, 0.5
+// GFX1150: encoding: [0xf0,0x60,0x85,0xbe]
+
+s_ceil_f32 s5, -4.0
+// GFX1150: encoding: [0xf7,0x60,0x85,0xbe]
+
+s_ceil_f32 s5, 0xaf123456
+// GFX1150: encoding: [0xff,0x60,0x85,0xbe,0x56,0x34,0x12,0xaf]
+
+s_ceil_f32 s5, 0x3f717273
+// GFX1150: encoding: [0xff,0x60,0x85,0xbe,0x73,0x72,0x71,0x3f]
+
+s_rndne_f32 s5, s1
+// GFX1150: encoding: [0x01,0x63,0x85,0xbe]
+
+s_rndne_f32 s105, s1
+// GFX1150: encoding: [0x01,0x63,0xe9,0xbe]
+
+s_rndne_f32 s5, s105
+// GFX1150: encoding: [0x69,0x63,0x85,0xbe]
+
+s_rndne_f32 s5, s103
+// GFX1150: encoding: [0x67,0x63,0x85,0xbe]
+
+s_rndne_f32 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x63,0x85,0xbe]
+
+s_rndne_f32 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x63,0x85,0xbe]
+
+s_rndne_f32 s5, ttmp11
+// GFX1150: encoding: [0x77,0x63,0x85,0xbe]
+
+s_rndne_f32 s5, m0
+// GFX1150: encoding: [0x7d,0x63,0x85,0xbe]
+
+s_rndne_f32 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x63,0x85,0xbe]
+
+s_rndne_f32 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x63,0x85,0xbe]
+
+s_rndne_f32 s5, 0
+// GFX1150: encoding: [0x80,0x63,0x85,0xbe]
+
+s_rndne_f32 s5, -1
+// GFX1150: encoding: [0xc1,0x63,0x85,0xbe]
+
+s_rndne_f32 s5, 0.5
+// GFX1150: encoding: [0xf0,0x63,0x85,0xbe]
+
+s_rndne_f32 s5, -4.0
+// GFX1150: encoding: [0xf7,0x63,0x85,0xbe]
+
+s_rndne_f32 s5, 0xaf123456
+// GFX1150: encoding: [0xff,0x63,0x85,0xbe,0x56,0x34,0x12,0xaf]
+
+s_rndne_f32 s5, 0x3f717273
+// GFX1150: encoding: [0xff,0x63,0x85,0xbe,0x73,0x72,0x71,0x3f]
+
+s_floor_f32 s5, s1
+// GFX1150: encoding: [0x01,0x61,0x85,0xbe]
+
+s_floor_f32 s105, s1
+// GFX1150: encoding: [0x01,0x61,0xe9,0xbe]
+
+s_floor_f32 s5, s105
+// GFX1150: encoding: [0x69,0x61,0x85,0xbe]
+
+s_floor_f32 s5, s103
+// GFX1150: encoding: [0x67,0x61,0x85,0xbe]
+
+s_floor_f32 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x61,0x85,0xbe]
+
+s_floor_f32 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x61,0x85,0xbe]
+
+s_floor_f32 s5, ttmp11
+// GFX1150: encoding: [0x77,0x61,0x85,0xbe]
+
+s_floor_f32 s5, m0
+// GFX1150: encoding: [0x7d,0x61,0x85,0xbe]
+
+s_floor_f32 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x61,0x85,0xbe]
+
+s_floor_f32 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x61,0x85,0xbe]
+
+s_floor_f32 s5, 0
+// GFX1150: encoding: [0x80,0x61,0x85,0xbe]
+
+s_floor_f32 s5, -1
+// GFX1150: encoding: [0xc1,0x61,0x85,0xbe]
+
+s_floor_f32 s5, 0.5
+// GFX1150: encoding: [0xf0,0x61,0x85,0xbe]
+
+s_floor_f32 s5, -4.0
+// GFX1150: encoding: [0xf7,0x61,0x85,0xbe]
+
+s_floor_f32 s5, 0xaf123456
+// GFX1150: encoding: [0xff,0x61,0x85,0xbe,0x56,0x34,0x12,0xaf]
+
+s_floor_f32 s5, 0x3f717273
+// GFX1150: encoding: [0xff,0x61,0x85,0xbe,0x73,0x72,0x71,0x3f]
+
+s_floor_f16 s5, s1
+// GFX1150: encoding: [0x01,0x6c,0x85,0xbe]
+
+s_floor_f16 s105, s1
+// GFX1150: encoding: [0x01,0x6c,0xe9,0xbe]
+
+s_floor_f16 s5, s105
+// GFX1150: encoding: [0x69,0x6c,0x85,0xbe]
+
+s_floor_f16 s5, s101
+// GFX1150: encoding: [0x65,0x6c,0x85,0xbe]
+
+s_floor_f16 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x6c,0x85,0xbe]
+
+s_floor_f16 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x6c,0x85,0xbe]
+
+s_floor_f16 s5, m0
+// GFX1150: encoding: [0x7d,0x6c,0x85,0xbe]
+
+s_floor_f16 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x6c,0x85,0xbe]
+
+s_floor_f16 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x6c,0x85,0xbe]
+
+s_floor_f16 s5, 0
+// GFX1150: encoding: [0x80,0x6c,0x85,0xbe]
+
+s_floor_f16 s5, -1
+// GFX1150: encoding: [0xc1,0x6c,0x85,0xbe]
+
+s_floor_f16 s5, 0.5
+// GFX1150: encoding: [0xf0,0x6c,0x85,0xbe]
+
+s_floor_f16 s5, -4.0
+// GFX1150: encoding: [0xf7,0x6c,0x85,0xbe]
+
+s_floor_f16 s5, 0xfe0b
+// GFX1150: encoding: [0xff,0x6c,0x85,0xbe,0x0b,0xfe,0x00,0x00]
+
+s_floor_f16 s5, 0x3456
+// GFX1150: encoding: [0xff,0x6c,0x85,0xbe,0x56,0x34,0x00,0x00]
+
+s_ceil_f16 s5, s1
+// GFX1150: encoding: [0x01,0x6b,0x85,0xbe]
+
+s_ceil_f16 s105, s1
+// GFX1150: encoding: [0x01,0x6b,0xe9,0xbe]
+
+s_ceil_f16 s5, s105
+// GFX1150: encoding: [0x69,0x6b,0x85,0xbe]
+
+s_ceil_f16 s5, s101
+// GFX1150: encoding: [0x65,0x6b,0x85,0xbe]
+
+s_ceil_f16 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x6b,0x85,0xbe]
+
+s_ceil_f16 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x6b,0x85,0xbe]
+
+s_ceil_f16 s5, m0
+// GFX1150: encoding: [0x7d,0x6b,0x85,0xbe]
+
+s_ceil_f16 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x6b,0x85,0xbe]
+
+s_ceil_f16 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x6b,0x85,0xbe]
+
+s_ceil_f16 s5, 0
+// GFX1150: encoding: [0x80,0x6b,0x85,0xbe]
+
+s_ceil_f16 s5, -1
+// GFX1150: encoding: [0xc1,0x6b,0x85,0xbe]
+
+s_ceil_f16 s5, 0.5
+// GFX1150: encoding: [0xf0,0x6b,0x85,0xbe]
+
+s_ceil_f16 s5, -4.0
+// GFX1150: encoding: [0xf7,0x6b,0x85,0xbe]
+
+s_ceil_f16 s5, 0xfe0b
+// GFX1150: encoding: [0xff,0x6b,0x85,0xbe,0x0b,0xfe,0x00,0x00]
+
+s_ceil_f16 s5, 0x3456
+// GFX1150: encoding: [0xff,0x6b,0x85,0xbe,0x56,0x34,0x00,0x00]
+
+s_trunc_f16 s5, s1
+// GFX1150: encoding: [0x01,0x6d,0x85,0xbe]
+
+s_trunc_f16 s105, s1
+// GFX1150: encoding: [0x01,0x6d,0xe9,0xbe]
+
+s_trunc_f16 s5, s105
+// GFX1150: encoding: [0x69,0x6d,0x85,0xbe]
+
+s_trunc_f16 s5, s101
+// GFX1150: encoding: [0x65,0x6d,0x85,0xbe]
+
+s_trunc_f16 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x6d,0x85,0xbe]
+
+s_trunc_f16 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x6d,0x85,0xbe]
+
+s_trunc_f16 s5, m0
+// GFX1150: encoding: [0x7d,0x6d,0x85,0xbe]
+
+s_trunc_f16 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x6d,0x85,0xbe]
+
+s_trunc_f16 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x6d,0x85,0xbe]
+
+s_trunc_f16 s5, 0
+// GFX1150: encoding: [0x80,0x6d,0x85,0xbe]
+
+s_trunc_f16 s5, -1
+// GFX1150: encoding: [0xc1,0x6d,0x85,0xbe]
+
+s_trunc_f16 s5, 0.5
+// GFX1150: encoding: [0xf0,0x6d,0x85,0xbe]
+
+s_trunc_f16 s5, -4.0
+// GFX1150: encoding: [0xf7,0x6d,0x85,0xbe]
+
+s_trunc_f16 s5, 0xfe0b
+// GFX1150: encoding: [0xff,0x6d,0x85,0xbe,0x0b,0xfe,0x00,0x00]
+
+s_trunc_f16 s5, 0x3456
+// GFX1150: encoding: [0xff,0x6d,0x85,0xbe,0x56,0x34,0x00,0x00]
+
+s_rndne_f16 s5, s1
+// GFX1150: encoding: [0x01,0x6e,0x85,0xbe]
+
+s_rndne_f16 s105, s1
+// GFX1150: encoding: [0x01,0x6e,0xe9,0xbe]
+
+s_rndne_f16 s5, s105
+// GFX1150: encoding: [0x69,0x6e,0x85,0xbe]
+
+s_rndne_f16 s5, s101
+// GFX1150: encoding: [0x65,0x6e,0x85,0xbe]
+
+s_rndne_f16 s5, vcc_lo
+// GFX1150: encoding: [0x6a,0x6e,0x85,0xbe]
+
+s_rndne_f16 s5, vcc_hi
+// GFX1150: encoding: [0x6b,0x6e,0x85,0xbe]
+
+s_rndne_f16 s5, m0
+// GFX1150: encoding: [0x7d,0x6e,0x85,0xbe]
+
+s_rndne_f16 s5, exec_lo
+// GFX1150: encoding: [0x7e,0x6e,0x85,0xbe]
+
+s_rndne_f16 s5, exec_hi
+// GFX1150: encoding: [0x7f,0x6e,0x85,0xbe]
+
+s_rndne_f16 s5, 0
+// GFX1150: encoding: [0x80,0x6e,0x85,0xbe]
+
+s_rndne_f16 s5, -1
+// GFX1150: encoding: [0xc1,0x6e,0x85,0xbe]
+
+s_rndne_f16 s5, 0.5
+// GFX1150: encoding: [0xf0,0x6e,0x85,0xbe]
+
+s_rndne_f16 s5, -4.0
+// GFX1150: encoding: [0xf7,0x6e,0x85,0xbe]
+
+s_rndne_f16 s5, 0xfe0b
+// GFX1150: encoding: [0xff,0x6e,0x85,0xbe,0x0b,0xfe,0x00,0x00]
+
+s_rndne_f16 s5, 0x3456
+// GFX1150: encoding: [0xff,0x6e,0x85,0xbe,0x56,0x34,0x00,0x00]
+
+s_add_f32 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x05,0xa0]
+
+s_add_f32 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x69,0xa0]
+
+s_add_f32 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x05,0xa0]
+
+s_add_f32 s5, s101, s2
+// GFX1150: encoding: [0x65,0x02,0x05,0xa0]
+
+s_add_f32 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x05,0xa0]
+
+s_add_f32 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x05,0xa0]
+
+s_add_f32 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x05,0xa0]
+
+s_add_f32 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x05,0xa0]
+
+s_add_f32 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x05,0xa0]
+
+s_add_f32 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x05,0xa0]
+
+s_add_f32 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x05,0xa0]
+
+s_add_f32 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x05,0xa0]
+
+s_add_f32 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x05,0xa0]
+
+s_add_f32 s5, 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa0,0x56,0x34,0x12,0xaf]
+
+s_add_f32 s5, 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa0,0x73,0x72,0x71,0x3f]
+
+s_add_f32 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x05,0xa0]
+
+s_sub_f32 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x85,0xa0]
+
+s_sub_f32 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0xe9,0xa0]
+
+s_sub_f32 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x85,0xa0]
+
+s_sub_f32 s5, s101, s2
+// GFX1150: encoding: [0x65,0x02,0x85,0xa0]
+
+s_sub_f32 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x85,0xa0]
+
+s_sub_f32 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x85,0xa0]
+
+s_sub_f32 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x85,0xa0]
+
+s_sub_f32 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x85,0xa0]
+
+s_sub_f32 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x85,0xa0]
+
+s_sub_f32 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x85,0xa0]
+
+s_sub_f32 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x85,0xa0]
+
+s_sub_f32 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x85,0xa0]
+
+s_sub_f32 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x85,0xa0]
+
+s_sub_f32 s5, 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x85,0xa0,0x56,0x34,0x12,0xaf]
+
+s_sub_f32 s5, 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x85,0xa0,0x73,0x72,0x71,0x3f]
+
+s_sub_f32 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x85,0xa0]
+
+s_mul_f32 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x05,0xa2]
+
+s_mul_f32 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x69,0xa2]
+
+s_mul_f32 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x05,0xa2]
+
+s_mul_f32 s5, s103, s2
+// GFX1150: encoding: [0x67,0x02,0x05,0xa2]
+
+s_mul_f32 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x05,0xa2]
+
+s_mul_f32 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x05,0xa2]
+
+s_mul_f32 s5, ttmp11, s2
+// GFX1150: encoding: [0x77,0x02,0x05,0xa2]
+
+s_mul_f32 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x05,0xa2]
+
+s_mul_f32 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x05,0xa2]
+
+s_mul_f32 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x05,0xa2]
+
+s_mul_f32 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x05,0xa2]
+
+s_mul_f32 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x05,0xa2]
+
+s_mul_f32 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x05,0xa2]
+
+s_mul_f32 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x05,0xa2]
+
+s_mul_f32 s5, 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa2,0x56,0x34,0x12,0xaf]
+
+s_mul_f32 s5, 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa2,0x73,0x72,0x71,0x3f]
+
+s_mul_f32 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x05,0xa2]
+
+s_min_f32 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x05,0xa1]
+
+s_min_f32 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x69,0xa1]
+
+s_min_f32 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x05,0xa1]
+
+s_min_f32 s5, s103, s2
+// GFX1150: encoding: [0x67,0x02,0x05,0xa1]
+
+s_min_f32 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x05,0xa1]
+
+s_min_f32 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x05,0xa1]
+
+s_min_f32 s5, ttmp11, s2
+// GFX1150: encoding: [0x77,0x02,0x05,0xa1]
+
+s_min_f32 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x05,0xa1]
+
+s_min_f32 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x05,0xa1]
+
+s_min_f32 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x05,0xa1]
+
+s_min_f32 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x05,0xa1]
+
+s_min_f32 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x05,0xa1]
+
+s_min_f32 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x05,0xa1]
+
+s_min_f32 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x05,0xa1]
+
+s_min_f32 s5, 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa1,0x56,0x34,0x12,0xaf]
+
+s_min_f32 s5, 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa1,0x73,0x72,0x71,0x3f]
+
+s_min_f32 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x05,0xa1]
+
+s_max_f32 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x85,0xa1]
+
+s_max_f32 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0xe9,0xa1]
+
+s_max_f32 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x85,0xa1]
+
+s_max_f32 s5, s103, s2
+// GFX1150: encoding: [0x67,0x02,0x85,0xa1]
+
+s_max_f32 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x85,0xa1]
+
+s_max_f32 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x85,0xa1]
+
+s_max_f32 s5, ttmp11, s2
+// GFX1150: encoding: [0x77,0x02,0x85,0xa1]
+
+s_max_f32 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x85,0xa1]
+
+s_max_f32 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x85,0xa1]
+
+s_max_f32 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x85,0xa1]
+
+s_max_f32 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x85,0xa1]
+
+s_max_f32 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x85,0xa1]
+
+s_max_f32 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x85,0xa1]
+
+s_max_f32 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x85,0xa1]
+
+s_max_f32 s5, 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x85,0xa1,0x56,0x34,0x12,0xaf]
+
+s_max_f32 s5, 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x85,0xa1,0x73,0x72,0x71,0x3f]
+
+s_max_f32 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x85,0xa1]
+
+s_fmac_f32 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x85,0xa3]
+
+s_fmac_f32 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0xe9,0xa3]
+
+s_fmac_f32 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x85,0xa3]
+
+s_fmac_f32 s5, s103, s2
+// GFX1150: encoding: [0x67,0x02,0x85,0xa3]
+
+s_fmac_f32 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x85,0xa3]
+
+s_fmac_f32 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x85,0xa3]
+
+s_fmac_f32 s5, ttmp11, s2
+// GFX1150: encoding: [0x77,0x02,0x85,0xa3]
+
+s_fmac_f32 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x85,0xa3]
+
+s_fmac_f32 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x85,0xa3]
+
+s_fmac_f32 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x85,0xa3]
+
+s_fmac_f32 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x85,0xa3]
+
+s_fmac_f32 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x85,0xa3]
+
+s_fmac_f32 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x85,0xa3]
+
+s_fmac_f32 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x85,0xa3]
+
+s_fmac_f32 s5, 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x85,0xa3,0x56,0x34,0x12,0xaf]
+
+s_fmac_f32 s5, 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x85,0xa3,0x73,0x72,0x71,0x3f]
+
+s_fmac_f32 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x85,0xa3]
+
+s_fmamk_f32 s5, s1, 0x11213141, s3
+// GFX1150: encoding: [0x01,0x03,0x05,0xa3,0x41,0x31,0x21,0x11]
+
+s_fmamk_f32 s105, s1, 0x11213141, s3
+// GFX1150: encoding: [0x01,0x03,0x69,0xa3,0x41,0x31,0x21,0x11]
+
+s_fmamk_f32 s5, s105, 0x11213141, s3
+// GFX1150: encoding: [0x69,0x03,0x05,0xa3,0x41,0x31,0x21,0x11]
+
+s_fmamk_f32 s5, 0, 0x11213141, s3
+// GFX1150: encoding: [0x80,0x03,0x05,0xa3,0x41,0x31,0x21,0x11]
+
+s_fmamk_f32 s5, -1, 0x11213141, s3
+// GFX1150: encoding: [0xc1,0x03,0x05,0xa3,0x41,0x31,0x21,0x11]
+
+s_fmamk_f32 s5, 0.5, 0x11213141, s3
+// GFX1150: encoding: [0xf0,0x03,0x05,0xa3,0x41,0x31,0x21,0x11]
+
+s_fmamk_f32 s5, -4.0, 0x11213141, s3
+// GFX1150: encoding: [0xf7,0x03,0x05,0xa3,0x41,0x31,0x21,0x11]
+
+s_fmamk_f32 s5, s1, 0xa1b1c1d1, s3
+// GFX1150: encoding: [0x01,0x03,0x05,0xa3,0xd1,0xc1,0xb1,0xa1]
+
+s_fmamk_f32 s5, s1, 0x11213141, s105
+// GFX1150: encoding: [0x01,0x69,0x05,0xa3,0x41,0x31,0x21,0x11]
+
+s_fmamk_f32 s5, 0x11213141, 0x11213141, s105
+// GFX1150: encoding: [0xff,0x69,0x05,0xa3,0x41,0x31,0x21,0x11]
+
+s_fmamk_f32 s5, s105, 0x11213141, 0x11213141
+// GFX1150: encoding: [0x69,0xff,0x05,0xa3,0x41,0x31,0x21,0x11]
+
+s_fmaak_f32 s5, s1, s2, 0x11213141
+// GFX1150: encoding: [0x01,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+
+s_fmaak_f32 s105, s1, s2, 0x11213141
+// GFX1150: encoding: [0x01,0x02,0xe9,0xa2,0x41,0x31,0x21,0x11]
+
+s_fmaak_f32 s5, s105, s2, 0x11213141
+// GFX1150: encoding: [0x69,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+
+s_fmaak_f32 s5, 0, s2, 0x11213141
+// GFX1150: encoding: [0x80,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+
+s_fmaak_f32 s5, -1, s2, 0x11213141
+// GFX1150: encoding: [0xc1,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+
+s_fmaak_f32 s5, 0.5, s2, 0x11213141
+// GFX1150: encoding: [0xf0,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+
+s_fmaak_f32 s5, -4.0, s2, 0x11213141
+// GFX1150: encoding: [0xf7,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+
+s_fmaak_f32 s5, s1, s105, 0x11213141
+// GFX1150: encoding: [0x01,0x69,0x85,0xa2,0x41,0x31,0x21,0x11]
+
+s_fmaak_f32 s5, s1, s2, 0xa1b1c1d1
+// GFX1150: encoding: [0x01,0x02,0x85,0xa2,0xd1,0xc1,0xb1,0xa1]
+
+s_fmaak_f32 s5, 0x11213141, s2, 0x11213141
+// GFX1150: encoding: [0xff,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+
+s_fmaak_f32 s5, s105, 0x11213141, 0x11213141
+// GFX1150: encoding: [0x69,0xff,0x85,0xa2,0x41,0x31,0x21,0x11]
+
+s_fmaak_f32 s5, 0x11213141, 0x11213141, 0x11213141
+// GFX1150: encoding: [0xff,0xff,0x85,0xa2,0x41,0x31,0x21,0x11]
+
+s_cvt_pk_rtz_f16_f32 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x69,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, s103, s2
+// GFX1150: encoding: [0x67,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, ttmp11, s2
+// GFX1150: encoding: [0x77,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x05,0xa4]
+
+s_cvt_pk_rtz_f16_f32 s5, 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa4,0x56,0x34,0x12,0xaf]
+
+s_cvt_pk_rtz_f16_f32 s5, 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa4,0x73,0x72,0x71,0x3f]
+
+s_cvt_pk_rtz_f16_f32 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x05,0xa4]
+
+s_add_f16 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x85,0xa4]
+
+s_add_f16 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0xe9,0xa4]
+
+s_add_f16 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x85,0xa4]
+
+s_add_f16 s5, s101, s2
+// GFX1150: encoding: [0x65,0x02,0x85,0xa4]
+
+s_add_f16 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x85,0xa4]
+
+s_add_f16 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x85,0xa4]
+
+s_add_f16 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x85,0xa4]
+
+s_add_f16 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x85,0xa4]
+
+s_add_f16 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x85,0xa4]
+
+s_add_f16 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x85,0xa4]
+
+s_add_f16 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x85,0xa4]
+
+s_add_f16 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x85,0xa4]
+
+s_add_f16 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x85,0xa4]
+
+s_add_f16 s5, 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x85,0xa4,0x0b,0xfe,0x00,0x00]
+
+s_add_f16 s5, 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x85,0xa4,0x56,0x34,0x00,0x00]
+
+s_add_f16 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x85,0xa4]
+
+s_sub_f16 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x05,0xa5]
+
+s_sub_f16 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x69,0xa5]
+
+s_sub_f16 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x05,0xa5]
+
+s_sub_f16 s5, s101, s2
+// GFX1150: encoding: [0x65,0x02,0x05,0xa5]
+
+s_sub_f16 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x05,0xa5]
+
+s_sub_f16 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x05,0xa5]
+
+s_sub_f16 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x05,0xa5]
+
+s_sub_f16 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x05,0xa5]
+
+s_sub_f16 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x05,0xa5]
+
+s_sub_f16 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x05,0xa5]
+
+s_sub_f16 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x05,0xa5]
+
+s_sub_f16 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x05,0xa5]
+
+s_sub_f16 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x05,0xa5]
+
+s_sub_f16 s5, 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa5,0x0b,0xfe,0x00,0x00]
+
+s_sub_f16 s5, 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa5,0x56,0x34,0x00,0x00]
+
+s_sub_f16 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x05,0xa5]
+
+s_mul_f16 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x85,0xa6]
+
+s_mul_f16 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0xe9,0xa6]
+
+s_mul_f16 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x85,0xa6]
+
+s_mul_f16 s5, s101, s2
+// GFX1150: encoding: [0x65,0x02,0x85,0xa6]
+
+s_mul_f16 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x85,0xa6]
+
+s_mul_f16 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x85,0xa6]
+
+s_mul_f16 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x85,0xa6]
+
+s_mul_f16 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x85,0xa6]
+
+s_mul_f16 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x85,0xa6]
+
+s_mul_f16 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x85,0xa6]
+
+s_mul_f16 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x85,0xa6]
+
+s_mul_f16 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x85,0xa6]
+
+s_mul_f16 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x85,0xa6]
+
+s_mul_f16 s5, 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x85,0xa6,0x0b,0xfe,0x00,0x00]
+
+s_mul_f16 s5, 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x85,0xa6,0x56,0x34,0x00,0x00]
+
+s_mul_f16 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x85,0xa6]
+
+s_fmac_f16 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x05,0xa7]
+
+s_fmac_f16 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x69,0xa7]
+
+s_fmac_f16 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x05,0xa7]
+
+s_fmac_f16 s5, s103, s2
+// GFX1150: encoding: [0x67,0x02,0x05,0xa7]
+
+s_fmac_f16 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x05,0xa7]
+
+s_fmac_f16 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x05,0xa7]
+
+s_fmac_f16 s5, ttmp11, s2
+// GFX1150: encoding: [0x77,0x02,0x05,0xa7]
+
+s_fmac_f16 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x05,0xa7]
+
+s_fmac_f16 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x05,0xa7]
+
+s_fmac_f16 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x05,0xa7]
+
+s_fmac_f16 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x05,0xa7]
+
+s_fmac_f16 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x05,0xa7]
+
+s_fmac_f16 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x05,0xa7]
+
+s_fmac_f16 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x05,0xa7]
+
+s_fmac_f16 s5, 0x1234, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa7,0x34,0x12,0x00,0x00]
+
+s_fmac_f16 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x05,0xa7]
+
+s_max_f16 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x05,0xa6]
+
+s_max_f16 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x69,0xa6]
+
+s_max_f16 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x05,0xa6]
+
+s_max_f16 s5, s101, s2
+// GFX1150: encoding: [0x65,0x02,0x05,0xa6]
+
+s_max_f16 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x05,0xa6]
+
+s_max_f16 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x05,0xa6]
+
+s_max_f16 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x05,0xa6]
+
+s_max_f16 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x05,0xa6]
+
+s_max_f16 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x05,0xa6]
+
+s_max_f16 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x05,0xa6]
+
+s_max_f16 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x05,0xa6]
+
+s_max_f16 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x05,0xa6]
+
+s_max_f16 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x05,0xa6]
+
+s_max_f16 s5, 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa6,0x0b,0xfe,0x00,0x00]
+
+s_max_f16 s5, 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x05,0xa6,0x56,0x34,0x00,0x00]
+
+s_max_f16 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x05,0xa6]
+
+s_min_f16 s5, s1, s2
+// GFX1150: encoding: [0x01,0x02,0x85,0xa5]
+
+s_min_f16 s105, s1, s2
+// GFX1150: encoding: [0x01,0x02,0xe9,0xa5]
+
+s_min_f16 s5, s105, s2
+// GFX1150: encoding: [0x69,0x02,0x85,0xa5]
+
+s_min_f16 s5, s101, s2
+// GFX1150: encoding: [0x65,0x02,0x85,0xa5]
+
+s_min_f16 s5, vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x85,0xa5]
+
+s_min_f16 s5, vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x85,0xa5]
+
+s_min_f16 s5, m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x85,0xa5]
+
+s_min_f16 s5, exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x85,0xa5]
+
+s_min_f16 s5, exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x85,0xa5]
+
+s_min_f16 s5, 0, s2
+// GFX1150: encoding: [0x80,0x02,0x85,0xa5]
+
+s_min_f16 s5, -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x85,0xa5]
+
+s_min_f16 s5, 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x85,0xa5]
+
+s_min_f16 s5, -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x85,0xa5]
+
+s_min_f16 s5, 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x85,0xa5,0x0b,0xfe,0x00,0x00]
+
+s_min_f16 s5, 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x85,0xa5,0x56,0x34,0x00,0x00]
+
+s_min_f16 s5, s1, s105
+// GFX1150: encoding: [0x01,0x69,0x85,0xa5]
+
+s_cmp_lt_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x41,0xbf]
+
+s_cmp_lt_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x41,0xbf]
+
+s_cmp_lt_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x41,0xbf]
+
+s_cmp_lt_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x41,0xbf]
+
+s_cmp_lt_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x41,0xbf]
+
+s_cmp_lt_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x41,0xbf]
+
+s_cmp_lt_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x41,0xbf]
+
+s_cmp_lt_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x41,0xbf]
+
+s_cmp_lt_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x41,0xbf]
+
+s_cmp_lt_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x41,0xbf]
+
+s_cmp_lt_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x41,0xbf]
+
+s_cmp_lt_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x41,0xbf]
+
+s_cmp_lt_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x41,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_lt_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x41,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_lt_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x41,0xbf]
+
+s_cmp_eq_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x42,0xbf]
+
+s_cmp_eq_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x42,0xbf]
+
+s_cmp_eq_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x42,0xbf]
+
+s_cmp_eq_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x42,0xbf]
+
+s_cmp_eq_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x42,0xbf]
+
+s_cmp_eq_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x42,0xbf]
+
+s_cmp_eq_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x42,0xbf]
+
+s_cmp_eq_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x42,0xbf]
+
+s_cmp_eq_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x42,0xbf]
+
+s_cmp_eq_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x42,0xbf]
+
+s_cmp_eq_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x42,0xbf]
+
+s_cmp_eq_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x42,0xbf]
+
+s_cmp_eq_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x42,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_eq_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x42,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_eq_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x42,0xbf]
+
+s_cmp_le_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x43,0xbf]
+
+s_cmp_le_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x43,0xbf]
+
+s_cmp_le_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x43,0xbf]
+
+s_cmp_le_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x43,0xbf]
+
+s_cmp_le_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x43,0xbf]
+
+s_cmp_le_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x43,0xbf]
+
+s_cmp_le_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x43,0xbf]
+
+s_cmp_le_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x43,0xbf]
+
+s_cmp_le_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x43,0xbf]
+
+s_cmp_le_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x43,0xbf]
+
+s_cmp_le_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x43,0xbf]
+
+s_cmp_le_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x43,0xbf]
+
+s_cmp_le_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x43,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_le_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x43,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_le_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x43,0xbf]
+
+s_cmp_gt_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x44,0xbf]
+
+s_cmp_gt_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x44,0xbf]
+
+s_cmp_gt_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x44,0xbf]
+
+s_cmp_gt_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x44,0xbf]
+
+s_cmp_gt_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x44,0xbf]
+
+s_cmp_gt_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x44,0xbf]
+
+s_cmp_gt_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x44,0xbf]
+
+s_cmp_gt_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x44,0xbf]
+
+s_cmp_gt_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x44,0xbf]
+
+s_cmp_gt_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x44,0xbf]
+
+s_cmp_gt_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x44,0xbf]
+
+s_cmp_gt_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x44,0xbf]
+
+s_cmp_gt_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x44,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_gt_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x44,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_gt_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x44,0xbf]
+
+s_cmp_lg_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x45,0xbf]
+
+s_cmp_lg_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x45,0xbf]
+
+s_cmp_lg_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x45,0xbf]
+
+s_cmp_lg_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x45,0xbf]
+
+s_cmp_lg_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x45,0xbf]
+
+s_cmp_lg_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x45,0xbf]
+
+s_cmp_lg_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x45,0xbf]
+
+s_cmp_lg_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x45,0xbf]
+
+s_cmp_lg_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x45,0xbf]
+
+s_cmp_lg_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x45,0xbf]
+
+s_cmp_lg_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x45,0xbf]
+
+s_cmp_lg_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x45,0xbf]
+
+s_cmp_lg_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x45,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_lg_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x45,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_lg_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x45,0xbf]
+
+s_cmp_ge_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x46,0xbf]
+
+s_cmp_ge_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x46,0xbf]
+
+s_cmp_ge_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x46,0xbf]
+
+s_cmp_ge_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x46,0xbf]
+
+s_cmp_ge_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x46,0xbf]
+
+s_cmp_ge_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x46,0xbf]
+
+s_cmp_ge_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x46,0xbf]
+
+s_cmp_ge_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x46,0xbf]
+
+s_cmp_ge_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x46,0xbf]
+
+s_cmp_ge_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x46,0xbf]
+
+s_cmp_ge_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x46,0xbf]
+
+s_cmp_ge_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x46,0xbf]
+
+s_cmp_ge_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x46,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_ge_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x46,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_ge_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x46,0xbf]
+
+s_cmp_o_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x47,0xbf]
+
+s_cmp_o_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x47,0xbf]
+
+s_cmp_o_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x47,0xbf]
+
+s_cmp_o_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x47,0xbf]
+
+s_cmp_o_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x47,0xbf]
+
+s_cmp_o_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x47,0xbf]
+
+s_cmp_o_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x47,0xbf]
+
+s_cmp_o_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x47,0xbf]
+
+s_cmp_o_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x47,0xbf]
+
+s_cmp_o_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x47,0xbf]
+
+s_cmp_o_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x47,0xbf]
+
+s_cmp_o_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x47,0xbf]
+
+s_cmp_o_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x47,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_o_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x47,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_o_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x47,0xbf]
+
+s_cmp_u_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x48,0xbf]
+
+s_cmp_u_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x48,0xbf]
+
+s_cmp_u_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x48,0xbf]
+
+s_cmp_u_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x48,0xbf]
+
+s_cmp_u_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x48,0xbf]
+
+s_cmp_u_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x48,0xbf]
+
+s_cmp_u_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x48,0xbf]
+
+s_cmp_u_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x48,0xbf]
+
+s_cmp_u_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x48,0xbf]
+
+s_cmp_u_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x48,0xbf]
+
+s_cmp_u_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x48,0xbf]
+
+s_cmp_u_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x48,0xbf]
+
+s_cmp_u_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x48,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_u_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x48,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_u_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x48,0xbf]
+
+s_cmp_nge_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x49,0xbf]
+
+s_cmp_nge_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x49,0xbf]
+
+s_cmp_nge_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x49,0xbf]
+
+s_cmp_nge_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x49,0xbf]
+
+s_cmp_nge_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x49,0xbf]
+
+s_cmp_nge_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x49,0xbf]
+
+s_cmp_nge_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x49,0xbf]
+
+s_cmp_nge_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x49,0xbf]
+
+s_cmp_nge_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x49,0xbf]
+
+s_cmp_nge_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x49,0xbf]
+
+s_cmp_nge_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x49,0xbf]
+
+s_cmp_nge_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x49,0xbf]
+
+s_cmp_nge_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x49,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_nge_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x49,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_nge_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x49,0xbf]
+
+s_cmp_nlg_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x4a,0xbf]
+
+s_cmp_nlg_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x4a,0xbf]
+
+s_cmp_nlg_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x4a,0xbf]
+
+s_cmp_nlg_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x4a,0xbf]
+
+s_cmp_nlg_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x4a,0xbf]
+
+s_cmp_nlg_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x4a,0xbf]
+
+s_cmp_nlg_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x4a,0xbf]
+
+s_cmp_nlg_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x4a,0xbf]
+
+s_cmp_nlg_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x4a,0xbf]
+
+s_cmp_nlg_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x4a,0xbf]
+
+s_cmp_nlg_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x4a,0xbf]
+
+s_cmp_nlg_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x4a,0xbf]
+
+s_cmp_nlg_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x4a,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_nlg_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x4a,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_nlg_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x4a,0xbf]
+
+s_cmp_ngt_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x4b,0xbf]
+
+s_cmp_ngt_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x4b,0xbf]
+
+s_cmp_ngt_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x4b,0xbf]
+
+s_cmp_ngt_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x4b,0xbf]
+
+s_cmp_ngt_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x4b,0xbf]
+
+s_cmp_ngt_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x4b,0xbf]
+
+s_cmp_ngt_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x4b,0xbf]
+
+s_cmp_ngt_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x4b,0xbf]
+
+s_cmp_ngt_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x4b,0xbf]
+
+s_cmp_ngt_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x4b,0xbf]
+
+s_cmp_ngt_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x4b,0xbf]
+
+s_cmp_ngt_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x4b,0xbf]
+
+s_cmp_ngt_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x4b,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_ngt_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x4b,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_ngt_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x4b,0xbf]
+
+s_cmp_nle_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x4c,0xbf]
+
+s_cmp_nle_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x4c,0xbf]
+
+s_cmp_nle_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x4c,0xbf]
+
+s_cmp_nle_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x4c,0xbf]
+
+s_cmp_nle_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x4c,0xbf]
+
+s_cmp_nle_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x4c,0xbf]
+
+s_cmp_nle_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x4c,0xbf]
+
+s_cmp_nle_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x4c,0xbf]
+
+s_cmp_nle_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x4c,0xbf]
+
+s_cmp_nle_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x4c,0xbf]
+
+s_cmp_nle_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x4c,0xbf]
+
+s_cmp_nle_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x4c,0xbf]
+
+s_cmp_nle_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x4c,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_nle_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x4c,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_nle_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x4c,0xbf]
+
+s_cmp_neq_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x4d,0xbf]
+
+s_cmp_neq_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x4d,0xbf]
+
+s_cmp_neq_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x4d,0xbf]
+
+s_cmp_neq_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x4d,0xbf]
+
+s_cmp_neq_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x4d,0xbf]
+
+s_cmp_neq_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x4d,0xbf]
+
+s_cmp_neq_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x4d,0xbf]
+
+s_cmp_neq_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x4d,0xbf]
+
+s_cmp_neq_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x4d,0xbf]
+
+s_cmp_neq_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x4d,0xbf]
+
+s_cmp_neq_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x4d,0xbf]
+
+s_cmp_neq_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x4d,0xbf]
+
+s_cmp_neq_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x4d,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_neq_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x4d,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_neq_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x4d,0xbf]
+
+s_cmp_nlt_f32 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x4e,0xbf]
+
+s_cmp_nlt_f32 s105, s2
+// GFX1150: encoding: [0x69,0x02,0x4e,0xbf]
+
+s_cmp_nlt_f32 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x4e,0xbf]
+
+s_cmp_nlt_f32 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x4e,0xbf]
+
+s_cmp_nlt_f32 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x4e,0xbf]
+
+s_cmp_nlt_f32 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x4e,0xbf]
+
+s_cmp_nlt_f32 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x4e,0xbf]
+
+s_cmp_nlt_f32 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x4e,0xbf]
+
+s_cmp_nlt_f32 0, s2
+// GFX1150: encoding: [0x80,0x02,0x4e,0xbf]
+
+s_cmp_nlt_f32 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x4e,0xbf]
+
+s_cmp_nlt_f32 0.5, s2
+// GFX1150: encoding: [0xf0,0x02,0x4e,0xbf]
+
+s_cmp_nlt_f32 -4.0, s2
+// GFX1150: encoding: [0xf7,0x02,0x4e,0xbf]
+
+s_cmp_nlt_f32 0xaf123456, s2
+// GFX1150: encoding: [0xff,0x02,0x4e,0xbf,0x56,0x34,0x12,0xaf]
+
+s_cmp_nlt_f32 0x3f717273, s2
+// GFX1150: encoding: [0xff,0x02,0x4e,0xbf,0x73,0x72,0x71,0x3f]
+
+s_cmp_nlt_f32 s1, s105
+// GFX1150: encoding: [0x01,0x69,0x4e,0xbf]
+
+s_cmp_lt_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x51,0xbf]
+
+s_cmp_lt_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x51,0xbf]
+
+s_cmp_lt_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x51,0xbf]
+
+s_cmp_lt_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x51,0xbf]
+
+s_cmp_lt_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x51,0xbf]
+
+s_cmp_lt_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x51,0xbf]
+
+s_cmp_lt_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x51,0xbf]
+
+s_cmp_lt_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x51,0xbf]
+
+s_cmp_lt_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x51,0xbf]
+
+s_cmp_lt_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x51,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_lt_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x51,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_lt_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x51,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_eq_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x52,0xbf]
+
+s_cmp_eq_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x52,0xbf]
+
+s_cmp_eq_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x52,0xbf]
+
+s_cmp_eq_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x52,0xbf]
+
+s_cmp_eq_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x52,0xbf]
+
+s_cmp_eq_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x52,0xbf]
+
+s_cmp_eq_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x52,0xbf]
+
+s_cmp_eq_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x52,0xbf]
+
+s_cmp_eq_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x52,0xbf]
+
+s_cmp_eq_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x52,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_eq_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x52,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_eq_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x52,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_le_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x53,0xbf]
+
+s_cmp_le_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x53,0xbf]
+
+s_cmp_le_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x53,0xbf]
+
+s_cmp_le_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x53,0xbf]
+
+s_cmp_le_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x53,0xbf]
+
+s_cmp_le_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x53,0xbf]
+
+s_cmp_le_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x53,0xbf]
+
+s_cmp_le_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x53,0xbf]
+
+s_cmp_le_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x53,0xbf]
+
+s_cmp_le_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x53,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_le_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x53,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_le_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x53,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_gt_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x54,0xbf]
+
+s_cmp_gt_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x54,0xbf]
+
+s_cmp_gt_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x54,0xbf]
+
+s_cmp_gt_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x54,0xbf]
+
+s_cmp_gt_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x54,0xbf]
+
+s_cmp_gt_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x54,0xbf]
+
+s_cmp_gt_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x54,0xbf]
+
+s_cmp_gt_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x54,0xbf]
+
+s_cmp_gt_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x54,0xbf]
+
+s_cmp_gt_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x54,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_gt_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x54,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_gt_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x54,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_lg_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x55,0xbf]
+
+s_cmp_lg_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x55,0xbf]
+
+s_cmp_lg_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x55,0xbf]
+
+s_cmp_lg_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x55,0xbf]
+
+s_cmp_lg_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x55,0xbf]
+
+s_cmp_lg_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x55,0xbf]
+
+s_cmp_lg_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x55,0xbf]
+
+s_cmp_lg_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x55,0xbf]
+
+s_cmp_lg_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x55,0xbf]
+
+s_cmp_lg_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x55,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_lg_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x55,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_lg_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x55,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_ge_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x56,0xbf]
+
+s_cmp_ge_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x56,0xbf]
+
+s_cmp_ge_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x56,0xbf]
+
+s_cmp_ge_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x56,0xbf]
+
+s_cmp_ge_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x56,0xbf]
+
+s_cmp_ge_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x56,0xbf]
+
+s_cmp_ge_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x56,0xbf]
+
+s_cmp_ge_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x56,0xbf]
+
+s_cmp_ge_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x56,0xbf]
+
+s_cmp_ge_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x56,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_ge_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x56,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_ge_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x56,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_o_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x57,0xbf]
+
+s_cmp_o_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x57,0xbf]
+
+s_cmp_o_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x57,0xbf]
+
+s_cmp_o_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x57,0xbf]
+
+s_cmp_o_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x57,0xbf]
+
+s_cmp_o_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x57,0xbf]
+
+s_cmp_o_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x57,0xbf]
+
+s_cmp_o_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x57,0xbf]
+
+s_cmp_o_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x57,0xbf]
+
+s_cmp_o_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x57,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_o_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x57,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_o_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x57,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_u_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x58,0xbf]
+
+s_cmp_u_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x58,0xbf]
+
+s_cmp_u_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x58,0xbf]
+
+s_cmp_u_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x58,0xbf]
+
+s_cmp_u_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x58,0xbf]
+
+s_cmp_u_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x58,0xbf]
+
+s_cmp_u_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x58,0xbf]
+
+s_cmp_u_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x58,0xbf]
+
+s_cmp_u_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x58,0xbf]
+
+s_cmp_u_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x58,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_u_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x58,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_u_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x58,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_nge_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x59,0xbf]
+
+s_cmp_nge_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x59,0xbf]
+
+s_cmp_nge_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x59,0xbf]
+
+s_cmp_nge_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x59,0xbf]
+
+s_cmp_nge_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x59,0xbf]
+
+s_cmp_nge_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x59,0xbf]
+
+s_cmp_nge_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x59,0xbf]
+
+s_cmp_nge_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x59,0xbf]
+
+s_cmp_nge_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x59,0xbf]
+
+s_cmp_nge_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x59,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_nge_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x59,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_nge_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x59,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_nlg_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x5a,0xbf]
+
+s_cmp_nlg_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x5a,0xbf]
+
+s_cmp_nlg_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x5a,0xbf]
+
+s_cmp_nlg_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x5a,0xbf]
+
+s_cmp_nlg_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x5a,0xbf]
+
+s_cmp_nlg_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x5a,0xbf]
+
+s_cmp_nlg_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x5a,0xbf]
+
+s_cmp_nlg_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x5a,0xbf]
+
+s_cmp_nlg_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x5a,0xbf]
+
+s_cmp_nlg_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x5a,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_nlg_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x5a,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_nlg_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x5a,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_ngt_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x5b,0xbf]
+
+s_cmp_ngt_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x5b,0xbf]
+
+s_cmp_ngt_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x5b,0xbf]
+
+s_cmp_ngt_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x5b,0xbf]
+
+s_cmp_ngt_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x5b,0xbf]
+
+s_cmp_ngt_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x5b,0xbf]
+
+s_cmp_ngt_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x5b,0xbf]
+
+s_cmp_ngt_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x5b,0xbf]
+
+s_cmp_ngt_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x5b,0xbf]
+
+s_cmp_ngt_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x5b,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_ngt_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x5b,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_ngt_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x5b,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_nle_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x5c,0xbf]
+
+s_cmp_nle_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x5c,0xbf]
+
+s_cmp_nle_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x5c,0xbf]
+
+s_cmp_nle_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x5c,0xbf]
+
+s_cmp_nle_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x5c,0xbf]
+
+s_cmp_nle_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x5c,0xbf]
+
+s_cmp_nle_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x5c,0xbf]
+
+s_cmp_nle_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x5c,0xbf]
+
+s_cmp_nle_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x5c,0xbf]
+
+s_cmp_nle_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x5c,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_nle_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x5c,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_nle_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x5c,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_neq_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x5d,0xbf]
+
+s_cmp_neq_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x5d,0xbf]
+
+s_cmp_neq_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x5d,0xbf]
+
+s_cmp_neq_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x5d,0xbf]
+
+s_cmp_neq_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x5d,0xbf]
+
+s_cmp_neq_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x5d,0xbf]
+
+s_cmp_neq_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x5d,0xbf]
+
+s_cmp_neq_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x5d,0xbf]
+
+s_cmp_neq_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x5d,0xbf]
+
+s_cmp_neq_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x5d,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_neq_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x5d,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_neq_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x5d,0xbf,0x56,0x34,0x00,0x00]
+
+s_cmp_nlt_f16 s1, s2
+// GFX1150: encoding: [0x01,0x02,0x5e,0xbf]
+
+s_cmp_nlt_f16 s101, s2
+// GFX1150: encoding: [0x65,0x02,0x5e,0xbf]
+
+s_cmp_nlt_f16 vcc_lo, s2
+// GFX1150: encoding: [0x6a,0x02,0x5e,0xbf]
+
+s_cmp_nlt_f16 vcc_hi, s2
+// GFX1150: encoding: [0x6b,0x02,0x5e,0xbf]
+
+s_cmp_nlt_f16 m0, s2
+// GFX1150: encoding: [0x7d,0x02,0x5e,0xbf]
+
+s_cmp_nlt_f16 exec_lo, s2
+// GFX1150: encoding: [0x7e,0x02,0x5e,0xbf]
+
+s_cmp_nlt_f16 exec_hi, s2
+// GFX1150: encoding: [0x7f,0x02,0x5e,0xbf]
+
+s_cmp_nlt_f16 0, s2
+// GFX1150: encoding: [0x80,0x02,0x5e,0xbf]
+
+s_cmp_nlt_f16 -1, s2
+// GFX1150: encoding: [0xc1,0x02,0x5e,0xbf]
+
+s_cmp_nlt_f16 0x3800, s2
+// GFX1150: encoding: [0xff,0x02,0x5e,0xbf,0x00,0x38,0x00,0x00]
+
+s_cmp_nlt_f16 0xfe0b, s2
+// GFX1150: encoding: [0xff,0x02,0x5e,0xbf,0x0b,0xfe,0x00,0x00]
+
+s_cmp_nlt_f16 0x3456, s2
+// GFX1150: encoding: [0xff,0x02,0x5e,0xbf,0x56,0x34,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_unsupported.s b/llvm/test/MC/AMDGPU/gfx11_unsupported.s
index 4c2d4d3890cbb6a..5e9714be224edcf 100644
--- a/llvm/test/MC/AMDGPU/gfx11_unsupported.s
+++ b/llvm/test/MC/AMDGPU/gfx11_unsupported.s
@@ -1806,3 +1806,177 @@ v_subrev_u16_e64 v255, v1, v2
v_subrev_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cvt_f32_i32 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cvt_f32_u32 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cvt_u32_f32 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cvt_i32_f32 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cvt_f16_f32 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cvt_f32_f16 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cvt_hi_f32_f16 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_trunc_f32 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_ceil_f32 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_rndne_f32 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_floor_f32 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_floor_f16 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_ceil_f16 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_trunc_f16 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_rndne_f16 s5, s1
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_add_f32 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_sub_f32 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_mul_f32 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_min_f32 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_max_f32 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_fmac_f32 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_fmamk_f32 s5, s1, 0x11213141, s3
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_fmaak_f32 s5, s1, s2, 0x11213141
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cvt_pk_rtz_f16_f32 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_add_f16 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_sub_f16 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_mul_f16 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_fmac_f16 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_max_f16 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_min_f16 s5, s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_lt_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_eq_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_le_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_gt_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_lg_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_ge_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_o_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_u_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_nge_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_nlg_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_ngt_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_nle_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_neq_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_nlt_f32 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_lt_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_eq_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_le_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_gt_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_lg_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_ge_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_o_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_u_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_nge_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_nlg_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_ngt_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_nle_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_neq_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_cmp_nlt_f16 s1, s2
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1150_asm_salu_float.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1150_asm_salu_float.txt
new file mode 100644
index 000000000000000..14732def4bfee79
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1150_asm_salu_float.txt
@@ -0,0 +1,2527 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1150 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1150 %s
+
+# GFX1150: s_cvt_f32_i32 s5, s1 ; encoding: [0x01,0x64,0x85,0xbe]
+0x01,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s105, s1 ; encoding: [0x01,0x64,0xe9,0xbe]
+0x01,0x64,0xe9,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, s105 ; encoding: [0x69,0x64,0x85,0xbe]
+0x69,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, s103 ; encoding: [0x67,0x64,0x85,0xbe]
+0x67,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, vcc_lo ; encoding: [0x6a,0x64,0x85,0xbe]
+0x6a,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, vcc_hi ; encoding: [0x6b,0x64,0x85,0xbe]
+0x6b,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, ttmp11 ; encoding: [0x77,0x64,0x85,0xbe]
+0x77,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, m0 ; encoding: [0x7d,0x64,0x85,0xbe]
+0x7d,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, exec_lo ; encoding: [0x7e,0x64,0x85,0xbe]
+0x7e,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, exec_hi ; encoding: [0x7f,0x64,0x85,0xbe]
+0x7f,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, 0 ; encoding: [0x80,0x64,0x85,0xbe]
+0x80,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, -1 ; encoding: [0xc1,0x64,0x85,0xbe]
+0xc1,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, 0.5 ; encoding: [0xf0,0x64,0x85,0xbe]
+0xf0,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, -4.0 ; encoding: [0xf7,0x64,0x85,0xbe]
+0xf7,0x64,0x85,0xbe
+
+# GFX1150: s_cvt_f32_i32 s5, 0xaf123456 ; encoding: [0xff,0x64,0x85,0xbe,0x56,0x34,0x12,0xaf]
+0xff,0x64,0x85,0xbe,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cvt_f32_i32 s5, 0x3f717273 ; encoding: [0xff,0x64,0x85,0xbe,0x73,0x72,0x71,0x3f]
+0xff,0x64,0x85,0xbe,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cvt_f32_u32 s5, s1 ; encoding: [0x01,0x65,0x85,0xbe]
+0x01,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s105, s1 ; encoding: [0x01,0x65,0xe9,0xbe]
+0x01,0x65,0xe9,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, s105 ; encoding: [0x69,0x65,0x85,0xbe]
+0x69,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, s103 ; encoding: [0x67,0x65,0x85,0xbe]
+0x67,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, vcc_lo ; encoding: [0x6a,0x65,0x85,0xbe]
+0x6a,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, vcc_hi ; encoding: [0x6b,0x65,0x85,0xbe]
+0x6b,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, ttmp11 ; encoding: [0x77,0x65,0x85,0xbe]
+0x77,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, m0 ; encoding: [0x7d,0x65,0x85,0xbe]
+0x7d,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, exec_lo ; encoding: [0x7e,0x65,0x85,0xbe]
+0x7e,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, exec_hi ; encoding: [0x7f,0x65,0x85,0xbe]
+0x7f,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, 0 ; encoding: [0x80,0x65,0x85,0xbe]
+0x80,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, -1 ; encoding: [0xc1,0x65,0x85,0xbe]
+0xc1,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, 0.5 ; encoding: [0xf0,0x65,0x85,0xbe]
+0xf0,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, -4.0 ; encoding: [0xf7,0x65,0x85,0xbe]
+0xf7,0x65,0x85,0xbe
+
+# GFX1150: s_cvt_f32_u32 s5, 0xaf123456 ; encoding: [0xff,0x65,0x85,0xbe,0x56,0x34,0x12,0xaf]
+0xff,0x65,0x85,0xbe,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cvt_f32_u32 s5, 0x3f717273 ; encoding: [0xff,0x65,0x85,0xbe,0x73,0x72,0x71,0x3f]
+0xff,0x65,0x85,0xbe,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cvt_u32_f32 s5, s1 ; encoding: [0x01,0x67,0x85,0xbe]
+0x01,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s105, s1 ; encoding: [0x01,0x67,0xe9,0xbe]
+0x01,0x67,0xe9,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, s105 ; encoding: [0x69,0x67,0x85,0xbe]
+0x69,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, s103 ; encoding: [0x67,0x67,0x85,0xbe]
+0x67,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, vcc_lo ; encoding: [0x6a,0x67,0x85,0xbe]
+0x6a,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, vcc_hi ; encoding: [0x6b,0x67,0x85,0xbe]
+0x6b,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, ttmp11 ; encoding: [0x77,0x67,0x85,0xbe]
+0x77,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, m0 ; encoding: [0x7d,0x67,0x85,0xbe]
+0x7d,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, exec_lo ; encoding: [0x7e,0x67,0x85,0xbe]
+0x7e,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, exec_hi ; encoding: [0x7f,0x67,0x85,0xbe]
+0x7f,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, 0 ; encoding: [0x80,0x67,0x85,0xbe]
+0x80,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, -1 ; encoding: [0xc1,0x67,0x85,0xbe]
+0xc1,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, 0.5 ; encoding: [0xf0,0x67,0x85,0xbe]
+0xf0,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, -4.0 ; encoding: [0xf7,0x67,0x85,0xbe]
+0xf7,0x67,0x85,0xbe
+
+# GFX1150: s_cvt_u32_f32 s5, 0xaf123456 ; encoding: [0xff,0x67,0x85,0xbe,0x56,0x34,0x12,0xaf]
+0xff,0x67,0x85,0xbe,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cvt_u32_f32 s5, 0x3f717273 ; encoding: [0xff,0x67,0x85,0xbe,0x73,0x72,0x71,0x3f]
+0xff,0x67,0x85,0xbe,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cvt_i32_f32 s5, s1 ; encoding: [0x01,0x66,0x85,0xbe]
+0x01,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s105, s1 ; encoding: [0x01,0x66,0xe9,0xbe]
+0x01,0x66,0xe9,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, s105 ; encoding: [0x69,0x66,0x85,0xbe]
+0x69,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, s103 ; encoding: [0x67,0x66,0x85,0xbe]
+0x67,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, vcc_lo ; encoding: [0x6a,0x66,0x85,0xbe]
+0x6a,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, vcc_hi ; encoding: [0x6b,0x66,0x85,0xbe]
+0x6b,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, ttmp11 ; encoding: [0x77,0x66,0x85,0xbe]
+0x77,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, m0 ; encoding: [0x7d,0x66,0x85,0xbe]
+0x7d,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, exec_lo ; encoding: [0x7e,0x66,0x85,0xbe]
+0x7e,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, exec_hi ; encoding: [0x7f,0x66,0x85,0xbe]
+0x7f,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, 0 ; encoding: [0x80,0x66,0x85,0xbe]
+0x80,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, -1 ; encoding: [0xc1,0x66,0x85,0xbe]
+0xc1,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, 0.5 ; encoding: [0xf0,0x66,0x85,0xbe]
+0xf0,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, -4.0 ; encoding: [0xf7,0x66,0x85,0xbe]
+0xf7,0x66,0x85,0xbe
+
+# GFX1150: s_cvt_i32_f32 s5, 0xaf123456 ; encoding: [0xff,0x66,0x85,0xbe,0x56,0x34,0x12,0xaf]
+0xff,0x66,0x85,0xbe,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cvt_i32_f32 s5, 0x3f717273 ; encoding: [0xff,0x66,0x85,0xbe,0x73,0x72,0x71,0x3f]
+0xff,0x66,0x85,0xbe,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cvt_f16_f32 s5, s1 ; encoding: [0x01,0x68,0x85,0xbe]
+0x01,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s105, s1 ; encoding: [0x01,0x68,0xe9,0xbe]
+0x01,0x68,0xe9,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, s105 ; encoding: [0x69,0x68,0x85,0xbe]
+0x69,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, s103 ; encoding: [0x67,0x68,0x85,0xbe]
+0x67,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, vcc_lo ; encoding: [0x6a,0x68,0x85,0xbe]
+0x6a,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, vcc_hi ; encoding: [0x6b,0x68,0x85,0xbe]
+0x6b,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, ttmp11 ; encoding: [0x77,0x68,0x85,0xbe]
+0x77,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, m0 ; encoding: [0x7d,0x68,0x85,0xbe]
+0x7d,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, exec_lo ; encoding: [0x7e,0x68,0x85,0xbe]
+0x7e,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, exec_hi ; encoding: [0x7f,0x68,0x85,0xbe]
+0x7f,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, 0 ; encoding: [0x80,0x68,0x85,0xbe]
+0x80,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, -1 ; encoding: [0xc1,0x68,0x85,0xbe]
+0xc1,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, 0.5 ; encoding: [0xf0,0x68,0x85,0xbe]
+0xf0,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, -4.0 ; encoding: [0xf7,0x68,0x85,0xbe]
+0xf7,0x68,0x85,0xbe
+
+# GFX1150: s_cvt_f16_f32 s5, 0xaf123456 ; encoding: [0xff,0x68,0x85,0xbe,0x56,0x34,0x12,0xaf]
+0xff,0x68,0x85,0xbe,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cvt_f16_f32 s5, 0x3f717273 ; encoding: [0xff,0x68,0x85,0xbe,0x73,0x72,0x71,0x3f]
+0xff,0x68,0x85,0xbe,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cvt_f32_f16 s5, s1 ; encoding: [0x01,0x69,0x85,0xbe]
+0x01,0x69,0x85,0xbe
+
+# GFX1150: s_cvt_f32_f16 s105, s1 ; encoding: [0x01,0x69,0xe9,0xbe]
+0x01,0x69,0xe9,0xbe
+
+# GFX1150: s_cvt_f32_f16 s5, s105 ; encoding: [0x69,0x69,0x85,0xbe]
+0x69,0x69,0x85,0xbe
+
+# GFX1150: s_cvt_f32_f16 s5, s103 ; encoding: [0x67,0x69,0x85,0xbe]
+0x67,0x69,0x85,0xbe
+
+# GFX1150: s_cvt_f32_f16 s5, vcc_lo ; encoding: [0x6a,0x69,0x85,0xbe]
+0x6a,0x69,0x85,0xbe
+
+# GFX1150: s_cvt_f32_f16 s5, vcc_hi ; encoding: [0x6b,0x69,0x85,0xbe]
+0x6b,0x69,0x85,0xbe
+
+# GFX1150: s_cvt_f32_f16 s5, ttmp11 ; encoding: [0x77,0x69,0x85,0xbe]
+0x77,0x69,0x85,0xbe
+
+# GFX1150: s_cvt_f32_f16 s5, m0 ; encoding: [0x7d,0x69,0x85,0xbe]
+0x7d,0x69,0x85,0xbe
+
+# GFX1150: s_cvt_f32_f16 s5, exec_lo ; encoding: [0x7e,0x69,0x85,0xbe]
+0x7e,0x69,0x85,0xbe
+
+# GFX1150: s_cvt_f32_f16 s5, exec_hi ; encoding: [0x7f,0x69,0x85,0xbe]
+0x7f,0x69,0x85,0xbe
+
+# GFX1150: s_cvt_f32_f16 s5, 0 ; encoding: [0x80,0x69,0x85,0xbe]
+0x80,0x69,0x85,0xbe
+
+# GFX1150: s_cvt_f32_f16 s5, -1 ; encoding: [0xc1,0x69,0x85,0xbe]
+0xc1,0x69,0x85,0xbe
+
+# GFX1150: s_cvt_hi_f32_f16 s5, s1 ; encoding: [0x01,0x6a,0x85,0xbe]
+0x01,0x6a,0x85,0xbe
+
+# GFX1150: s_cvt_hi_f32_f16 s105, s1 ; encoding: [0x01,0x6a,0xe9,0xbe]
+0x01,0x6a,0xe9,0xbe
+
+# GFX1150: s_cvt_hi_f32_f16 s5, s105 ; encoding: [0x69,0x6a,0x85,0xbe]
+0x69,0x6a,0x85,0xbe
+
+# GFX1150: s_cvt_hi_f32_f16 s5, s103 ; encoding: [0x67,0x6a,0x85,0xbe]
+0x67,0x6a,0x85,0xbe
+
+# GFX1150: s_cvt_hi_f32_f16 s5, vcc_lo ; encoding: [0x6a,0x6a,0x85,0xbe]
+0x6a,0x6a,0x85,0xbe
+
+# GFX1150: s_cvt_hi_f32_f16 s5, vcc_hi ; encoding: [0x6b,0x6a,0x85,0xbe]
+0x6b,0x6a,0x85,0xbe
+
+# GFX1150: s_cvt_hi_f32_f16 s5, ttmp11 ; encoding: [0x77,0x6a,0x85,0xbe]
+0x77,0x6a,0x85,0xbe
+
+# GFX1150: s_cvt_hi_f32_f16 s5, m0 ; encoding: [0x7d,0x6a,0x85,0xbe]
+0x7d,0x6a,0x85,0xbe
+
+# GFX1150: s_cvt_hi_f32_f16 s5, exec_lo ; encoding: [0x7e,0x6a,0x85,0xbe]
+0x7e,0x6a,0x85,0xbe
+
+# GFX1150: s_cvt_hi_f32_f16 s5, exec_hi ; encoding: [0x7f,0x6a,0x85,0xbe]
+0x7f,0x6a,0x85,0xbe
+
+# GFX1150: s_cvt_hi_f32_f16 s5, 0 ; encoding: [0x80,0x6a,0x85,0xbe]
+0x80,0x6a,0x85,0xbe
+
+# GFX1150: s_cvt_hi_f32_f16 s5, -1 ; encoding: [0xc1,0x6a,0x85,0xbe]
+0xc1,0x6a,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, s1 ; encoding: [0x01,0x62,0x85,0xbe]
+0x01,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s105, s1 ; encoding: [0x01,0x62,0xe9,0xbe]
+0x01,0x62,0xe9,0xbe
+
+# GFX1150: s_trunc_f32 s5, s105 ; encoding: [0x69,0x62,0x85,0xbe]
+0x69,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, s103 ; encoding: [0x67,0x62,0x85,0xbe]
+0x67,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, vcc_lo ; encoding: [0x6a,0x62,0x85,0xbe]
+0x6a,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, vcc_hi ; encoding: [0x6b,0x62,0x85,0xbe]
+0x6b,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, ttmp11 ; encoding: [0x77,0x62,0x85,0xbe]
+0x77,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, m0 ; encoding: [0x7d,0x62,0x85,0xbe]
+0x7d,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, exec_lo ; encoding: [0x7e,0x62,0x85,0xbe]
+0x7e,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, exec_hi ; encoding: [0x7f,0x62,0x85,0xbe]
+0x7f,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, 0 ; encoding: [0x80,0x62,0x85,0xbe]
+0x80,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, -1 ; encoding: [0xc1,0x62,0x85,0xbe]
+0xc1,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, 0.5 ; encoding: [0xf0,0x62,0x85,0xbe]
+0xf0,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, -4.0 ; encoding: [0xf7,0x62,0x85,0xbe]
+0xf7,0x62,0x85,0xbe
+
+# GFX1150: s_trunc_f32 s5, 0xaf123456 ; encoding: [0xff,0x62,0x85,0xbe,0x56,0x34,0x12,0xaf]
+0xff,0x62,0x85,0xbe,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_trunc_f32 s5, 0x3f717273 ; encoding: [0xff,0x62,0x85,0xbe,0x73,0x72,0x71,0x3f]
+0xff,0x62,0x85,0xbe,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_ceil_f32 s5, s1 ; encoding: [0x01,0x60,0x85,0xbe]
+0x01,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s105, s1 ; encoding: [0x01,0x60,0xe9,0xbe]
+0x01,0x60,0xe9,0xbe
+
+# GFX1150: s_ceil_f32 s5, s105 ; encoding: [0x69,0x60,0x85,0xbe]
+0x69,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s5, s103 ; encoding: [0x67,0x60,0x85,0xbe]
+0x67,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s5, vcc_lo ; encoding: [0x6a,0x60,0x85,0xbe]
+0x6a,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s5, vcc_hi ; encoding: [0x6b,0x60,0x85,0xbe]
+0x6b,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s5, ttmp11 ; encoding: [0x77,0x60,0x85,0xbe]
+0x77,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s5, m0 ; encoding: [0x7d,0x60,0x85,0xbe]
+0x7d,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s5, exec_lo ; encoding: [0x7e,0x60,0x85,0xbe]
+0x7e,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s5, exec_hi ; encoding: [0x7f,0x60,0x85,0xbe]
+0x7f,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s5, 0 ; encoding: [0x80,0x60,0x85,0xbe]
+0x80,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s5, -1 ; encoding: [0xc1,0x60,0x85,0xbe]
+0xc1,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s5, 0.5 ; encoding: [0xf0,0x60,0x85,0xbe]
+0xf0,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s5, -4.0 ; encoding: [0xf7,0x60,0x85,0xbe]
+0xf7,0x60,0x85,0xbe
+
+# GFX1150: s_ceil_f32 s5, 0xaf123456 ; encoding: [0xff,0x60,0x85,0xbe,0x56,0x34,0x12,0xaf]
+0xff,0x60,0x85,0xbe,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_ceil_f32 s5, 0x3f717273 ; encoding: [0xff,0x60,0x85,0xbe,0x73,0x72,0x71,0x3f]
+0xff,0x60,0x85,0xbe,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_rndne_f32 s5, s1 ; encoding: [0x01,0x63,0x85,0xbe]
+0x01,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s105, s1 ; encoding: [0x01,0x63,0xe9,0xbe]
+0x01,0x63,0xe9,0xbe
+
+# GFX1150: s_rndne_f32 s5, s105 ; encoding: [0x69,0x63,0x85,0xbe]
+0x69,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s5, s103 ; encoding: [0x67,0x63,0x85,0xbe]
+0x67,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s5, vcc_lo ; encoding: [0x6a,0x63,0x85,0xbe]
+0x6a,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s5, vcc_hi ; encoding: [0x6b,0x63,0x85,0xbe]
+0x6b,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s5, ttmp11 ; encoding: [0x77,0x63,0x85,0xbe]
+0x77,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s5, m0 ; encoding: [0x7d,0x63,0x85,0xbe]
+0x7d,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s5, exec_lo ; encoding: [0x7e,0x63,0x85,0xbe]
+0x7e,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s5, exec_hi ; encoding: [0x7f,0x63,0x85,0xbe]
+0x7f,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s5, 0 ; encoding: [0x80,0x63,0x85,0xbe]
+0x80,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s5, -1 ; encoding: [0xc1,0x63,0x85,0xbe]
+0xc1,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s5, 0.5 ; encoding: [0xf0,0x63,0x85,0xbe]
+0xf0,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s5, -4.0 ; encoding: [0xf7,0x63,0x85,0xbe]
+0xf7,0x63,0x85,0xbe
+
+# GFX1150: s_rndne_f32 s5, 0xaf123456 ; encoding: [0xff,0x63,0x85,0xbe,0x56,0x34,0x12,0xaf]
+0xff,0x63,0x85,0xbe,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_rndne_f32 s5, 0x3f717273 ; encoding: [0xff,0x63,0x85,0xbe,0x73,0x72,0x71,0x3f]
+0xff,0x63,0x85,0xbe,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_floor_f32 s5, s1 ; encoding: [0x01,0x61,0x85,0xbe]
+0x01,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s105, s1 ; encoding: [0x01,0x61,0xe9,0xbe]
+0x01,0x61,0xe9,0xbe
+
+# GFX1150: s_floor_f32 s5, s105 ; encoding: [0x69,0x61,0x85,0xbe]
+0x69,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s5, s103 ; encoding: [0x67,0x61,0x85,0xbe]
+0x67,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s5, vcc_lo ; encoding: [0x6a,0x61,0x85,0xbe]
+0x6a,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s5, vcc_hi ; encoding: [0x6b,0x61,0x85,0xbe]
+0x6b,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s5, ttmp11 ; encoding: [0x77,0x61,0x85,0xbe]
+0x77,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s5, m0 ; encoding: [0x7d,0x61,0x85,0xbe]
+0x7d,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s5, exec_lo ; encoding: [0x7e,0x61,0x85,0xbe]
+0x7e,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s5, exec_hi ; encoding: [0x7f,0x61,0x85,0xbe]
+0x7f,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s5, 0 ; encoding: [0x80,0x61,0x85,0xbe]
+0x80,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s5, -1 ; encoding: [0xc1,0x61,0x85,0xbe]
+0xc1,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s5, 0.5 ; encoding: [0xf0,0x61,0x85,0xbe]
+0xf0,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s5, -4.0 ; encoding: [0xf7,0x61,0x85,0xbe]
+0xf7,0x61,0x85,0xbe
+
+# GFX1150: s_floor_f32 s5, 0xaf123456 ; encoding: [0xff,0x61,0x85,0xbe,0x56,0x34,0x12,0xaf]
+0xff,0x61,0x85,0xbe,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_floor_f32 s5, 0x3f717273 ; encoding: [0xff,0x61,0x85,0xbe,0x73,0x72,0x71,0x3f]
+0xff,0x61,0x85,0xbe,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_floor_f16 s5, s1 ; encoding: [0x01,0x6c,0x85,0xbe]
+0x01,0x6c,0x85,0xbe
+
+# GFX1150: s_floor_f16 s105, s1 ; encoding: [0x01,0x6c,0xe9,0xbe]
+0x01,0x6c,0xe9,0xbe
+
+# GFX1150: s_floor_f16 s5, s105 ; encoding: [0x69,0x6c,0x85,0xbe]
+0x69,0x6c,0x85,0xbe
+
+# GFX1150: s_floor_f16 s5, s101 ; encoding: [0x65,0x6c,0x85,0xbe]
+0x65,0x6c,0x85,0xbe
+
+# GFX1150: s_floor_f16 s5, vcc_lo ; encoding: [0x6a,0x6c,0x85,0xbe]
+0x6a,0x6c,0x85,0xbe
+
+# GFX1150: s_floor_f16 s5, vcc_hi ; encoding: [0x6b,0x6c,0x85,0xbe]
+0x6b,0x6c,0x85,0xbe
+
+# GFX1150: s_floor_f16 s5, m0 ; encoding: [0x7d,0x6c,0x85,0xbe]
+0x7d,0x6c,0x85,0xbe
+
+# GFX1150: s_floor_f16 s5, exec_lo ; encoding: [0x7e,0x6c,0x85,0xbe]
+0x7e,0x6c,0x85,0xbe
+
+# GFX1150: s_floor_f16 s5, exec_hi ; encoding: [0x7f,0x6c,0x85,0xbe]
+0x7f,0x6c,0x85,0xbe
+
+# GFX1150: s_floor_f16 s5, 0 ; encoding: [0x80,0x6c,0x85,0xbe]
+0x80,0x6c,0x85,0xbe
+
+# GFX1150: s_floor_f16 s5, -1 ; encoding: [0xc1,0x6c,0x85,0xbe]
+0xc1,0x6c,0x85,0xbe
+
+# GFX1150: s_floor_f16 s5, 0.5 ; encoding: [0xf0,0x6c,0x85,0xbe]
+0xf0,0x6c,0x85,0xbe
+
+# GFX1150: s_floor_f16 s5, -4.0 ; encoding: [0xf7,0x6c,0x85,0xbe]
+0xf7,0x6c,0x85,0xbe
+
+# GFX1150: s_floor_f16 s5, 0xfe0b ; encoding: [0xff,0x6c,0x85,0xbe,0x0b,0xfe,0x00,0x00]
+0xff,0x6c,0x85,0xbe,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_floor_f16 s5, 0x3456 ; encoding: [0xff,0x6c,0x85,0xbe,0x56,0x34,0x00,0x00]
+0xff,0x6c,0x85,0xbe,0x56,0x34,0x00,0x00
+
+# GFX1150: s_ceil_f16 s5, s1 ; encoding: [0x01,0x6b,0x85,0xbe]
+0x01,0x6b,0x85,0xbe
+
+# GFX1150: s_ceil_f16 s105, s1 ; encoding: [0x01,0x6b,0xe9,0xbe]
+0x01,0x6b,0xe9,0xbe
+
+# GFX1150: s_ceil_f16 s5, s105 ; encoding: [0x69,0x6b,0x85,0xbe]
+0x69,0x6b,0x85,0xbe
+
+# GFX1150: s_ceil_f16 s5, s101 ; encoding: [0x65,0x6b,0x85,0xbe]
+0x65,0x6b,0x85,0xbe
+
+# GFX1150: s_ceil_f16 s5, vcc_lo ; encoding: [0x6a,0x6b,0x85,0xbe]
+0x6a,0x6b,0x85,0xbe
+
+# GFX1150: s_ceil_f16 s5, vcc_hi ; encoding: [0x6b,0x6b,0x85,0xbe]
+0x6b,0x6b,0x85,0xbe
+
+# GFX1150: s_ceil_f16 s5, m0 ; encoding: [0x7d,0x6b,0x85,0xbe]
+0x7d,0x6b,0x85,0xbe
+
+# GFX1150: s_ceil_f16 s5, exec_lo ; encoding: [0x7e,0x6b,0x85,0xbe]
+0x7e,0x6b,0x85,0xbe
+
+# GFX1150: s_ceil_f16 s5, exec_hi ; encoding: [0x7f,0x6b,0x85,0xbe]
+0x7f,0x6b,0x85,0xbe
+
+# GFX1150: s_ceil_f16 s5, 0 ; encoding: [0x80,0x6b,0x85,0xbe]
+0x80,0x6b,0x85,0xbe
+
+# GFX1150: s_ceil_f16 s5, -1 ; encoding: [0xc1,0x6b,0x85,0xbe]
+0xc1,0x6b,0x85,0xbe
+
+# GFX1150: s_ceil_f16 s5, 0.5 ; encoding: [0xf0,0x6b,0x85,0xbe]
+0xf0,0x6b,0x85,0xbe
+
+# GFX1150: s_ceil_f16 s5, -4.0 ; encoding: [0xf7,0x6b,0x85,0xbe]
+0xf7,0x6b,0x85,0xbe
+
+# GFX1150: s_ceil_f16 s5, 0xfe0b ; encoding: [0xff,0x6b,0x85,0xbe,0x0b,0xfe,0x00,0x00]
+0xff,0x6b,0x85,0xbe,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_ceil_f16 s5, 0x3456 ; encoding: [0xff,0x6b,0x85,0xbe,0x56,0x34,0x00,0x00]
+0xff,0x6b,0x85,0xbe,0x56,0x34,0x00,0x00
+
+# GFX1150: s_trunc_f16 s5, s1 ; encoding: [0x01,0x6d,0x85,0xbe]
+0x01,0x6d,0x85,0xbe
+
+# GFX1150: s_trunc_f16 s105, s1 ; encoding: [0x01,0x6d,0xe9,0xbe]
+0x01,0x6d,0xe9,0xbe
+
+# GFX1150: s_trunc_f16 s5, s105 ; encoding: [0x69,0x6d,0x85,0xbe]
+0x69,0x6d,0x85,0xbe
+
+# GFX1150: s_trunc_f16 s5, s101 ; encoding: [0x65,0x6d,0x85,0xbe]
+0x65,0x6d,0x85,0xbe
+
+# GFX1150: s_trunc_f16 s5, vcc_lo ; encoding: [0x6a,0x6d,0x85,0xbe]
+0x6a,0x6d,0x85,0xbe
+
+# GFX1150: s_trunc_f16 s5, vcc_hi ; encoding: [0x6b,0x6d,0x85,0xbe]
+0x6b,0x6d,0x85,0xbe
+
+# GFX1150: s_trunc_f16 s5, m0 ; encoding: [0x7d,0x6d,0x85,0xbe]
+0x7d,0x6d,0x85,0xbe
+
+# GFX1150: s_trunc_f16 s5, exec_lo ; encoding: [0x7e,0x6d,0x85,0xbe]
+0x7e,0x6d,0x85,0xbe
+
+# GFX1150: s_trunc_f16 s5, exec_hi ; encoding: [0x7f,0x6d,0x85,0xbe]
+0x7f,0x6d,0x85,0xbe
+
+# GFX1150: s_trunc_f16 s5, 0 ; encoding: [0x80,0x6d,0x85,0xbe]
+0x80,0x6d,0x85,0xbe
+
+# GFX1150: s_trunc_f16 s5, -1 ; encoding: [0xc1,0x6d,0x85,0xbe]
+0xc1,0x6d,0x85,0xbe
+
+# GFX1150: s_trunc_f16 s5, 0.5 ; encoding: [0xf0,0x6d,0x85,0xbe]
+0xf0,0x6d,0x85,0xbe
+
+# GFX1150: s_trunc_f16 s5, -4.0 ; encoding: [0xf7,0x6d,0x85,0xbe]
+0xf7,0x6d,0x85,0xbe
+
+# GFX1150: s_trunc_f16 s5, 0xfe0b ; encoding: [0xff,0x6d,0x85,0xbe,0x0b,0xfe,0x00,0x00]
+0xff,0x6d,0x85,0xbe,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_trunc_f16 s5, 0x3456 ; encoding: [0xff,0x6d,0x85,0xbe,0x56,0x34,0x00,0x00]
+0xff,0x6d,0x85,0xbe,0x56,0x34,0x00,0x00
+
+# GFX1150: s_rndne_f16 s5, s1 ; encoding: [0x01,0x6e,0x85,0xbe]
+0x01,0x6e,0x85,0xbe
+
+# GFX1150: s_rndne_f16 s105, s1 ; encoding: [0x01,0x6e,0xe9,0xbe]
+0x01,0x6e,0xe9,0xbe
+
+# GFX1150: s_rndne_f16 s5, s105 ; encoding: [0x69,0x6e,0x85,0xbe]
+0x69,0x6e,0x85,0xbe
+
+# GFX1150: s_rndne_f16 s5, s101 ; encoding: [0x65,0x6e,0x85,0xbe]
+0x65,0x6e,0x85,0xbe
+
+# GFX1150: s_rndne_f16 s5, vcc_lo ; encoding: [0x6a,0x6e,0x85,0xbe]
+0x6a,0x6e,0x85,0xbe
+
+# GFX1150: s_rndne_f16 s5, vcc_hi ; encoding: [0x6b,0x6e,0x85,0xbe]
+0x6b,0x6e,0x85,0xbe
+
+# GFX1150: s_rndne_f16 s5, m0 ; encoding: [0x7d,0x6e,0x85,0xbe]
+0x7d,0x6e,0x85,0xbe
+
+# GFX1150: s_rndne_f16 s5, exec_lo ; encoding: [0x7e,0x6e,0x85,0xbe]
+0x7e,0x6e,0x85,0xbe
+
+# GFX1150: s_rndne_f16 s5, exec_hi ; encoding: [0x7f,0x6e,0x85,0xbe]
+0x7f,0x6e,0x85,0xbe
+
+# GFX1150: s_rndne_f16 s5, 0 ; encoding: [0x80,0x6e,0x85,0xbe]
+0x80,0x6e,0x85,0xbe
+
+# GFX1150: s_rndne_f16 s5, -1 ; encoding: [0xc1,0x6e,0x85,0xbe]
+0xc1,0x6e,0x85,0xbe
+
+# GFX1150: s_rndne_f16 s5, 0.5 ; encoding: [0xf0,0x6e,0x85,0xbe]
+0xf0,0x6e,0x85,0xbe
+
+# GFX1150: s_rndne_f16 s5, -4.0 ; encoding: [0xf7,0x6e,0x85,0xbe]
+0xf7,0x6e,0x85,0xbe
+
+# GFX1150: s_rndne_f16 s5, 0xfe0b ; encoding: [0xff,0x6e,0x85,0xbe,0x0b,0xfe,0x00,0x00]
+0xff,0x6e,0x85,0xbe,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_rndne_f16 s5, 0x3456 ; encoding: [0xff,0x6e,0x85,0xbe,0x56,0x34,0x00,0x00]
+0xff,0x6e,0x85,0xbe,0x56,0x34,0x00,0x00
+
+# GFX1150: s_add_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa0]
+0x01,0x02,0x05,0xa0
+
+# GFX1150: s_add_f32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0xa0]
+0x01,0x02,0x69,0xa0
+
+# GFX1150: s_add_f32 s5, s105, s2 ; encoding: [0x69,0x02,0x05,0xa0]
+0x69,0x02,0x05,0xa0
+
+# GFX1150: s_add_f32 s5, s101, s2 ; encoding: [0x65,0x02,0x05,0xa0]
+0x65,0x02,0x05,0xa0
+
+# GFX1150: s_add_f32 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x05,0xa0]
+0x6a,0x02,0x05,0xa0
+
+# GFX1150: s_add_f32 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x05,0xa0]
+0x6b,0x02,0x05,0xa0
+
+# GFX1150: s_add_f32 s5, m0, s2 ; encoding: [0x7d,0x02,0x05,0xa0]
+0x7d,0x02,0x05,0xa0
+
+# GFX1150: s_add_f32 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x05,0xa0]
+0x7e,0x02,0x05,0xa0
+
+# GFX1150: s_add_f32 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x05,0xa0]
+0x7f,0x02,0x05,0xa0
+
+# GFX1150: s_add_f32 s5, 0, s2 ; encoding: [0x80,0x02,0x05,0xa0]
+0x80,0x02,0x05,0xa0
+
+# GFX1150: s_add_f32 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0xa0]
+0xc1,0x02,0x05,0xa0
+
+# GFX1150: s_add_f32 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x05,0xa0]
+0xf0,0x02,0x05,0xa0
+
+# GFX1150: s_add_f32 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x05,0xa0]
+0xf7,0x02,0x05,0xa0
+
+# GFX1150: s_add_f32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x05,0xa0,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x05,0xa0,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_add_f32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x05,0xa0,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x05,0xa0,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_add_f32 s5, s1, s105 ; encoding: [0x01,0x69,0x05,0xa0]
+0x01,0x69,0x05,0xa0
+
+# GFX1150: s_sub_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x85,0xa0]
+0x01,0x02,0x85,0xa0
+
+# GFX1150: s_sub_f32 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0xa0]
+0x01,0x02,0xe9,0xa0
+
+# GFX1150: s_sub_f32 s5, s105, s2 ; encoding: [0x69,0x02,0x85,0xa0]
+0x69,0x02,0x85,0xa0
+
+# GFX1150: s_sub_f32 s5, s101, s2 ; encoding: [0x65,0x02,0x85,0xa0]
+0x65,0x02,0x85,0xa0
+
+# GFX1150: s_sub_f32 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x85,0xa0]
+0x6a,0x02,0x85,0xa0
+
+# GFX1150: s_sub_f32 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x85,0xa0]
+0x6b,0x02,0x85,0xa0
+
+# GFX1150: s_sub_f32 s5, m0, s2 ; encoding: [0x7d,0x02,0x85,0xa0]
+0x7d,0x02,0x85,0xa0
+
+# GFX1150: s_sub_f32 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x85,0xa0]
+0x7e,0x02,0x85,0xa0
+
+# GFX1150: s_sub_f32 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x85,0xa0]
+0x7f,0x02,0x85,0xa0
+
+# GFX1150: s_sub_f32 s5, 0, s2 ; encoding: [0x80,0x02,0x85,0xa0]
+0x80,0x02,0x85,0xa0
+
+# GFX1150: s_sub_f32 s5, -1, s2 ; encoding: [0xc1,0x02,0x85,0xa0]
+0xc1,0x02,0x85,0xa0
+
+# GFX1150: s_sub_f32 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x85,0xa0]
+0xf0,0x02,0x85,0xa0
+
+# GFX1150: s_sub_f32 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x85,0xa0]
+0xf7,0x02,0x85,0xa0
+
+# GFX1150: s_sub_f32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x85,0xa0,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x85,0xa0,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_sub_f32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x85,0xa0,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x85,0xa0,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_sub_f32 s5, s1, s105 ; encoding: [0x01,0x69,0x85,0xa0]
+0x01,0x69,0x85,0xa0
+
+# GFX1150: s_mul_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa2]
+0x01,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0xa2]
+0x01,0x02,0x69,0xa2
+
+# GFX1150: s_mul_f32 s5, s105, s2 ; encoding: [0x69,0x02,0x05,0xa2]
+0x69,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s5, s103, s2 ; encoding: [0x67,0x02,0x05,0xa2]
+0x67,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x05,0xa2]
+0x6a,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x05,0xa2]
+0x6b,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s5, ttmp11, s2 ; encoding: [0x77,0x02,0x05,0xa2]
+0x77,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s5, m0, s2 ; encoding: [0x7d,0x02,0x05,0xa2]
+0x7d,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x05,0xa2]
+0x7e,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x05,0xa2]
+0x7f,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s5, 0, s2 ; encoding: [0x80,0x02,0x05,0xa2]
+0x80,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0xa2]
+0xc1,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x05,0xa2]
+0xf0,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x05,0xa2]
+0xf7,0x02,0x05,0xa2
+
+# GFX1150: s_mul_f32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x05,0xa2,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x05,0xa2,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_mul_f32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x05,0xa2,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x05,0xa2,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_mul_f32 s5, s1, s105 ; encoding: [0x01,0x69,0x05,0xa2]
+0x01,0x69,0x05,0xa2
+
+# GFX1150: s_min_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa1]
+0x01,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0xa1]
+0x01,0x02,0x69,0xa1
+
+# GFX1150: s_min_f32 s5, s105, s2 ; encoding: [0x69,0x02,0x05,0xa1]
+0x69,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s5, s103, s2 ; encoding: [0x67,0x02,0x05,0xa1]
+0x67,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x05,0xa1]
+0x6a,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x05,0xa1]
+0x6b,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s5, ttmp11, s2 ; encoding: [0x77,0x02,0x05,0xa1]
+0x77,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s5, m0, s2 ; encoding: [0x7d,0x02,0x05,0xa1]
+0x7d,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x05,0xa1]
+0x7e,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x05,0xa1]
+0x7f,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s5, 0, s2 ; encoding: [0x80,0x02,0x05,0xa1]
+0x80,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0xa1]
+0xc1,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x05,0xa1]
+0xf0,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x05,0xa1]
+0xf7,0x02,0x05,0xa1
+
+# GFX1150: s_min_f32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x05,0xa1,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x05,0xa1,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_min_f32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x05,0xa1,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x05,0xa1,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_min_f32 s5, s1, s105 ; encoding: [0x01,0x69,0x05,0xa1]
+0x01,0x69,0x05,0xa1
+
+# GFX1150: s_max_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x85,0xa1]
+0x01,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0xa1]
+0x01,0x02,0xe9,0xa1
+
+# GFX1150: s_max_f32 s5, s105, s2 ; encoding: [0x69,0x02,0x85,0xa1]
+0x69,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s5, s103, s2 ; encoding: [0x67,0x02,0x85,0xa1]
+0x67,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x85,0xa1]
+0x6a,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x85,0xa1]
+0x6b,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s5, ttmp11, s2 ; encoding: [0x77,0x02,0x85,0xa1]
+0x77,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s5, m0, s2 ; encoding: [0x7d,0x02,0x85,0xa1]
+0x7d,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x85,0xa1]
+0x7e,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x85,0xa1]
+0x7f,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s5, 0, s2 ; encoding: [0x80,0x02,0x85,0xa1]
+0x80,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s5, -1, s2 ; encoding: [0xc1,0x02,0x85,0xa1]
+0xc1,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x85,0xa1]
+0xf0,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x85,0xa1]
+0xf7,0x02,0x85,0xa1
+
+# GFX1150: s_max_f32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x85,0xa1,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x85,0xa1,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_max_f32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x85,0xa1,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x85,0xa1,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_max_f32 s5, s1, s105 ; encoding: [0x01,0x69,0x85,0xa1]
+0x01,0x69,0x85,0xa1
+
+# GFX1150: s_fmac_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x85,0xa3]
+0x01,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0xa3]
+0x01,0x02,0xe9,0xa3
+
+# GFX1150: s_fmac_f32 s5, s105, s2 ; encoding: [0x69,0x02,0x85,0xa3]
+0x69,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s5, s103, s2 ; encoding: [0x67,0x02,0x85,0xa3]
+0x67,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x85,0xa3]
+0x6a,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x85,0xa3]
+0x6b,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s5, ttmp11, s2 ; encoding: [0x77,0x02,0x85,0xa3]
+0x77,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s5, m0, s2 ; encoding: [0x7d,0x02,0x85,0xa3]
+0x7d,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x85,0xa3]
+0x7e,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x85,0xa3]
+0x7f,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s5, 0, s2 ; encoding: [0x80,0x02,0x85,0xa3]
+0x80,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s5, -1, s2 ; encoding: [0xc1,0x02,0x85,0xa3]
+0xc1,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x85,0xa3]
+0xf0,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x85,0xa3]
+0xf7,0x02,0x85,0xa3
+
+# GFX1150: s_fmac_f32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x85,0xa3,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x85,0xa3,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_fmac_f32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x85,0xa3,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x85,0xa3,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_fmac_f32 s5, s1, s105 ; encoding: [0x01,0x69,0x85,0xa3]
+0x01,0x69,0x85,0xa3
+
+# GFX1150: s_fmamk_f32 s5, s1, 0x11213141, s3 ; encoding: [0x01,0x03,0x05,0xa3,0x41,0x31,0x21,0x11]
+0x01,0x03,0x05,0xa3,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmamk_f32 s105, s1, 0x11213141, s3 ; encoding: [0x01,0x03,0x69,0xa3,0x41,0x31,0x21,0x11]
+0x01,0x03,0x69,0xa3,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmamk_f32 s5, s105, 0x11213141, s3 ; encoding: [0x69,0x03,0x05,0xa3,0x41,0x31,0x21,0x11]
+0x69,0x03,0x05,0xa3,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmamk_f32 s5, 0, 0x11213141, s3 ; encoding: [0x80,0x03,0x05,0xa3,0x41,0x31,0x21,0x11]
+0x80,0x03,0x05,0xa3,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmamk_f32 s5, -1, 0x11213141, s3 ; encoding: [0xc1,0x03,0x05,0xa3,0x41,0x31,0x21,0x11]
+0xc1,0x03,0x05,0xa3,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmamk_f32 s5, 0.5, 0x11213141, s3 ; encoding: [0xf0,0x03,0x05,0xa3,0x41,0x31,0x21,0x11]
+0xf0,0x03,0x05,0xa3,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmamk_f32 s5, -4.0, 0x11213141, s3 ; encoding: [0xf7,0x03,0x05,0xa3,0x41,0x31,0x21,0x11]
+0xf7,0x03,0x05,0xa3,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmamk_f32 s5, s1, 0xa1b1c1d1, s3 ; encoding: [0x01,0x03,0x05,0xa3,0xd1,0xc1,0xb1,0xa1]
+0x01,0x03,0x05,0xa3,0xd1,0xc1,0xb1,0xa1
+
+# GFX1150: s_fmamk_f32 s5, s1, 0x11213141, s105 ; encoding: [0x01,0x69,0x05,0xa3,0x41,0x31,0x21,0x11]
+0x01,0x69,0x05,0xa3,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmamk_f32 s5, 0x11213141, 0x11213141, s105 ; encoding: [0xff,0x69,0x05,0xa3,0x41,0x31,0x21,0x11]
+0xff,0x69,0x05,0xa3,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmamk_f32 s5, s105, 0x11213141, 0x11213141 ; encoding: [0x69,0xff,0x05,0xa3,0x41,0x31,0x21,0x11]
+0x69,0xff,0x05,0xa3,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmaak_f32 s5, 0x11213141, 0x11213141, 0x11213141 ; encoding: [0xff,0xff,0x85,0xa2,0x41,0x31,0x21,0x11]
+0xff,0xff,0x85,0xa2,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmaak_f32 s5, s1, s2, 0x11213141 ; encoding: [0x01,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+0x01,0x02,0x85,0xa2,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmaak_f32 s105, s1, s2, 0x11213141 ; encoding: [0x01,0x02,0xe9,0xa2,0x41,0x31,0x21,0x11]
+0x01,0x02,0xe9,0xa2,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmaak_f32 s5, s105, s2, 0x11213141 ; encoding: [0x69,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+0x69,0x02,0x85,0xa2,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmaak_f32 s5, 0, s2, 0x11213141 ; encoding: [0x80,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+0x80,0x02,0x85,0xa2,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmaak_f32 s5, -1, s2, 0x11213141 ; encoding: [0xc1,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+0xc1,0x02,0x85,0xa2,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmaak_f32 s5, 0.5, s2, 0x11213141 ; encoding: [0xf0,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+0xf0,0x02,0x85,0xa2,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmaak_f32 s5, -4.0, s2, 0x11213141 ; encoding: [0xf7,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+0xf7,0x02,0x85,0xa2,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmaak_f32 s5, s1, s105, 0x11213141 ; encoding: [0x01,0x69,0x85,0xa2,0x41,0x31,0x21,0x11]
+0x01,0x69,0x85,0xa2,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmaak_f32 s5, s1, s2, 0xa1b1c1d1 ; encoding: [0x01,0x02,0x85,0xa2,0xd1,0xc1,0xb1,0xa1]
+0x01,0x02,0x85,0xa2,0xd1,0xc1,0xb1,0xa1
+
+# GFX1150: s_fmaak_f32 s5, 0x11213141, s2, 0x11213141 ; encoding: [0xff,0x02,0x85,0xa2,0x41,0x31,0x21,0x11]
+0xff,0x02,0x85,0xa2,0x41,0x31,0x21,0x11
+
+# GFX1150: s_fmaak_f32 s5, s105, 0x11213141, 0x11213141 ; encoding: [0x69,0xff,0x85,0xa2,0x41,0x31,0x21,0x11]
+0x69,0xff,0x85,0xa2,0x41,0x31,0x21,0x11
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa4]
+0x01,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0xa4]
+0x01,0x02,0x69,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, s105, s2 ; encoding: [0x69,0x02,0x05,0xa4]
+0x69,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, s103, s2 ; encoding: [0x67,0x02,0x05,0xa4]
+0x67,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x05,0xa4]
+0x6a,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x05,0xa4]
+0x6b,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, ttmp11, s2 ; encoding: [0x77,0x02,0x05,0xa4]
+0x77,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, m0, s2 ; encoding: [0x7d,0x02,0x05,0xa4]
+0x7d,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x05,0xa4]
+0x7e,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x05,0xa4]
+0x7f,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, 0, s2 ; encoding: [0x80,0x02,0x05,0xa4]
+0x80,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0xa4]
+0xc1,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x05,0xa4]
+0xf0,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x05,0xa4]
+0xf7,0x02,0x05,0xa4
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x05,0xa4,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x05,0xa4,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x05,0xa4,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x05,0xa4,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cvt_pk_rtz_f16_f32 s5, s1, s105 ; encoding: [0x01,0x69,0x05,0xa4]
+0x01,0x69,0x05,0xa4
+
+# GFX1150: s_add_f16 s5, s1, s2 ; encoding: [0x01,0x02,0x85,0xa4]
+0x01,0x02,0x85,0xa4
+
+# GFX1150: s_add_f16 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0xa4]
+0x01,0x02,0xe9,0xa4
+
+# GFX1150: s_add_f16 s5, s105, s2 ; encoding: [0x69,0x02,0x85,0xa4]
+0x69,0x02,0x85,0xa4
+
+# GFX1150: s_add_f16 s5, s101, s2 ; encoding: [0x65,0x02,0x85,0xa4]
+0x65,0x02,0x85,0xa4
+
+# GFX1150: s_add_f16 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x85,0xa4]
+0x6a,0x02,0x85,0xa4
+
+# GFX1150: s_add_f16 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x85,0xa4]
+0x6b,0x02,0x85,0xa4
+
+# GFX1150: s_add_f16 s5, m0, s2 ; encoding: [0x7d,0x02,0x85,0xa4]
+0x7d,0x02,0x85,0xa4
+
+# GFX1150: s_add_f16 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x85,0xa4]
+0x7e,0x02,0x85,0xa4
+
+# GFX1150: s_add_f16 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x85,0xa4]
+0x7f,0x02,0x85,0xa4
+
+# GFX1150: s_add_f16 s5, 0, s2 ; encoding: [0x80,0x02,0x85,0xa4]
+0x80,0x02,0x85,0xa4
+
+# GFX1150: s_add_f16 s5, -1, s2 ; encoding: [0xc1,0x02,0x85,0xa4]
+0xc1,0x02,0x85,0xa4
+
+# GFX1150: s_add_f16 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x85,0xa4]
+0xf0,0x02,0x85,0xa4
+
+# GFX1150: s_add_f16 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x85,0xa4]
+0xf7,0x02,0x85,0xa4
+
+# GFX1150: s_add_f16 s5, 0xfe0b, s2 ; encoding: [0xff,0x02,0x85,0xa4,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x85,0xa4,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_add_f16 s5, 0x3456, s2 ; encoding: [0xff,0x02,0x85,0xa4,0x56,0x34,0x00,0x00]
+0xff,0x02,0x85,0xa4,0x56,0x34,0x00,0x00
+
+# GFX1150: s_add_f16 s5, s1, s105 ; encoding: [0x01,0x69,0x85,0xa4]
+0x01,0x69,0x85,0xa4
+
+# GFX1150: s_sub_f16 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa5]
+0x01,0x02,0x05,0xa5
+
+# GFX1150: s_sub_f16 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0xa5]
+0x01,0x02,0x69,0xa5
+
+# GFX1150: s_sub_f16 s5, s105, s2 ; encoding: [0x69,0x02,0x05,0xa5]
+0x69,0x02,0x05,0xa5
+
+# GFX1150: s_sub_f16 s5, s101, s2 ; encoding: [0x65,0x02,0x05,0xa5]
+0x65,0x02,0x05,0xa5
+
+# GFX1150: s_sub_f16 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x05,0xa5]
+0x6a,0x02,0x05,0xa5
+
+# GFX1150: s_sub_f16 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x05,0xa5]
+0x6b,0x02,0x05,0xa5
+
+# GFX1150: s_sub_f16 s5, m0, s2 ; encoding: [0x7d,0x02,0x05,0xa5]
+0x7d,0x02,0x05,0xa5
+
+# GFX1150: s_sub_f16 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x05,0xa5]
+0x7e,0x02,0x05,0xa5
+
+# GFX1150: s_sub_f16 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x05,0xa5]
+0x7f,0x02,0x05,0xa5
+
+# GFX1150: s_sub_f16 s5, 0, s2 ; encoding: [0x80,0x02,0x05,0xa5]
+0x80,0x02,0x05,0xa5
+
+# GFX1150: s_sub_f16 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0xa5]
+0xc1,0x02,0x05,0xa5
+
+# GFX1150: s_sub_f16 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x05,0xa5]
+0xf0,0x02,0x05,0xa5
+
+# GFX1150: s_sub_f16 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x05,0xa5]
+0xf7,0x02,0x05,0xa5
+
+# GFX1150: s_sub_f16 s5, 0xfe0b, s2 ; encoding: [0xff,0x02,0x05,0xa5,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x05,0xa5,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_sub_f16 s5, 0x3456, s2 ; encoding: [0xff,0x02,0x05,0xa5,0x56,0x34,0x00,0x00]
+0xff,0x02,0x05,0xa5,0x56,0x34,0x00,0x00
+
+# GFX1150: s_sub_f16 s5, s1, s105 ; encoding: [0x01,0x69,0x05,0xa5]
+0x01,0x69,0x05,0xa5
+
+# GFX1150: s_mul_f16 s5, s1, s2 ; encoding: [0x01,0x02,0x85,0xa6]
+0x01,0x02,0x85,0xa6
+
+# GFX1150: s_mul_f16 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0xa6]
+0x01,0x02,0xe9,0xa6
+
+# GFX1150: s_mul_f16 s5, s105, s2 ; encoding: [0x69,0x02,0x85,0xa6]
+0x69,0x02,0x85,0xa6
+
+# GFX1150: s_mul_f16 s5, s101, s2 ; encoding: [0x65,0x02,0x85,0xa6]
+0x65,0x02,0x85,0xa6
+
+# GFX1150: s_mul_f16 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x85,0xa6]
+0x6a,0x02,0x85,0xa6
+
+# GFX1150: s_mul_f16 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x85,0xa6]
+0x6b,0x02,0x85,0xa6
+
+# GFX1150: s_mul_f16 s5, m0, s2 ; encoding: [0x7d,0x02,0x85,0xa6]
+0x7d,0x02,0x85,0xa6
+
+# GFX1150: s_mul_f16 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x85,0xa6]
+0x7e,0x02,0x85,0xa6
+
+# GFX1150: s_mul_f16 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x85,0xa6]
+0x7f,0x02,0x85,0xa6
+
+# GFX1150: s_mul_f16 s5, 0, s2 ; encoding: [0x80,0x02,0x85,0xa6]
+0x80,0x02,0x85,0xa6
+
+# GFX1150: s_mul_f16 s5, -1, s2 ; encoding: [0xc1,0x02,0x85,0xa6]
+0xc1,0x02,0x85,0xa6
+
+# GFX1150: s_mul_f16 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x85,0xa6]
+0xf0,0x02,0x85,0xa6
+
+# GFX1150: s_mul_f16 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x85,0xa6]
+0xf7,0x02,0x85,0xa6
+
+# GFX1150: s_mul_f16 s5, 0xfe0b, s2 ; encoding: [0xff,0x02,0x85,0xa6,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x85,0xa6,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_mul_f16 s5, 0x3456, s2 ; encoding: [0xff,0x02,0x85,0xa6,0x56,0x34,0x00,0x00]
+0xff,0x02,0x85,0xa6,0x56,0x34,0x00,0x00
+
+# GFX1150: s_mul_f16 s5, s1, s105 ; encoding: [0x01,0x69,0x85,0xa6]
+0x01,0x69,0x85,0xa6
+
+# GFX1150: s_fmac_f16 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa7]
+0x01,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0xa7]
+0x01,0x02,0x69,0xa7
+
+# GFX1150: s_fmac_f16 s5, s105, s2 ; encoding: [0x69,0x02,0x05,0xa7]
+0x69,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s5, s103, s2 ; encoding: [0x67,0x02,0x05,0xa7]
+0x67,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x05,0xa7]
+0x6a,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x05,0xa7]
+0x6b,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s5, ttmp11, s2 ; encoding: [0x77,0x02,0x05,0xa7]
+0x77,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s5, m0, s2 ; encoding: [0x7d,0x02,0x05,0xa7]
+0x7d,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x05,0xa7]
+0x7e,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x05,0xa7]
+0x7f,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s5, 0, s2 ; encoding: [0x80,0x02,0x05,0xa7]
+0x80,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0xa7]
+0xc1,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x05,0xa7]
+0xf0,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x05,0xa7]
+0xf7,0x02,0x05,0xa7
+
+# GFX1150: s_fmac_f16 s5, 0x1234, s2 ; encoding: [0xff,0x02,0x05,0xa7,0x34,0x12,0x00,0x00]
+0xff,0x02,0x05,0xa7,0x34,0x12,0x00,0x00
+
+# GFX1150: s_fmac_f16 s5, s1, s105 ; encoding: [0x01,0x69,0x05,0xa7]
+0x01,0x69,0x05,0xa7
+
+# GFX1150: s_max_f16 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa6]
+0x01,0x02,0x05,0xa6
+
+# GFX1150: s_max_f16 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0xa6]
+0x01,0x02,0x69,0xa6
+
+# GFX1150: s_max_f16 s5, s105, s2 ; encoding: [0x69,0x02,0x05,0xa6]
+0x69,0x02,0x05,0xa6
+
+# GFX1150: s_max_f16 s5, s101, s2 ; encoding: [0x65,0x02,0x05,0xa6]
+0x65,0x02,0x05,0xa6
+
+# GFX1150: s_max_f16 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x05,0xa6]
+0x6a,0x02,0x05,0xa6
+
+# GFX1150: s_max_f16 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x05,0xa6]
+0x6b,0x02,0x05,0xa6
+
+# GFX1150: s_max_f16 s5, m0, s2 ; encoding: [0x7d,0x02,0x05,0xa6]
+0x7d,0x02,0x05,0xa6
+
+# GFX1150: s_max_f16 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x05,0xa6]
+0x7e,0x02,0x05,0xa6
+
+# GFX1150: s_max_f16 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x05,0xa6]
+0x7f,0x02,0x05,0xa6
+
+# GFX1150: s_max_f16 s5, 0, s2 ; encoding: [0x80,0x02,0x05,0xa6]
+0x80,0x02,0x05,0xa6
+
+# GFX1150: s_max_f16 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0xa6]
+0xc1,0x02,0x05,0xa6
+
+# GFX1150: s_max_f16 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x05,0xa6]
+0xf0,0x02,0x05,0xa6
+
+# GFX1150: s_max_f16 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x05,0xa6]
+0xf7,0x02,0x05,0xa6
+
+# GFX1150: s_max_f16 s5, 0xfe0b, s2 ; encoding: [0xff,0x02,0x05,0xa6,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x05,0xa6,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_max_f16 s5, 0x3456, s2 ; encoding: [0xff,0x02,0x05,0xa6,0x56,0x34,0x00,0x00]
+0xff,0x02,0x05,0xa6,0x56,0x34,0x00,0x00
+
+# GFX1150: s_max_f16 s5, s1, s105 ; encoding: [0x01,0x69,0x05,0xa6]
+0x01,0x69,0x05,0xa6
+
+# GFX1150: s_min_f16 s5, s1, s2 ; encoding: [0x01,0x02,0x85,0xa5]
+0x01,0x02,0x85,0xa5
+
+# GFX1150: s_min_f16 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0xa5]
+0x01,0x02,0xe9,0xa5
+
+# GFX1150: s_min_f16 s5, s105, s2 ; encoding: [0x69,0x02,0x85,0xa5]
+0x69,0x02,0x85,0xa5
+
+# GFX1150: s_min_f16 s5, s101, s2 ; encoding: [0x65,0x02,0x85,0xa5]
+0x65,0x02,0x85,0xa5
+
+# GFX1150: s_min_f16 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x85,0xa5]
+0x6a,0x02,0x85,0xa5
+
+# GFX1150: s_min_f16 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x85,0xa5]
+0x6b,0x02,0x85,0xa5
+
+# GFX1150: s_min_f16 s5, m0, s2 ; encoding: [0x7d,0x02,0x85,0xa5]
+0x7d,0x02,0x85,0xa5
+
+# GFX1150: s_min_f16 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x85,0xa5]
+0x7e,0x02,0x85,0xa5
+
+# GFX1150: s_min_f16 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x85,0xa5]
+0x7f,0x02,0x85,0xa5
+
+# GFX1150: s_min_f16 s5, 0, s2 ; encoding: [0x80,0x02,0x85,0xa5]
+0x80,0x02,0x85,0xa5
+
+# GFX1150: s_min_f16 s5, -1, s2 ; encoding: [0xc1,0x02,0x85,0xa5]
+0xc1,0x02,0x85,0xa5
+
+# GFX1150: s_min_f16 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x85,0xa5]
+0xf0,0x02,0x85,0xa5
+
+# GFX1150: s_min_f16 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x85,0xa5]
+0xf7,0x02,0x85,0xa5
+
+# GFX1150: s_min_f16 s5, 0xfe0b, s2 ; encoding: [0xff,0x02,0x85,0xa5,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x85,0xa5,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_min_f16 s5, 0x3456, s2 ; encoding: [0xff,0x02,0x85,0xa5,0x56,0x34,0x00,0x00]
+0xff,0x02,0x85,0xa5,0x56,0x34,0x00,0x00
+
+# GFX1150: s_min_f16 s5, s1, s105 ; encoding: [0x01,0x69,0x85,0xa5]
+0x01,0x69,0x85,0xa5
+
+# GFX1150: s_cmp_lt_f32 s1, s2 ; encoding: [0x01,0x02,0x41,0xbf]
+0x01,0x02,0x41,0xbf
+
+# GFX1150: s_cmp_lt_f32 s105, s2 ; encoding: [0x69,0x02,0x41,0xbf]
+0x69,0x02,0x41,0xbf
+
+# GFX1150: s_cmp_lt_f32 s101, s2 ; encoding: [0x65,0x02,0x41,0xbf]
+0x65,0x02,0x41,0xbf
+
+# GFX1150: s_cmp_lt_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x41,0xbf]
+0x6a,0x02,0x41,0xbf
+
+# GFX1150: s_cmp_lt_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x41,0xbf]
+0x6b,0x02,0x41,0xbf
+
+# GFX1150: s_cmp_lt_f32 m0, s2 ; encoding: [0x7d,0x02,0x41,0xbf]
+0x7d,0x02,0x41,0xbf
+
+# GFX1150: s_cmp_lt_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x41,0xbf]
+0x7e,0x02,0x41,0xbf
+
+# GFX1150: s_cmp_lt_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x41,0xbf]
+0x7f,0x02,0x41,0xbf
+
+# GFX1150: s_cmp_lt_f32 0, s2 ; encoding: [0x80,0x02,0x41,0xbf]
+0x80,0x02,0x41,0xbf
+
+# GFX1150: s_cmp_lt_f32 -1, s2 ; encoding: [0xc1,0x02,0x41,0xbf]
+0xc1,0x02,0x41,0xbf
+
+# GFX1150: s_cmp_lt_f32 0.5, s2 ; encoding: [0xf0,0x02,0x41,0xbf]
+0xf0,0x02,0x41,0xbf
+
+# GFX1150: s_cmp_lt_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x41,0xbf]
+0xf7,0x02,0x41,0xbf
+
+# GFX1150: s_cmp_lt_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x41,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x41,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_lt_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x41,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x41,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_lt_f32 s1, s105 ; encoding: [0x01,0x69,0x41,0xbf]
+0x01,0x69,0x41,0xbf
+
+# GFX1150: s_cmp_eq_f32 s1, s2 ; encoding: [0x01,0x02,0x42,0xbf]
+0x01,0x02,0x42,0xbf
+
+# GFX1150: s_cmp_eq_f32 s105, s2 ; encoding: [0x69,0x02,0x42,0xbf]
+0x69,0x02,0x42,0xbf
+
+# GFX1150: s_cmp_eq_f32 s101, s2 ; encoding: [0x65,0x02,0x42,0xbf]
+0x65,0x02,0x42,0xbf
+
+# GFX1150: s_cmp_eq_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x42,0xbf]
+0x6a,0x02,0x42,0xbf
+
+# GFX1150: s_cmp_eq_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x42,0xbf]
+0x6b,0x02,0x42,0xbf
+
+# GFX1150: s_cmp_eq_f32 m0, s2 ; encoding: [0x7d,0x02,0x42,0xbf]
+0x7d,0x02,0x42,0xbf
+
+# GFX1150: s_cmp_eq_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x42,0xbf]
+0x7e,0x02,0x42,0xbf
+
+# GFX1150: s_cmp_eq_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x42,0xbf]
+0x7f,0x02,0x42,0xbf
+
+# GFX1150: s_cmp_eq_f32 0, s2 ; encoding: [0x80,0x02,0x42,0xbf]
+0x80,0x02,0x42,0xbf
+
+# GFX1150: s_cmp_eq_f32 -1, s2 ; encoding: [0xc1,0x02,0x42,0xbf]
+0xc1,0x02,0x42,0xbf
+
+# GFX1150: s_cmp_eq_f32 0.5, s2 ; encoding: [0xf0,0x02,0x42,0xbf]
+0xf0,0x02,0x42,0xbf
+
+# GFX1150: s_cmp_eq_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x42,0xbf]
+0xf7,0x02,0x42,0xbf
+
+# GFX1150: s_cmp_eq_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x42,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x42,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_eq_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x42,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x42,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_eq_f32 s1, s105 ; encoding: [0x01,0x69,0x42,0xbf]
+0x01,0x69,0x42,0xbf
+
+# GFX1150: s_cmp_le_f32 s1, s2 ; encoding: [0x01,0x02,0x43,0xbf]
+0x01,0x02,0x43,0xbf
+
+# GFX1150: s_cmp_le_f32 s105, s2 ; encoding: [0x69,0x02,0x43,0xbf]
+0x69,0x02,0x43,0xbf
+
+# GFX1150: s_cmp_le_f32 s101, s2 ; encoding: [0x65,0x02,0x43,0xbf]
+0x65,0x02,0x43,0xbf
+
+# GFX1150: s_cmp_le_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x43,0xbf]
+0x6a,0x02,0x43,0xbf
+
+# GFX1150: s_cmp_le_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x43,0xbf]
+0x6b,0x02,0x43,0xbf
+
+# GFX1150: s_cmp_le_f32 m0, s2 ; encoding: [0x7d,0x02,0x43,0xbf]
+0x7d,0x02,0x43,0xbf
+
+# GFX1150: s_cmp_le_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x43,0xbf]
+0x7e,0x02,0x43,0xbf
+
+# GFX1150: s_cmp_le_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x43,0xbf]
+0x7f,0x02,0x43,0xbf
+
+# GFX1150: s_cmp_le_f32 0, s2 ; encoding: [0x80,0x02,0x43,0xbf]
+0x80,0x02,0x43,0xbf
+
+# GFX1150: s_cmp_le_f32 -1, s2 ; encoding: [0xc1,0x02,0x43,0xbf]
+0xc1,0x02,0x43,0xbf
+
+# GFX1150: s_cmp_le_f32 0.5, s2 ; encoding: [0xf0,0x02,0x43,0xbf]
+0xf0,0x02,0x43,0xbf
+
+# GFX1150: s_cmp_le_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x43,0xbf]
+0xf7,0x02,0x43,0xbf
+
+# GFX1150: s_cmp_le_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x43,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x43,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_le_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x43,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x43,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_le_f32 s1, s105 ; encoding: [0x01,0x69,0x43,0xbf]
+0x01,0x69,0x43,0xbf
+
+# GFX1150: s_cmp_gt_f32 s1, s2 ; encoding: [0x01,0x02,0x44,0xbf]
+0x01,0x02,0x44,0xbf
+
+# GFX1150: s_cmp_gt_f32 s105, s2 ; encoding: [0x69,0x02,0x44,0xbf]
+0x69,0x02,0x44,0xbf
+
+# GFX1150: s_cmp_gt_f32 s101, s2 ; encoding: [0x65,0x02,0x44,0xbf]
+0x65,0x02,0x44,0xbf
+
+# GFX1150: s_cmp_gt_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x44,0xbf]
+0x6a,0x02,0x44,0xbf
+
+# GFX1150: s_cmp_gt_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x44,0xbf]
+0x6b,0x02,0x44,0xbf
+
+# GFX1150: s_cmp_gt_f32 m0, s2 ; encoding: [0x7d,0x02,0x44,0xbf]
+0x7d,0x02,0x44,0xbf
+
+# GFX1150: s_cmp_gt_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x44,0xbf]
+0x7e,0x02,0x44,0xbf
+
+# GFX1150: s_cmp_gt_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x44,0xbf]
+0x7f,0x02,0x44,0xbf
+
+# GFX1150: s_cmp_gt_f32 0, s2 ; encoding: [0x80,0x02,0x44,0xbf]
+0x80,0x02,0x44,0xbf
+
+# GFX1150: s_cmp_gt_f32 -1, s2 ; encoding: [0xc1,0x02,0x44,0xbf]
+0xc1,0x02,0x44,0xbf
+
+# GFX1150: s_cmp_gt_f32 0.5, s2 ; encoding: [0xf0,0x02,0x44,0xbf]
+0xf0,0x02,0x44,0xbf
+
+# GFX1150: s_cmp_gt_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x44,0xbf]
+0xf7,0x02,0x44,0xbf
+
+# GFX1150: s_cmp_gt_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x44,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x44,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_gt_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x44,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x44,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_gt_f32 s1, s105 ; encoding: [0x01,0x69,0x44,0xbf]
+0x01,0x69,0x44,0xbf
+
+# GFX1150: s_cmp_lg_f32 s1, s2 ; encoding: [0x01,0x02,0x45,0xbf]
+0x01,0x02,0x45,0xbf
+
+# GFX1150: s_cmp_lg_f32 s105, s2 ; encoding: [0x69,0x02,0x45,0xbf]
+0x69,0x02,0x45,0xbf
+
+# GFX1150: s_cmp_lg_f32 s101, s2 ; encoding: [0x65,0x02,0x45,0xbf]
+0x65,0x02,0x45,0xbf
+
+# GFX1150: s_cmp_lg_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x45,0xbf]
+0x6a,0x02,0x45,0xbf
+
+# GFX1150: s_cmp_lg_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x45,0xbf]
+0x6b,0x02,0x45,0xbf
+
+# GFX1150: s_cmp_lg_f32 m0, s2 ; encoding: [0x7d,0x02,0x45,0xbf]
+0x7d,0x02,0x45,0xbf
+
+# GFX1150: s_cmp_lg_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x45,0xbf]
+0x7e,0x02,0x45,0xbf
+
+# GFX1150: s_cmp_lg_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x45,0xbf]
+0x7f,0x02,0x45,0xbf
+
+# GFX1150: s_cmp_lg_f32 0, s2 ; encoding: [0x80,0x02,0x45,0xbf]
+0x80,0x02,0x45,0xbf
+
+# GFX1150: s_cmp_lg_f32 -1, s2 ; encoding: [0xc1,0x02,0x45,0xbf]
+0xc1,0x02,0x45,0xbf
+
+# GFX1150: s_cmp_lg_f32 0.5, s2 ; encoding: [0xf0,0x02,0x45,0xbf]
+0xf0,0x02,0x45,0xbf
+
+# GFX1150: s_cmp_lg_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x45,0xbf]
+0xf7,0x02,0x45,0xbf
+
+# GFX1150: s_cmp_lg_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x45,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x45,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_lg_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x45,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x45,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_lg_f32 s1, s105 ; encoding: [0x01,0x69,0x45,0xbf]
+0x01,0x69,0x45,0xbf
+
+# GFX1150: s_cmp_ge_f32 s1, s2 ; encoding: [0x01,0x02,0x46,0xbf]
+0x01,0x02,0x46,0xbf
+
+# GFX1150: s_cmp_ge_f32 s105, s2 ; encoding: [0x69,0x02,0x46,0xbf]
+0x69,0x02,0x46,0xbf
+
+# GFX1150: s_cmp_ge_f32 s101, s2 ; encoding: [0x65,0x02,0x46,0xbf]
+0x65,0x02,0x46,0xbf
+
+# GFX1150: s_cmp_ge_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x46,0xbf]
+0x6a,0x02,0x46,0xbf
+
+# GFX1150: s_cmp_ge_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x46,0xbf]
+0x6b,0x02,0x46,0xbf
+
+# GFX1150: s_cmp_ge_f32 m0, s2 ; encoding: [0x7d,0x02,0x46,0xbf]
+0x7d,0x02,0x46,0xbf
+
+# GFX1150: s_cmp_ge_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x46,0xbf]
+0x7e,0x02,0x46,0xbf
+
+# GFX1150: s_cmp_ge_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x46,0xbf]
+0x7f,0x02,0x46,0xbf
+
+# GFX1150: s_cmp_ge_f32 0, s2 ; encoding: [0x80,0x02,0x46,0xbf]
+0x80,0x02,0x46,0xbf
+
+# GFX1150: s_cmp_ge_f32 -1, s2 ; encoding: [0xc1,0x02,0x46,0xbf]
+0xc1,0x02,0x46,0xbf
+
+# GFX1150: s_cmp_ge_f32 0.5, s2 ; encoding: [0xf0,0x02,0x46,0xbf]
+0xf0,0x02,0x46,0xbf
+
+# GFX1150: s_cmp_ge_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x46,0xbf]
+0xf7,0x02,0x46,0xbf
+
+# GFX1150: s_cmp_ge_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x46,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x46,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_ge_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x46,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x46,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_ge_f32 s1, s105 ; encoding: [0x01,0x69,0x46,0xbf]
+0x01,0x69,0x46,0xbf
+
+# GFX1150: s_cmp_o_f32 s1, s2 ; encoding: [0x01,0x02,0x47,0xbf]
+0x01,0x02,0x47,0xbf
+
+# GFX1150: s_cmp_o_f32 s105, s2 ; encoding: [0x69,0x02,0x47,0xbf]
+0x69,0x02,0x47,0xbf
+
+# GFX1150: s_cmp_o_f32 s101, s2 ; encoding: [0x65,0x02,0x47,0xbf]
+0x65,0x02,0x47,0xbf
+
+# GFX1150: s_cmp_o_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x47,0xbf]
+0x6a,0x02,0x47,0xbf
+
+# GFX1150: s_cmp_o_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x47,0xbf]
+0x6b,0x02,0x47,0xbf
+
+# GFX1150: s_cmp_o_f32 m0, s2 ; encoding: [0x7d,0x02,0x47,0xbf]
+0x7d,0x02,0x47,0xbf
+
+# GFX1150: s_cmp_o_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x47,0xbf]
+0x7e,0x02,0x47,0xbf
+
+# GFX1150: s_cmp_o_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x47,0xbf]
+0x7f,0x02,0x47,0xbf
+
+# GFX1150: s_cmp_o_f32 0, s2 ; encoding: [0x80,0x02,0x47,0xbf]
+0x80,0x02,0x47,0xbf
+
+# GFX1150: s_cmp_o_f32 -1, s2 ; encoding: [0xc1,0x02,0x47,0xbf]
+0xc1,0x02,0x47,0xbf
+
+# GFX1150: s_cmp_o_f32 0.5, s2 ; encoding: [0xf0,0x02,0x47,0xbf]
+0xf0,0x02,0x47,0xbf
+
+# GFX1150: s_cmp_o_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x47,0xbf]
+0xf7,0x02,0x47,0xbf
+
+# GFX1150: s_cmp_o_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x47,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x47,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_o_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x47,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x47,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_o_f32 s1, s105 ; encoding: [0x01,0x69,0x47,0xbf]
+0x01,0x69,0x47,0xbf
+
+# GFX1150: s_cmp_u_f32 s1, s2 ; encoding: [0x01,0x02,0x48,0xbf]
+0x01,0x02,0x48,0xbf
+
+# GFX1150: s_cmp_u_f32 s105, s2 ; encoding: [0x69,0x02,0x48,0xbf]
+0x69,0x02,0x48,0xbf
+
+# GFX1150: s_cmp_u_f32 s101, s2 ; encoding: [0x65,0x02,0x48,0xbf]
+0x65,0x02,0x48,0xbf
+
+# GFX1150: s_cmp_u_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x48,0xbf]
+0x6a,0x02,0x48,0xbf
+
+# GFX1150: s_cmp_u_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x48,0xbf]
+0x6b,0x02,0x48,0xbf
+
+# GFX1150: s_cmp_u_f32 m0, s2 ; encoding: [0x7d,0x02,0x48,0xbf]
+0x7d,0x02,0x48,0xbf
+
+# GFX1150: s_cmp_u_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x48,0xbf]
+0x7e,0x02,0x48,0xbf
+
+# GFX1150: s_cmp_u_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x48,0xbf]
+0x7f,0x02,0x48,0xbf
+
+# GFX1150: s_cmp_u_f32 0, s2 ; encoding: [0x80,0x02,0x48,0xbf]
+0x80,0x02,0x48,0xbf
+
+# GFX1150: s_cmp_u_f32 -1, s2 ; encoding: [0xc1,0x02,0x48,0xbf]
+0xc1,0x02,0x48,0xbf
+
+# GFX1150: s_cmp_u_f32 0.5, s2 ; encoding: [0xf0,0x02,0x48,0xbf]
+0xf0,0x02,0x48,0xbf
+
+# GFX1150: s_cmp_u_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x48,0xbf]
+0xf7,0x02,0x48,0xbf
+
+# GFX1150: s_cmp_u_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x48,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x48,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_u_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x48,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x48,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_u_f32 s1, s105 ; encoding: [0x01,0x69,0x48,0xbf]
+0x01,0x69,0x48,0xbf
+
+# GFX1150: s_cmp_nge_f32 s1, s2 ; encoding: [0x01,0x02,0x49,0xbf]
+0x01,0x02,0x49,0xbf
+
+# GFX1150: s_cmp_nge_f32 s105, s2 ; encoding: [0x69,0x02,0x49,0xbf]
+0x69,0x02,0x49,0xbf
+
+# GFX1150: s_cmp_nge_f32 s101, s2 ; encoding: [0x65,0x02,0x49,0xbf]
+0x65,0x02,0x49,0xbf
+
+# GFX1150: s_cmp_nge_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x49,0xbf]
+0x6a,0x02,0x49,0xbf
+
+# GFX1150: s_cmp_nge_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x49,0xbf]
+0x6b,0x02,0x49,0xbf
+
+# GFX1150: s_cmp_nge_f32 m0, s2 ; encoding: [0x7d,0x02,0x49,0xbf]
+0x7d,0x02,0x49,0xbf
+
+# GFX1150: s_cmp_nge_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x49,0xbf]
+0x7e,0x02,0x49,0xbf
+
+# GFX1150: s_cmp_nge_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x49,0xbf]
+0x7f,0x02,0x49,0xbf
+
+# GFX1150: s_cmp_nge_f32 0, s2 ; encoding: [0x80,0x02,0x49,0xbf]
+0x80,0x02,0x49,0xbf
+
+# GFX1150: s_cmp_nge_f32 -1, s2 ; encoding: [0xc1,0x02,0x49,0xbf]
+0xc1,0x02,0x49,0xbf
+
+# GFX1150: s_cmp_nge_f32 0.5, s2 ; encoding: [0xf0,0x02,0x49,0xbf]
+0xf0,0x02,0x49,0xbf
+
+# GFX1150: s_cmp_nge_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x49,0xbf]
+0xf7,0x02,0x49,0xbf
+
+# GFX1150: s_cmp_nge_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x49,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x49,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_nge_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x49,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x49,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_nge_f32 s1, s105 ; encoding: [0x01,0x69,0x49,0xbf]
+0x01,0x69,0x49,0xbf
+
+# GFX1150: s_cmp_nlg_f32 s1, s2 ; encoding: [0x01,0x02,0x4a,0xbf]
+0x01,0x02,0x4a,0xbf
+
+# GFX1150: s_cmp_nlg_f32 s105, s2 ; encoding: [0x69,0x02,0x4a,0xbf]
+0x69,0x02,0x4a,0xbf
+
+# GFX1150: s_cmp_nlg_f32 s101, s2 ; encoding: [0x65,0x02,0x4a,0xbf]
+0x65,0x02,0x4a,0xbf
+
+# GFX1150: s_cmp_nlg_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x4a,0xbf]
+0x6a,0x02,0x4a,0xbf
+
+# GFX1150: s_cmp_nlg_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x4a,0xbf]
+0x6b,0x02,0x4a,0xbf
+
+# GFX1150: s_cmp_nlg_f32 m0, s2 ; encoding: [0x7d,0x02,0x4a,0xbf]
+0x7d,0x02,0x4a,0xbf
+
+# GFX1150: s_cmp_nlg_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x4a,0xbf]
+0x7e,0x02,0x4a,0xbf
+
+# GFX1150: s_cmp_nlg_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x4a,0xbf]
+0x7f,0x02,0x4a,0xbf
+
+# GFX1150: s_cmp_nlg_f32 0, s2 ; encoding: [0x80,0x02,0x4a,0xbf]
+0x80,0x02,0x4a,0xbf
+
+# GFX1150: s_cmp_nlg_f32 -1, s2 ; encoding: [0xc1,0x02,0x4a,0xbf]
+0xc1,0x02,0x4a,0xbf
+
+# GFX1150: s_cmp_nlg_f32 0.5, s2 ; encoding: [0xf0,0x02,0x4a,0xbf]
+0xf0,0x02,0x4a,0xbf
+
+# GFX1150: s_cmp_nlg_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x4a,0xbf]
+0xf7,0x02,0x4a,0xbf
+
+# GFX1150: s_cmp_nlg_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x4a,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x4a,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_nlg_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x4a,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x4a,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_nlg_f32 s1, s105 ; encoding: [0x01,0x69,0x4a,0xbf]
+0x01,0x69,0x4a,0xbf
+
+# GFX1150: s_cmp_ngt_f32 s1, s2 ; encoding: [0x01,0x02,0x4b,0xbf]
+0x01,0x02,0x4b,0xbf
+
+# GFX1150: s_cmp_ngt_f32 s105, s2 ; encoding: [0x69,0x02,0x4b,0xbf]
+0x69,0x02,0x4b,0xbf
+
+# GFX1150: s_cmp_ngt_f32 s101, s2 ; encoding: [0x65,0x02,0x4b,0xbf]
+0x65,0x02,0x4b,0xbf
+
+# GFX1150: s_cmp_ngt_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x4b,0xbf]
+0x6a,0x02,0x4b,0xbf
+
+# GFX1150: s_cmp_ngt_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x4b,0xbf]
+0x6b,0x02,0x4b,0xbf
+
+# GFX1150: s_cmp_ngt_f32 m0, s2 ; encoding: [0x7d,0x02,0x4b,0xbf]
+0x7d,0x02,0x4b,0xbf
+
+# GFX1150: s_cmp_ngt_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x4b,0xbf]
+0x7e,0x02,0x4b,0xbf
+
+# GFX1150: s_cmp_ngt_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x4b,0xbf]
+0x7f,0x02,0x4b,0xbf
+
+# GFX1150: s_cmp_ngt_f32 0, s2 ; encoding: [0x80,0x02,0x4b,0xbf]
+0x80,0x02,0x4b,0xbf
+
+# GFX1150: s_cmp_ngt_f32 -1, s2 ; encoding: [0xc1,0x02,0x4b,0xbf]
+0xc1,0x02,0x4b,0xbf
+
+# GFX1150: s_cmp_ngt_f32 0.5, s2 ; encoding: [0xf0,0x02,0x4b,0xbf]
+0xf0,0x02,0x4b,0xbf
+
+# GFX1150: s_cmp_ngt_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x4b,0xbf]
+0xf7,0x02,0x4b,0xbf
+
+# GFX1150: s_cmp_ngt_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x4b,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x4b,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_ngt_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x4b,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x4b,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_ngt_f32 s1, s105 ; encoding: [0x01,0x69,0x4b,0xbf]
+0x01,0x69,0x4b,0xbf
+
+# GFX1150: s_cmp_nle_f32 s1, s2 ; encoding: [0x01,0x02,0x4c,0xbf]
+0x01,0x02,0x4c,0xbf
+
+# GFX1150: s_cmp_nle_f32 s105, s2 ; encoding: [0x69,0x02,0x4c,0xbf]
+0x69,0x02,0x4c,0xbf
+
+# GFX1150: s_cmp_nle_f32 s101, s2 ; encoding: [0x65,0x02,0x4c,0xbf]
+0x65,0x02,0x4c,0xbf
+
+# GFX1150: s_cmp_nle_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x4c,0xbf]
+0x6a,0x02,0x4c,0xbf
+
+# GFX1150: s_cmp_nle_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x4c,0xbf]
+0x6b,0x02,0x4c,0xbf
+
+# GFX1150: s_cmp_nle_f32 m0, s2 ; encoding: [0x7d,0x02,0x4c,0xbf]
+0x7d,0x02,0x4c,0xbf
+
+# GFX1150: s_cmp_nle_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x4c,0xbf]
+0x7e,0x02,0x4c,0xbf
+
+# GFX1150: s_cmp_nle_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x4c,0xbf]
+0x7f,0x02,0x4c,0xbf
+
+# GFX1150: s_cmp_nle_f32 0, s2 ; encoding: [0x80,0x02,0x4c,0xbf]
+0x80,0x02,0x4c,0xbf
+
+# GFX1150: s_cmp_nle_f32 -1, s2 ; encoding: [0xc1,0x02,0x4c,0xbf]
+0xc1,0x02,0x4c,0xbf
+
+# GFX1150: s_cmp_nle_f32 0.5, s2 ; encoding: [0xf0,0x02,0x4c,0xbf]
+0xf0,0x02,0x4c,0xbf
+
+# GFX1150: s_cmp_nle_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x4c,0xbf]
+0xf7,0x02,0x4c,0xbf
+
+# GFX1150: s_cmp_nle_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x4c,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x4c,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_nle_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x4c,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x4c,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_nle_f32 s1, s105 ; encoding: [0x01,0x69,0x4c,0xbf]
+0x01,0x69,0x4c,0xbf
+
+# GFX1150: s_cmp_neq_f32 s1, s2 ; encoding: [0x01,0x02,0x4d,0xbf]
+0x01,0x02,0x4d,0xbf
+
+# GFX1150: s_cmp_neq_f32 s105, s2 ; encoding: [0x69,0x02,0x4d,0xbf]
+0x69,0x02,0x4d,0xbf
+
+# GFX1150: s_cmp_neq_f32 s101, s2 ; encoding: [0x65,0x02,0x4d,0xbf]
+0x65,0x02,0x4d,0xbf
+
+# GFX1150: s_cmp_neq_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x4d,0xbf]
+0x6a,0x02,0x4d,0xbf
+
+# GFX1150: s_cmp_neq_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x4d,0xbf]
+0x6b,0x02,0x4d,0xbf
+
+# GFX1150: s_cmp_neq_f32 m0, s2 ; encoding: [0x7d,0x02,0x4d,0xbf]
+0x7d,0x02,0x4d,0xbf
+
+# GFX1150: s_cmp_neq_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x4d,0xbf]
+0x7e,0x02,0x4d,0xbf
+
+# GFX1150: s_cmp_neq_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x4d,0xbf]
+0x7f,0x02,0x4d,0xbf
+
+# GFX1150: s_cmp_neq_f32 0, s2 ; encoding: [0x80,0x02,0x4d,0xbf]
+0x80,0x02,0x4d,0xbf
+
+# GFX1150: s_cmp_neq_f32 -1, s2 ; encoding: [0xc1,0x02,0x4d,0xbf]
+0xc1,0x02,0x4d,0xbf
+
+# GFX1150: s_cmp_neq_f32 0.5, s2 ; encoding: [0xf0,0x02,0x4d,0xbf]
+0xf0,0x02,0x4d,0xbf
+
+# GFX1150: s_cmp_neq_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x4d,0xbf]
+0xf7,0x02,0x4d,0xbf
+
+# GFX1150: s_cmp_neq_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x4d,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x4d,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_neq_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x4d,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x4d,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_neq_f32 s1, s105 ; encoding: [0x01,0x69,0x4d,0xbf]
+0x01,0x69,0x4d,0xbf
+
+# GFX1150: s_cmp_nlt_f32 s1, s2 ; encoding: [0x01,0x02,0x4e,0xbf]
+0x01,0x02,0x4e,0xbf
+
+# GFX1150: s_cmp_nlt_f32 s105, s2 ; encoding: [0x69,0x02,0x4e,0xbf]
+0x69,0x02,0x4e,0xbf
+
+# GFX1150: s_cmp_nlt_f32 s101, s2 ; encoding: [0x65,0x02,0x4e,0xbf]
+0x65,0x02,0x4e,0xbf
+
+# GFX1150: s_cmp_nlt_f32 vcc_lo, s2 ; encoding: [0x6a,0x02,0x4e,0xbf]
+0x6a,0x02,0x4e,0xbf
+
+# GFX1150: s_cmp_nlt_f32 vcc_hi, s2 ; encoding: [0x6b,0x02,0x4e,0xbf]
+0x6b,0x02,0x4e,0xbf
+
+# GFX1150: s_cmp_nlt_f32 m0, s2 ; encoding: [0x7d,0x02,0x4e,0xbf]
+0x7d,0x02,0x4e,0xbf
+
+# GFX1150: s_cmp_nlt_f32 exec_lo, s2 ; encoding: [0x7e,0x02,0x4e,0xbf]
+0x7e,0x02,0x4e,0xbf
+
+# GFX1150: s_cmp_nlt_f32 exec_hi, s2 ; encoding: [0x7f,0x02,0x4e,0xbf]
+0x7f,0x02,0x4e,0xbf
+
+# GFX1150: s_cmp_nlt_f32 0, s2 ; encoding: [0x80,0x02,0x4e,0xbf]
+0x80,0x02,0x4e,0xbf
+
+# GFX1150: s_cmp_nlt_f32 -1, s2 ; encoding: [0xc1,0x02,0x4e,0xbf]
+0xc1,0x02,0x4e,0xbf
+
+# GFX1150: s_cmp_nlt_f32 0.5, s2 ; encoding: [0xf0,0x02,0x4e,0xbf]
+0xf0,0x02,0x4e,0xbf
+
+# GFX1150: s_cmp_nlt_f32 -4.0, s2 ; encoding: [0xf7,0x02,0x4e,0xbf]
+0xf7,0x02,0x4e,0xbf
+
+# GFX1150: s_cmp_nlt_f32 0xaf123456, s2 ; encoding: [0xff,0x02,0x4e,0xbf,0x56,0x34,0x12,0xaf]
+0xff,0x02,0x4e,0xbf,0x56,0x34,0x12,0xaf
+
+# GFX1150: s_cmp_nlt_f32 0x3f717273, s2 ; encoding: [0xff,0x02,0x4e,0xbf,0x73,0x72,0x71,0x3f]
+0xff,0x02,0x4e,0xbf,0x73,0x72,0x71,0x3f
+
+# GFX1150: s_cmp_nlt_f32 s1, s105 ; encoding: [0x01,0x69,0x4e,0xbf]
+0x01,0x69,0x4e,0xbf
+
+# GFX1150: s_cmp_lt_f16 s1, s2 ; encoding: [0x01,0x02,0x51,0xbf]
+0x01,0x02,0x51,0xbf
+
+# GFX1150: s_cmp_lt_f16 s101, s2 ; encoding: [0x65,0x02,0x51,0xbf]
+0x65,0x02,0x51,0xbf
+
+# GFX1150: s_cmp_lt_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x51,0xbf]
+0x6a,0x02,0x51,0xbf
+
+# GFX1150: s_cmp_lt_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x51,0xbf]
+0x6b,0x02,0x51,0xbf
+
+# GFX1150: s_cmp_lt_f16 m0, s2 ; encoding: [0x7d,0x02,0x51,0xbf]
+0x7d,0x02,0x51,0xbf
+
+# GFX1150: s_cmp_lt_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x51,0xbf]
+0x7e,0x02,0x51,0xbf
+
+# GFX1150: s_cmp_lt_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x51,0xbf]
+0x7f,0x02,0x51,0xbf
+
+# GFX1150: s_cmp_lt_f16 0, s2 ; encoding: [0x80,0x02,0x51,0xbf]
+0x80,0x02,0x51,0xbf
+
+# GFX1150: s_cmp_lt_f16 -1, s2 ; encoding: [0xc1,0x02,0x51,0xbf]
+0xc1,0x02,0x51,0xbf
+
+# GFX1150: s_cmp_lt_f16 0x3800, s2 ; encoding: [0xff,0x02,0x51,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x51,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_lt_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x51,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x51,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_lt_f16 0x3456, s2 ; encoding: [0xff,0x02,0x51,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x51,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_eq_f16 s1, s2 ; encoding: [0x01,0x02,0x52,0xbf]
+0x01,0x02,0x52,0xbf
+
+# GFX1150: s_cmp_eq_f16 s101, s2 ; encoding: [0x65,0x02,0x52,0xbf]
+0x65,0x02,0x52,0xbf
+
+# GFX1150: s_cmp_eq_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x52,0xbf]
+0x6a,0x02,0x52,0xbf
+
+# GFX1150: s_cmp_eq_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x52,0xbf]
+0x6b,0x02,0x52,0xbf
+
+# GFX1150: s_cmp_eq_f16 m0, s2 ; encoding: [0x7d,0x02,0x52,0xbf]
+0x7d,0x02,0x52,0xbf
+
+# GFX1150: s_cmp_eq_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x52,0xbf]
+0x7e,0x02,0x52,0xbf
+
+# GFX1150: s_cmp_eq_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x52,0xbf]
+0x7f,0x02,0x52,0xbf
+
+# GFX1150: s_cmp_eq_f16 0, s2 ; encoding: [0x80,0x02,0x52,0xbf]
+0x80,0x02,0x52,0xbf
+
+# GFX1150: s_cmp_eq_f16 -1, s2 ; encoding: [0xc1,0x02,0x52,0xbf]
+0xc1,0x02,0x52,0xbf
+
+# GFX1150: s_cmp_eq_f16 0x3800, s2 ; encoding: [0xff,0x02,0x52,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x52,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_eq_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x52,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x52,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_eq_f16 0x3456, s2 ; encoding: [0xff,0x02,0x52,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x52,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_le_f16 s1, s2 ; encoding: [0x01,0x02,0x53,0xbf]
+0x01,0x02,0x53,0xbf
+
+# GFX1150: s_cmp_le_f16 s101, s2 ; encoding: [0x65,0x02,0x53,0xbf]
+0x65,0x02,0x53,0xbf
+
+# GFX1150: s_cmp_le_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x53,0xbf]
+0x6a,0x02,0x53,0xbf
+
+# GFX1150: s_cmp_le_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x53,0xbf]
+0x6b,0x02,0x53,0xbf
+
+# GFX1150: s_cmp_le_f16 m0, s2 ; encoding: [0x7d,0x02,0x53,0xbf]
+0x7d,0x02,0x53,0xbf
+
+# GFX1150: s_cmp_le_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x53,0xbf]
+0x7e,0x02,0x53,0xbf
+
+# GFX1150: s_cmp_le_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x53,0xbf]
+0x7f,0x02,0x53,0xbf
+
+# GFX1150: s_cmp_le_f16 0, s2 ; encoding: [0x80,0x02,0x53,0xbf]
+0x80,0x02,0x53,0xbf
+
+# GFX1150: s_cmp_le_f16 -1, s2 ; encoding: [0xc1,0x02,0x53,0xbf]
+0xc1,0x02,0x53,0xbf
+
+# GFX1150: s_cmp_le_f16 0x3800, s2 ; encoding: [0xff,0x02,0x53,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x53,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_le_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x53,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x53,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_le_f16 0x3456, s2 ; encoding: [0xff,0x02,0x53,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x53,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_gt_f16 s1, s2 ; encoding: [0x01,0x02,0x54,0xbf]
+0x01,0x02,0x54,0xbf
+
+# GFX1150: s_cmp_gt_f16 s101, s2 ; encoding: [0x65,0x02,0x54,0xbf]
+0x65,0x02,0x54,0xbf
+
+# GFX1150: s_cmp_gt_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x54,0xbf]
+0x6a,0x02,0x54,0xbf
+
+# GFX1150: s_cmp_gt_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x54,0xbf]
+0x6b,0x02,0x54,0xbf
+
+# GFX1150: s_cmp_gt_f16 m0, s2 ; encoding: [0x7d,0x02,0x54,0xbf]
+0x7d,0x02,0x54,0xbf
+
+# GFX1150: s_cmp_gt_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x54,0xbf]
+0x7e,0x02,0x54,0xbf
+
+# GFX1150: s_cmp_gt_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x54,0xbf]
+0x7f,0x02,0x54,0xbf
+
+# GFX1150: s_cmp_gt_f16 0, s2 ; encoding: [0x80,0x02,0x54,0xbf]
+0x80,0x02,0x54,0xbf
+
+# GFX1150: s_cmp_gt_f16 -1, s2 ; encoding: [0xc1,0x02,0x54,0xbf]
+0xc1,0x02,0x54,0xbf
+
+# GFX1150: s_cmp_gt_f16 0x3800, s2 ; encoding: [0xff,0x02,0x54,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x54,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_gt_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x54,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x54,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_gt_f16 0x3456, s2 ; encoding: [0xff,0x02,0x54,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x54,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_lg_f16 s1, s2 ; encoding: [0x01,0x02,0x55,0xbf]
+0x01,0x02,0x55,0xbf
+
+# GFX1150: s_cmp_lg_f16 s101, s2 ; encoding: [0x65,0x02,0x55,0xbf]
+0x65,0x02,0x55,0xbf
+
+# GFX1150: s_cmp_lg_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x55,0xbf]
+0x6a,0x02,0x55,0xbf
+
+# GFX1150: s_cmp_lg_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x55,0xbf]
+0x6b,0x02,0x55,0xbf
+
+# GFX1150: s_cmp_lg_f16 m0, s2 ; encoding: [0x7d,0x02,0x55,0xbf]
+0x7d,0x02,0x55,0xbf
+
+# GFX1150: s_cmp_lg_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x55,0xbf]
+0x7e,0x02,0x55,0xbf
+
+# GFX1150: s_cmp_lg_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x55,0xbf]
+0x7f,0x02,0x55,0xbf
+
+# GFX1150: s_cmp_lg_f16 0, s2 ; encoding: [0x80,0x02,0x55,0xbf]
+0x80,0x02,0x55,0xbf
+
+# GFX1150: s_cmp_lg_f16 -1, s2 ; encoding: [0xc1,0x02,0x55,0xbf]
+0xc1,0x02,0x55,0xbf
+
+# GFX1150: s_cmp_lg_f16 0x3800, s2 ; encoding: [0xff,0x02,0x55,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x55,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_lg_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x55,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x55,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_lg_f16 0x3456, s2 ; encoding: [0xff,0x02,0x55,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x55,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_ge_f16 s1, s2 ; encoding: [0x01,0x02,0x56,0xbf]
+0x01,0x02,0x56,0xbf
+
+# GFX1150: s_cmp_ge_f16 s101, s2 ; encoding: [0x65,0x02,0x56,0xbf]
+0x65,0x02,0x56,0xbf
+
+# GFX1150: s_cmp_ge_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x56,0xbf]
+0x6a,0x02,0x56,0xbf
+
+# GFX1150: s_cmp_ge_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x56,0xbf]
+0x6b,0x02,0x56,0xbf
+
+# GFX1150: s_cmp_ge_f16 m0, s2 ; encoding: [0x7d,0x02,0x56,0xbf]
+0x7d,0x02,0x56,0xbf
+
+# GFX1150: s_cmp_ge_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x56,0xbf]
+0x7e,0x02,0x56,0xbf
+
+# GFX1150: s_cmp_ge_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x56,0xbf]
+0x7f,0x02,0x56,0xbf
+
+# GFX1150: s_cmp_ge_f16 0, s2 ; encoding: [0x80,0x02,0x56,0xbf]
+0x80,0x02,0x56,0xbf
+
+# GFX1150: s_cmp_ge_f16 -1, s2 ; encoding: [0xc1,0x02,0x56,0xbf]
+0xc1,0x02,0x56,0xbf
+
+# GFX1150: s_cmp_ge_f16 0x3800, s2 ; encoding: [0xff,0x02,0x56,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x56,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_ge_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x56,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x56,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_ge_f16 0x3456, s2 ; encoding: [0xff,0x02,0x56,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x56,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_o_f16 s1, s2 ; encoding: [0x01,0x02,0x57,0xbf]
+0x01,0x02,0x57,0xbf
+
+# GFX1150: s_cmp_o_f16 s101, s2 ; encoding: [0x65,0x02,0x57,0xbf]
+0x65,0x02,0x57,0xbf
+
+# GFX1150: s_cmp_o_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x57,0xbf]
+0x6a,0x02,0x57,0xbf
+
+# GFX1150: s_cmp_o_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x57,0xbf]
+0x6b,0x02,0x57,0xbf
+
+# GFX1150: s_cmp_o_f16 m0, s2 ; encoding: [0x7d,0x02,0x57,0xbf]
+0x7d,0x02,0x57,0xbf
+
+# GFX1150: s_cmp_o_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x57,0xbf]
+0x7e,0x02,0x57,0xbf
+
+# GFX1150: s_cmp_o_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x57,0xbf]
+0x7f,0x02,0x57,0xbf
+
+# GFX1150: s_cmp_o_f16 0, s2 ; encoding: [0x80,0x02,0x57,0xbf]
+0x80,0x02,0x57,0xbf
+
+# GFX1150: s_cmp_o_f16 -1, s2 ; encoding: [0xc1,0x02,0x57,0xbf]
+0xc1,0x02,0x57,0xbf
+
+# GFX1150: s_cmp_o_f16 0x3800, s2 ; encoding: [0xff,0x02,0x57,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x57,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_o_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x57,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x57,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_o_f16 0x3456, s2 ; encoding: [0xff,0x02,0x57,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x57,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_u_f16 s1, s2 ; encoding: [0x01,0x02,0x58,0xbf]
+0x01,0x02,0x58,0xbf
+
+# GFX1150: s_cmp_u_f16 s101, s2 ; encoding: [0x65,0x02,0x58,0xbf]
+0x65,0x02,0x58,0xbf
+
+# GFX1150: s_cmp_u_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x58,0xbf]
+0x6a,0x02,0x58,0xbf
+
+# GFX1150: s_cmp_u_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x58,0xbf]
+0x6b,0x02,0x58,0xbf
+
+# GFX1150: s_cmp_u_f16 m0, s2 ; encoding: [0x7d,0x02,0x58,0xbf]
+0x7d,0x02,0x58,0xbf
+
+# GFX1150: s_cmp_u_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x58,0xbf]
+0x7e,0x02,0x58,0xbf
+
+# GFX1150: s_cmp_u_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x58,0xbf]
+0x7f,0x02,0x58,0xbf
+
+# GFX1150: s_cmp_u_f16 0, s2 ; encoding: [0x80,0x02,0x58,0xbf]
+0x80,0x02,0x58,0xbf
+
+# GFX1150: s_cmp_u_f16 -1, s2 ; encoding: [0xc1,0x02,0x58,0xbf]
+0xc1,0x02,0x58,0xbf
+
+# GFX1150: s_cmp_u_f16 0x3800, s2 ; encoding: [0xff,0x02,0x58,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x58,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_u_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x58,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x58,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_u_f16 0x3456, s2 ; encoding: [0xff,0x02,0x58,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x58,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_nge_f16 s1, s2 ; encoding: [0x01,0x02,0x59,0xbf]
+0x01,0x02,0x59,0xbf
+
+# GFX1150: s_cmp_nge_f16 s101, s2 ; encoding: [0x65,0x02,0x59,0xbf]
+0x65,0x02,0x59,0xbf
+
+# GFX1150: s_cmp_nge_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x59,0xbf]
+0x6a,0x02,0x59,0xbf
+
+# GFX1150: s_cmp_nge_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x59,0xbf]
+0x6b,0x02,0x59,0xbf
+
+# GFX1150: s_cmp_nge_f16 m0, s2 ; encoding: [0x7d,0x02,0x59,0xbf]
+0x7d,0x02,0x59,0xbf
+
+# GFX1150: s_cmp_nge_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x59,0xbf]
+0x7e,0x02,0x59,0xbf
+
+# GFX1150: s_cmp_nge_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x59,0xbf]
+0x7f,0x02,0x59,0xbf
+
+# GFX1150: s_cmp_nge_f16 0, s2 ; encoding: [0x80,0x02,0x59,0xbf]
+0x80,0x02,0x59,0xbf
+
+# GFX1150: s_cmp_nge_f16 -1, s2 ; encoding: [0xc1,0x02,0x59,0xbf]
+0xc1,0x02,0x59,0xbf
+
+# GFX1150: s_cmp_nge_f16 0x3800, s2 ; encoding: [0xff,0x02,0x59,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x59,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_nge_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x59,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x59,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_nge_f16 0x3456, s2 ; encoding: [0xff,0x02,0x59,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x59,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_nlg_f16 s1, s2 ; encoding: [0x01,0x02,0x5a,0xbf]
+0x01,0x02,0x5a,0xbf
+
+# GFX1150: s_cmp_nlg_f16 s101, s2 ; encoding: [0x65,0x02,0x5a,0xbf]
+0x65,0x02,0x5a,0xbf
+
+# GFX1150: s_cmp_nlg_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x5a,0xbf]
+0x6a,0x02,0x5a,0xbf
+
+# GFX1150: s_cmp_nlg_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x5a,0xbf]
+0x6b,0x02,0x5a,0xbf
+
+# GFX1150: s_cmp_nlg_f16 m0, s2 ; encoding: [0x7d,0x02,0x5a,0xbf]
+0x7d,0x02,0x5a,0xbf
+
+# GFX1150: s_cmp_nlg_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x5a,0xbf]
+0x7e,0x02,0x5a,0xbf
+
+# GFX1150: s_cmp_nlg_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x5a,0xbf]
+0x7f,0x02,0x5a,0xbf
+
+# GFX1150: s_cmp_nlg_f16 0, s2 ; encoding: [0x80,0x02,0x5a,0xbf]
+0x80,0x02,0x5a,0xbf
+
+# GFX1150: s_cmp_nlg_f16 -1, s2 ; encoding: [0xc1,0x02,0x5a,0xbf]
+0xc1,0x02,0x5a,0xbf
+
+# GFX1150: s_cmp_nlg_f16 0x3800, s2 ; encoding: [0xff,0x02,0x5a,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x5a,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_nlg_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x5a,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x5a,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_nlg_f16 0x3456, s2 ; encoding: [0xff,0x02,0x5a,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x5a,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_ngt_f16 s1, s2 ; encoding: [0x01,0x02,0x5b,0xbf]
+0x01,0x02,0x5b,0xbf
+
+# GFX1150: s_cmp_ngt_f16 s101, s2 ; encoding: [0x65,0x02,0x5b,0xbf]
+0x65,0x02,0x5b,0xbf
+
+# GFX1150: s_cmp_ngt_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x5b,0xbf]
+0x6a,0x02,0x5b,0xbf
+
+# GFX1150: s_cmp_ngt_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x5b,0xbf]
+0x6b,0x02,0x5b,0xbf
+
+# GFX1150: s_cmp_ngt_f16 m0, s2 ; encoding: [0x7d,0x02,0x5b,0xbf]
+0x7d,0x02,0x5b,0xbf
+
+# GFX1150: s_cmp_ngt_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x5b,0xbf]
+0x7e,0x02,0x5b,0xbf
+
+# GFX1150: s_cmp_ngt_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x5b,0xbf]
+0x7f,0x02,0x5b,0xbf
+
+# GFX1150: s_cmp_ngt_f16 0, s2 ; encoding: [0x80,0x02,0x5b,0xbf]
+0x80,0x02,0x5b,0xbf
+
+# GFX1150: s_cmp_ngt_f16 -1, s2 ; encoding: [0xc1,0x02,0x5b,0xbf]
+0xc1,0x02,0x5b,0xbf
+
+# GFX1150: s_cmp_ngt_f16 0x3800, s2 ; encoding: [0xff,0x02,0x5b,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x5b,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_ngt_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x5b,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x5b,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_ngt_f16 0x3456, s2 ; encoding: [0xff,0x02,0x5b,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x5b,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_nle_f16 s1, s2 ; encoding: [0x01,0x02,0x5c,0xbf]
+0x01,0x02,0x5c,0xbf
+
+# GFX1150: s_cmp_nle_f16 s101, s2 ; encoding: [0x65,0x02,0x5c,0xbf]
+0x65,0x02,0x5c,0xbf
+
+# GFX1150: s_cmp_nle_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x5c,0xbf]
+0x6a,0x02,0x5c,0xbf
+
+# GFX1150: s_cmp_nle_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x5c,0xbf]
+0x6b,0x02,0x5c,0xbf
+
+# GFX1150: s_cmp_nle_f16 m0, s2 ; encoding: [0x7d,0x02,0x5c,0xbf]
+0x7d,0x02,0x5c,0xbf
+
+# GFX1150: s_cmp_nle_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x5c,0xbf]
+0x7e,0x02,0x5c,0xbf
+
+# GFX1150: s_cmp_nle_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x5c,0xbf]
+0x7f,0x02,0x5c,0xbf
+
+# GFX1150: s_cmp_nle_f16 0, s2 ; encoding: [0x80,0x02,0x5c,0xbf]
+0x80,0x02,0x5c,0xbf
+
+# GFX1150: s_cmp_nle_f16 -1, s2 ; encoding: [0xc1,0x02,0x5c,0xbf]
+0xc1,0x02,0x5c,0xbf
+
+# GFX1150: s_cmp_nle_f16 0x3800, s2 ; encoding: [0xff,0x02,0x5c,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x5c,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_nle_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x5c,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x5c,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_nle_f16 0x3456, s2 ; encoding: [0xff,0x02,0x5c,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x5c,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_neq_f16 s1, s2 ; encoding: [0x01,0x02,0x5d,0xbf]
+0x01,0x02,0x5d,0xbf
+
+# GFX1150: s_cmp_neq_f16 s101, s2 ; encoding: [0x65,0x02,0x5d,0xbf]
+0x65,0x02,0x5d,0xbf
+
+# GFX1150: s_cmp_neq_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x5d,0xbf]
+0x6a,0x02,0x5d,0xbf
+
+# GFX1150: s_cmp_neq_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x5d,0xbf]
+0x6b,0x02,0x5d,0xbf
+
+# GFX1150: s_cmp_neq_f16 m0, s2 ; encoding: [0x7d,0x02,0x5d,0xbf]
+0x7d,0x02,0x5d,0xbf
+
+# GFX1150: s_cmp_neq_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x5d,0xbf]
+0x7e,0x02,0x5d,0xbf
+
+# GFX1150: s_cmp_neq_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x5d,0xbf]
+0x7f,0x02,0x5d,0xbf
+
+# GFX1150: s_cmp_neq_f16 0, s2 ; encoding: [0x80,0x02,0x5d,0xbf]
+0x80,0x02,0x5d,0xbf
+
+# GFX1150: s_cmp_neq_f16 -1, s2 ; encoding: [0xc1,0x02,0x5d,0xbf]
+0xc1,0x02,0x5d,0xbf
+
+# GFX1150: s_cmp_neq_f16 0x3800, s2 ; encoding: [0xff,0x02,0x5d,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x5d,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_neq_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x5d,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x5d,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_neq_f16 0x3456, s2 ; encoding: [0xff,0x02,0x5d,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x5d,0xbf,0x56,0x34,0x00,0x00
+
+# GFX1150: s_cmp_nlt_f16 s1, s2 ; encoding: [0x01,0x02,0x5e,0xbf]
+0x01,0x02,0x5e,0xbf
+
+# GFX1150: s_cmp_nlt_f16 s101, s2 ; encoding: [0x65,0x02,0x5e,0xbf]
+0x65,0x02,0x5e,0xbf
+
+# GFX1150: s_cmp_nlt_f16 vcc_lo, s2 ; encoding: [0x6a,0x02,0x5e,0xbf]
+0x6a,0x02,0x5e,0xbf
+
+# GFX1150: s_cmp_nlt_f16 vcc_hi, s2 ; encoding: [0x6b,0x02,0x5e,0xbf]
+0x6b,0x02,0x5e,0xbf
+
+# GFX1150: s_cmp_nlt_f16 m0, s2 ; encoding: [0x7d,0x02,0x5e,0xbf]
+0x7d,0x02,0x5e,0xbf
+
+# GFX1150: s_cmp_nlt_f16 exec_lo, s2 ; encoding: [0x7e,0x02,0x5e,0xbf]
+0x7e,0x02,0x5e,0xbf
+
+# GFX1150: s_cmp_nlt_f16 exec_hi, s2 ; encoding: [0x7f,0x02,0x5e,0xbf]
+0x7f,0x02,0x5e,0xbf
+
+# GFX1150: s_cmp_nlt_f16 0, s2 ; encoding: [0x80,0x02,0x5e,0xbf]
+0x80,0x02,0x5e,0xbf
+
+# GFX1150: s_cmp_nlt_f16 -1, s2 ; encoding: [0xc1,0x02,0x5e,0xbf]
+0xc1,0x02,0x5e,0xbf
+
+# GFX1150: s_cmp_nlt_f16 0x3800, s2 ; encoding: [0xff,0x02,0x5e,0xbf,0x00,0x38,0x00,0x00]
+0xff,0x02,0x5e,0xbf,0x00,0x38,0x00,0x00
+
+# GFX1150: s_cmp_nlt_f16 0xfe0b, s2 ; encoding: [0xff,0x02,0x5e,0xbf,0x0b,0xfe,0x00,0x00]
+0xff,0x02,0x5e,0xbf,0x0b,0xfe,0x00,0x00
+
+# GFX1150: s_cmp_nlt_f16 0x3456, s2 ; encoding: [0xff,0x02,0x5e,0xbf,0x56,0x34,0x00,0x00]
+0xff,0x02,0x5e,0xbf,0x56,0x34,0x00,0x00
>From 29db22e13cd075d9994dff16a2fabf6106571fd3 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Wed, 20 Sep 2023 12:14:05 +0200
Subject: [PATCH 2/2] [AMDGPU] Select gfx1150 SALU Float instructions
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 19 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 1 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 166 +++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 3 +-
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 10 +-
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 99 +-
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 69 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 220 ++++-
llvm/lib/Target/AMDGPU/SOPInstructions.td | 73 +-
.../inst-select-scalar-float-sop1.mir | 302 ++++++
.../inst-select-scalar-float-sop2.mir | 294 ++++++
.../inst-select-scalar-float-sopc.mir | 647 +++++++++++++
.../AMDGPU/GlobalISel/legalize-fcmp-s32.mir | 46 +
.../AMDGPU/GlobalISel/legalize-fcmp.mir | 1 +
.../AMDGPU/GlobalISel/regbankselect-fcmp.mir | 69 +-
.../GlobalISel/regbankselect-salu-float.mir | 246 +++++
.../test/CodeGen/AMDGPU/code-size-estimate.ll | 37 +
.../AMDGPU/commute-compares-scalar-float.ll | 515 ++++++++++
llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir | 99 ++
.../AMDGPU/fold-operands-scalar-fmac.mir | 238 +++++
llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll | 174 ++++
llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll | 212 +++++
llvm/test/CodeGen/AMDGPU/scalar-float-sopc.ll | 899 ++++++++++++++++++
23 files changed, 4282 insertions(+), 157 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop2.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sopc.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp-s32.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-salu-float.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/commute-compares-scalar-float.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/scalar-float-sopc.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 05deb69b2bfc140..b5ceaaa14b4fd5e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -663,6 +663,9 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::BRCOND:
SelectBRCOND(N);
return;
+ case ISD::FP_EXTEND:
+ SelectFP_EXTEND(N);
+ return;
case AMDGPUISD::CVT_PKRTZ_F16_F32:
case AMDGPUISD::CVT_PKNORM_I16_F32:
case AMDGPUISD::CVT_PKNORM_U16_F32:
@@ -2303,6 +2306,22 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
VCC.getValue(0));
}
+void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
+ if (Subtarget->hasSALUFloatInsts() && N->getValueType(0) == MVT::f32 &&
+ !N->isDivergent()) {
+ SDValue Src = N->getOperand(0);
+ if (Src.getValueType() == MVT::f16) {
+ if (isExtractHiElt(Src, Src)) {
+ CurDAG->SelectNodeTo(N, AMDGPU::S_CVT_HI_F32_F16, N->getVTList(),
+ {Src});
+ return;
+ }
+ }
+ }
+
+ SelectCode(N);
+}
+
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
// The address is assumed to be uniform, so if it ends up in a VGPR, it will
// be copied to an SGPR with readfirstlane.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 7b4a1a4aedaf7e5..a8a606f60a3faee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -273,6 +273,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool isCBranchSCC(const SDNode *N) const;
void SelectBRCOND(SDNode *N);
void SelectFMAD_FMA(SDNode *N);
+ void SelectFP_EXTEND(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDSBvhStackIntrinsic(SDNode *N);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9f13f58f8a9f404..31d72fb8cadd8a6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1211,36 +1211,104 @@ int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
}
}
- if (Size != 32)
- return -1;
+ if (Size == 32) {
+ switch (P) {
+ case CmpInst::ICMP_NE:
+ return AMDGPU::S_CMP_LG_U32;
+ case CmpInst::ICMP_EQ:
+ return AMDGPU::S_CMP_EQ_U32;
+ case CmpInst::ICMP_SGT:
+ return AMDGPU::S_CMP_GT_I32;
+ case CmpInst::ICMP_SGE:
+ return AMDGPU::S_CMP_GE_I32;
+ case CmpInst::ICMP_SLT:
+ return AMDGPU::S_CMP_LT_I32;
+ case CmpInst::ICMP_SLE:
+ return AMDGPU::S_CMP_LE_I32;
+ case CmpInst::ICMP_UGT:
+ return AMDGPU::S_CMP_GT_U32;
+ case CmpInst::ICMP_UGE:
+ return AMDGPU::S_CMP_GE_U32;
+ case CmpInst::ICMP_ULT:
+ return AMDGPU::S_CMP_LT_U32;
+ case CmpInst::ICMP_ULE:
+ return AMDGPU::S_CMP_LE_U32;
+ case CmpInst::FCMP_OEQ:
+ return AMDGPU::S_CMP_EQ_F32;
+ case CmpInst::FCMP_OGT:
+ return AMDGPU::S_CMP_GT_F32;
+ case CmpInst::FCMP_OGE:
+ return AMDGPU::S_CMP_GE_F32;
+ case CmpInst::FCMP_OLT:
+ return AMDGPU::S_CMP_LT_F32;
+ case CmpInst::FCMP_OLE:
+ return AMDGPU::S_CMP_LE_F32;
+ case CmpInst::FCMP_ONE:
+ return AMDGPU::S_CMP_LG_F32;
+ case CmpInst::FCMP_ORD:
+ return AMDGPU::S_CMP_O_F32;
+ case CmpInst::FCMP_UNO:
+ return AMDGPU::S_CMP_U_F32;
+ case CmpInst::FCMP_UEQ:
+ return AMDGPU::S_CMP_NLG_F32;
+ case CmpInst::FCMP_UGT:
+ return AMDGPU::S_CMP_NLE_F32;
+ case CmpInst::FCMP_UGE:
+ return AMDGPU::S_CMP_NLT_F32;
+ case CmpInst::FCMP_ULT:
+ return AMDGPU::S_CMP_NGE_F32;
+ case CmpInst::FCMP_ULE:
+ return AMDGPU::S_CMP_NGT_F32;
+ case CmpInst::FCMP_UNE:
+ return AMDGPU::S_CMP_NEQ_F32;
+ default:
+ llvm_unreachable("Unknown condition code!");
+ }
+ }
- switch (P) {
- case CmpInst::ICMP_NE:
- return AMDGPU::S_CMP_LG_U32;
- case CmpInst::ICMP_EQ:
- return AMDGPU::S_CMP_EQ_U32;
- case CmpInst::ICMP_SGT:
- return AMDGPU::S_CMP_GT_I32;
- case CmpInst::ICMP_SGE:
- return AMDGPU::S_CMP_GE_I32;
- case CmpInst::ICMP_SLT:
- return AMDGPU::S_CMP_LT_I32;
- case CmpInst::ICMP_SLE:
- return AMDGPU::S_CMP_LE_I32;
- case CmpInst::ICMP_UGT:
- return AMDGPU::S_CMP_GT_U32;
- case CmpInst::ICMP_UGE:
- return AMDGPU::S_CMP_GE_U32;
- case CmpInst::ICMP_ULT:
- return AMDGPU::S_CMP_LT_U32;
- case CmpInst::ICMP_ULE:
- return AMDGPU::S_CMP_LE_U32;
- default:
- llvm_unreachable("Unknown condition code!");
+ if (Size == 16) {
+ if (!STI.hasSALUFloatInsts())
+ return -1;
+
+ switch (P) {
+ case CmpInst::FCMP_OEQ:
+ return AMDGPU::S_CMP_EQ_F16;
+ case CmpInst::FCMP_OGT:
+ return AMDGPU::S_CMP_GT_F16;
+ case CmpInst::FCMP_OGE:
+ return AMDGPU::S_CMP_GE_F16;
+ case CmpInst::FCMP_OLT:
+ return AMDGPU::S_CMP_LT_F16;
+ case CmpInst::FCMP_OLE:
+ return AMDGPU::S_CMP_LE_F16;
+ case CmpInst::FCMP_ONE:
+ return AMDGPU::S_CMP_LG_F16;
+ case CmpInst::FCMP_ORD:
+ return AMDGPU::S_CMP_O_F16;
+ case CmpInst::FCMP_UNO:
+ return AMDGPU::S_CMP_U_F16;
+ case CmpInst::FCMP_UEQ:
+ return AMDGPU::S_CMP_NLG_F16;
+ case CmpInst::FCMP_UGT:
+ return AMDGPU::S_CMP_NLE_F16;
+ case CmpInst::FCMP_UGE:
+ return AMDGPU::S_CMP_NLT_F16;
+ case CmpInst::FCMP_ULT:
+ return AMDGPU::S_CMP_NGE_F16;
+ case CmpInst::FCMP_ULE:
+ return AMDGPU::S_CMP_NGT_F16;
+ case CmpInst::FCMP_UNE:
+ return AMDGPU::S_CMP_NEQ_F16;
+ default:
+ llvm_unreachable("Unknown condition code!");
+ }
}
+
+ return -1;
}
-bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
+bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
+
MachineBasicBlock *BB = I.getParent();
const DebugLoc &DL = I.getDebugLoc();
@@ -1266,6 +1334,9 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
return Ret;
}
+ if (I.getOpcode() == AMDGPU::G_FCMP)
+ return false;
+
int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget);
if (Opcode == -1)
return false;
@@ -2439,6 +2510,42 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
return false;
}
+static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In,
+ Register &Out) {
+ Register LShlSrc;
+ if (mi_match(In, MRI,
+ m_GTrunc(m_GLShr(m_Reg(LShlSrc), m_SpecificICst(16))))) {
+ Out = LShlSrc;
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
+ if (!Subtarget->hasSALUFloatInsts())
+ return false;
+
+ Register Dst = I.getOperand(0).getReg();
+ const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
+ if (DstRB->getID() != AMDGPU::SGPRRegBankID)
+ return false;
+
+ Register Src = I.getOperand(1).getReg();
+
+ if (MRI->getType(Dst) == LLT::scalar(32) &&
+ MRI->getType(Src) == LLT::scalar(16)) {
+ if (isExtractHiElt(*MRI, Src, Src)) {
+ MachineBasicBlock *BB = I.getParent();
+ BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
+ .addUse(Src);
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
+ }
+ }
+
+ return false;
+}
+
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineOperand &ImmOp = I.getOperand(1);
@@ -3471,7 +3578,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
case TargetOpcode::G_ICMP:
- if (selectG_ICMP(I))
+ case TargetOpcode::G_FCMP:
+ if (selectG_ICMP_or_FCMP(I))
return true;
return selectImpl(I, *CoverageInfo);
case TargetOpcode::G_LOAD:
@@ -3508,6 +3616,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
selectImpl(I, *CoverageInfo))
return true;
return selectG_SZA_EXT(I);
+ case TargetOpcode::G_FPEXT:
+ if (selectG_FPEXT(I))
+ return true;
+ return selectImpl(I, *CoverageInfo);
case TargetOpcode::G_BRCOND:
return selectG_BRCOND(I);
case TargetOpcode::G_GLOBAL_VALUE:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index c9afa4e9fcc2a59..93e45fcd8682f07 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -90,6 +90,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectPHI(MachineInstr &I) const;
bool selectG_TRUNC(MachineInstr &I) const;
bool selectG_SZA_EXT(MachineInstr &I) const;
+ bool selectG_FPEXT(MachineInstr &I) const;
bool selectG_CONSTANT(MachineInstr &I) const;
bool selectG_FNEG(MachineInstr &I) const;
bool selectG_FABS(MachineInstr &I) const;
@@ -129,7 +130,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
- bool selectG_ICMP(MachineInstr &I) const;
+ bool selectG_ICMP_or_FCMP(MachineInstr &I) const;
bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
SmallVectorImpl<GEPInfo> &AddrInfo) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 31617eef562d99d..db226a302900160 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1132,8 +1132,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.legalIf(all(typeInSet(0, {S1, S32}), isPointer(1)));
- getActionDefinitionsBuilder(G_FCMP)
- .legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
+ auto &FCmpBuilder =
+ getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct(
+ {S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase);
+
+ if (ST.hasSALUFloatInsts())
+ FCmpBuilder.legalForCartesianProduct({S32}, {S16, S32});
+
+ FCmpBuilder
.widenScalarToNextPow2(1)
.clampScalar(1, S32, S64)
.scalarize(0);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 121768f039f4a5d..5b056bd9e5dba2c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2178,6 +2178,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
+ case AMDGPU::G_FCMP:
+ if (!Subtarget.hasSALUFloatInsts())
+ break;
+ LLVM_FALLTHROUGH;
case AMDGPU::G_ICMP:
case AMDGPU::G_UADDO:
case AMDGPU::G_USUBO:
@@ -2185,7 +2189,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_SADDE:
case AMDGPU::G_USUBE:
case AMDGPU::G_SSUBE: {
- unsigned BoolDstOp = Opc == AMDGPU::G_ICMP ? 0 : 1;
+ unsigned BoolDstOp =
+ (Opc == AMDGPU::G_ICMP || Opc == AMDGPU::G_FCMP) ? 0 : 1;
Register DstReg = MI.getOperand(BoolDstOp).getReg();
const RegisterBank *DstBank =
@@ -3706,40 +3711,59 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_UBFX:
if (isSALUMapping(MI))
return getDefaultMappingSOP(MI);
- [[fallthrough]];
-
- case AMDGPU::G_SADDSAT: // FIXME: Could lower sat ops for SALU
- case AMDGPU::G_SSUBSAT:
- case AMDGPU::G_UADDSAT:
- case AMDGPU::G_USUBSAT:
+ return getDefaultMappingVOP(MI);
case AMDGPU::G_FADD:
case AMDGPU::G_FSUB:
- case AMDGPU::G_FPTOSI:
- case AMDGPU::G_FPTOUI:
case AMDGPU::G_FMUL:
case AMDGPU::G_FMA:
- case AMDGPU::G_FMAD:
- case AMDGPU::G_FSQRT:
case AMDGPU::G_FFLOOR:
case AMDGPU::G_FCEIL:
case AMDGPU::G_FRINT:
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_STRICT_FADD:
+ case AMDGPU::G_STRICT_FSUB:
+ case AMDGPU::G_STRICT_FMUL:
+ case AMDGPU::G_STRICT_FMA: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ if (Subtarget.hasSALUFloatInsts() && (Size == 32 || Size == 16) &&
+ isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
+ return getDefaultMappingVOP(MI);
+ }
+ case AMDGPU::G_FPTOSI:
+ case AMDGPU::G_FPTOUI:
case AMDGPU::G_SITOFP:
- case AMDGPU::G_UITOFP:
+ case AMDGPU::G_UITOFP: {
+ unsigned SizeDst = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned SizeSrc = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ if (Subtarget.hasSALUFloatInsts() && SizeDst == 32 && SizeSrc == 32 &&
+ isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
+ return getDefaultMappingVOP(MI);
+ }
case AMDGPU::G_FPTRUNC:
- case AMDGPU::G_FPEXT:
+ case AMDGPU::G_FPEXT: {
+ unsigned SizeDst = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned SizeSrc = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ if (Subtarget.hasSALUFloatInsts() && SizeDst != 64 && SizeSrc != 64 &&
+ isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
+ return getDefaultMappingVOP(MI);
+ }
+ case AMDGPU::G_SADDSAT: // FIXME: Could lower sat ops for SALU
+ case AMDGPU::G_SSUBSAT:
+ case AMDGPU::G_UADDSAT:
+ case AMDGPU::G_USUBSAT:
+ case AMDGPU::G_FMAD:
+ case AMDGPU::G_FSQRT:
case AMDGPU::G_FEXP2:
case AMDGPU::G_FLOG2:
case AMDGPU::G_FLDEXP:
- case AMDGPU::G_FMINNUM:
- case AMDGPU::G_FMAXNUM:
case AMDGPU::G_FMINNUM_IEEE:
case AMDGPU::G_FMAXNUM_IEEE:
case AMDGPU::G_FCANONICALIZE:
- case AMDGPU::G_INTRINSIC_TRUNC:
- case AMDGPU::G_STRICT_FADD:
- case AMDGPU::G_STRICT_FSUB:
- case AMDGPU::G_STRICT_FMUL:
- case AMDGPU::G_STRICT_FMA:
case AMDGPU::G_STRICT_FLDEXP:
case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar?
case AMDGPU::G_FSHR: // TODO: Expand for scalar
@@ -3959,14 +3983,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
SrcSize);
break;
}
- case AMDGPU::G_FCMP: {
- unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
- OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
- OpdsMapping[1] = nullptr; // Predicate Operand.
- OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
- OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
- break;
- }
case AMDGPU::G_IS_FPCLASS: {
Register SrcReg = MI.getOperand(1).getReg();
unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
@@ -3987,8 +4003,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
break;
}
- case AMDGPU::G_ICMP: {
- auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ case AMDGPU::G_ICMP:
+ case AMDGPU::G_FCMP: {
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
// See if the result register has already been constrained to vcc, which may
@@ -3998,12 +4014,23 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI);
unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI);
+ auto canUseSCCICMP = [&]() {
+ auto Pred =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ return Size == 32 ||
+ (Size == 64 &&
+ (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
+ Subtarget.hasScalarCompareEq64());
+ };
+ auto canUseSCCFCMP = [&]() {
+ return Subtarget.hasSALUFloatInsts() && (Size == 32 || Size == 16);
+ };
+
+ bool isICMP = MI.getOpcode() == AMDGPU::G_ICMP;
bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
Op2Bank == AMDGPU::SGPRRegBankID &&
Op3Bank == AMDGPU::SGPRRegBankID &&
- (Size == 32 || (Size == 64 &&
- (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
- Subtarget.hasScalarCompareEq64()));
+ (isICMP ? canUseSCCICMP() : canUseSCCFCMP());
DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
@@ -4013,6 +4040,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const unsigned ResultSize = 1;
OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
+ OpdsMapping[1] = nullptr; // Predicate Operand.
OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, Size);
OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank, Size);
break;
@@ -4209,7 +4237,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_frexp_exp:
case Intrinsic::amdgcn_fract:
- case Intrinsic::amdgcn_cvt_pkrtz:
case Intrinsic::amdgcn_cvt_pknorm_i16:
case Intrinsic::amdgcn_cvt_pknorm_u16:
case Intrinsic::amdgcn_cvt_pk_i16:
@@ -4276,6 +4303,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_set_inactive:
case Intrinsic::amdgcn_permlane64:
return getDefaultMappingAllVGPR(MI);
+ case Intrinsic::amdgcn_cvt_pkrtz:
+ if (Subtarget.hasSALUFloatInsts() && isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
+ return getDefaultMappingVOP(MI);
case Intrinsic::amdgcn_kernarg_segment_ptr:
case Intrinsic::amdgcn_s_getpc:
case Intrinsic::amdgcn_groupstaticsize:
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 3adaa092aaefc4a..1032f7a95d791d6 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -345,9 +345,44 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
MachineOperand *OpToFold) const {
+ const unsigned Opc = MI->getOpcode();
+
+ auto tryToFoldAsFMAAKorMK = [&]() {
+ if (!OpToFold->isImm())
+ return false;
+
+ const bool TryAK = OpNo == 3; // OpNo 3 selects fmaak, otherwise fmamk.
+ const unsigned NewOpc = TryAK ? AMDGPU::S_FMAAK_F32 : AMDGPU::S_FMAMK_F32;
+ MI->setDesc(TII->get(NewOpc));
+
+ // We have to fold into the operand that would be the Imm, not into OpNo.
+ bool FoldAsFMAAKorMK =
+ tryAddToFoldList(FoldList, MI, TryAK ? 3 : 2, OpToFold);
+ if (FoldAsFMAAKorMK) {
+ // Untie Src2 of fmac.
+ MI->untieRegOperand(3);
+ // For fmamk, swap operands 1 and 2 if OpToFold was meant for operand 1.
+ if (OpNo == 1) {
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ Register OldReg = Op1.getReg();
+ // Operand 2 might be an inlinable constant rather than a register.
+ if (Op2.isImm()) {
+ Op1.ChangeToImmediate(Op2.getImm());
+ Op2.ChangeToRegister(OldReg, false);
+ } else {
+ Op1.setReg(Op2.getReg());
+ Op2.setReg(OldReg);
+ }
+ }
+ return true;
+ }
+ MI->setDesc(TII->get(Opc)); // Fold failed: restore the original opcode.
+ return false;
+ };
+
if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
// Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
- unsigned Opc = MI->getOpcode();
unsigned NewOpc = macToMad(Opc);
if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
// Check if changing this to a v_mad_{f16, f32} instruction will allow us
@@ -367,6 +402,13 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MI->setDesc(TII->get(Opc));
}
+ // Special case for s_fmac_f32 if we are trying to fold into Src2.
+ // By transforming into fmaak we can untie Src2 and make folding legal.
+ if (Opc == AMDGPU::S_FMAC_F32 && OpNo == 3) {
+ if (tryToFoldAsFMAAKorMK())
+ return true;
+ }
+
// Special case for s_setreg_b32
if (OpToFold->isImm()) {
unsigned ImmOpc = 0;
@@ -447,6 +489,28 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
return true;
}
+ // An inlinable constant might have been folded into the Imm operand of fmaak
+ // or fmamk, and we are now trying to fold a non-inlinable constant.
+ if ((Opc == AMDGPU::S_FMAAK_F32 || Opc == AMDGPU::S_FMAMK_F32) &&
+ !OpToFold->isReg() && !TII->isInlineConstant(*OpToFold)) {
+ unsigned ImmIdx = Opc == AMDGPU::S_FMAAK_F32 ? 3 : 2;
+ MachineOperand &OpImm = MI->getOperand(ImmIdx);
+ if (!OpImm.isReg() &&
+ TII->isInlineConstant(*MI, MI->getOperand(OpNo), OpImm))
+ return tryToFoldAsFMAAKorMK();
+ }
+
+ // Special case for s_fmac_f32 if we are trying to fold into Src0 or Src1.
+ // By changing into fmamk we can untie Src2.
+ // If folding into Src0 happens first and Src0 is identical to Src1, we
+ // should avoid transforming into fmamk, which requires commuting, as that
+ // would cause the later fold into Src1 to fail because of a wrong OpNo.
+ if (Opc == AMDGPU::S_FMAC_F32 &&
+ (OpNo != 1 || !MI->getOperand(1).isIdenticalTo(MI->getOperand(2)))) {
+ if (tryToFoldAsFMAAKorMK())
+ return true;
+ }
+
// Check the case where we might introduce a second constant operand to a
// scalar instruction
if (TII->isSALU(MI->getOpcode())) {
@@ -458,7 +522,8 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
// Otherwise check for another constant
for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
auto &Op = MI->getOperand(i);
- if (OpNo != i && !Op.isReg() && !TII->isInlineConstant(Op, OpInfo))
+ if (OpNo != i && !Op.isReg() &&
+ !TII->isInlineConstant(Op, InstDesc.operands()[i]))
return false;
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 38b5e0114903cdf..cf391856bf733fb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4990,6 +4990,64 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
+ case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
+ case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
+ case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
+ case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
+ case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
+ case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
+ case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64;
+ case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
+ case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
+ case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
+ case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
+ case AMDGPU::S_CEIL_F16: return AMDGPU::V_CEIL_F16_t16_e64;
+ case AMDGPU::S_FLOOR_F16: return AMDGPU::V_FLOOR_F16_t16_e64;
+ case AMDGPU::S_TRUNC_F16: return AMDGPU::V_TRUNC_F16_t16_e64;
+ case AMDGPU::S_RNDNE_F16: return AMDGPU::V_RNDNE_F16_t16_e64;
+ case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
+ case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
+ case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
+ case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
+ case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
+ case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_t16_e64;
+ case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_t16_e64;
+ case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_t16_e64;
+ case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_t16_e64;
+ case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_t16_e64;
+ case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
+ case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
+ case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_t16_e64;
+ case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
+ case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
+ case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
+ case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
+ case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
+ case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
+ case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
+ case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
+ case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
+ case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
+ case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
+ case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
+ case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
+ case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
+ case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
+ case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
+ case AMDGPU::S_CMP_LT_F16: return AMDGPU::V_CMP_LT_F16_t16_e64;
+ case AMDGPU::S_CMP_EQ_F16: return AMDGPU::V_CMP_EQ_F16_t16_e64;
+ case AMDGPU::S_CMP_LE_F16: return AMDGPU::V_CMP_LE_F16_t16_e64;
+ case AMDGPU::S_CMP_GT_F16: return AMDGPU::V_CMP_GT_F16_t16_e64;
+ case AMDGPU::S_CMP_LG_F16: return AMDGPU::V_CMP_LG_F16_t16_e64;
+ case AMDGPU::S_CMP_GE_F16: return AMDGPU::V_CMP_GE_F16_t16_e64;
+ case AMDGPU::S_CMP_O_F16: return AMDGPU::V_CMP_O_F16_t16_e64;
+ case AMDGPU::S_CMP_U_F16: return AMDGPU::V_CMP_U_F16_t16_e64;
+ case AMDGPU::S_CMP_NGE_F16: return AMDGPU::V_CMP_NGE_F16_t16_e64;
+ case AMDGPU::S_CMP_NLG_F16: return AMDGPU::V_CMP_NLG_F16_t16_e64;
+ case AMDGPU::S_CMP_NGT_F16: return AMDGPU::V_CMP_NGT_F16_t16_e64;
+ case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64;
+ case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64;
+ case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64;
}
llvm_unreachable(
"Unexpected scalar opcode without corresponding vector one!");
@@ -5383,6 +5441,13 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
if (Src1.isReg() && RI.isAGPR(MRI, Src1.getReg()))
legalizeOpWithMove(MI, Src1Idx);
+ // Special case: V_FMAC_F32 and V_FMAC_F16 have src2.
+ if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))
+ legalizeOpWithMove(MI, Src2Idx);
+ }
+
// VOP2 src0 instructions support all operand types, so we don't need to check
// their legality. If src1 is already legal, we don't need to do anything.
if (isLegalRegOperand(MRI, InstrDesc.operands()[Src1Idx], Src1))
@@ -5532,6 +5597,11 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
// legalize it.
legalizeOpWithMove(MI, Idx);
}
+
+ // Special case: V_FMAC_F32 and V_FMAC_F16 have src2 tied to vdst.
+ if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
+ !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
+ legalizeOpWithMove(MI, VOP3Idx[2]);
}
Register SIInstrInfo::readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
@@ -6665,21 +6735,78 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
case AMDGPU::S_CMP_LT_U32:
case AMDGPU::S_CMP_LE_U32:
case AMDGPU::S_CMP_EQ_U64:
- case AMDGPU::S_CMP_LG_U64: {
- const MCInstrDesc &NewDesc = get(NewOpcode);
+ case AMDGPU::S_CMP_LG_U64:
+ case AMDGPU::S_CMP_LT_F32:
+ case AMDGPU::S_CMP_EQ_F32:
+ case AMDGPU::S_CMP_LE_F32:
+ case AMDGPU::S_CMP_GT_F32:
+ case AMDGPU::S_CMP_LG_F32:
+ case AMDGPU::S_CMP_GE_F32:
+ case AMDGPU::S_CMP_O_F32:
+ case AMDGPU::S_CMP_U_F32:
+ case AMDGPU::S_CMP_NGE_F32:
+ case AMDGPU::S_CMP_NLG_F32:
+ case AMDGPU::S_CMP_NGT_F32:
+ case AMDGPU::S_CMP_NLE_F32:
+ case AMDGPU::S_CMP_NEQ_F32:
+ case AMDGPU::S_CMP_NLT_F32:
+ case AMDGPU::S_CMP_LT_F16:
+ case AMDGPU::S_CMP_EQ_F16:
+ case AMDGPU::S_CMP_LE_F16:
+ case AMDGPU::S_CMP_GT_F16:
+ case AMDGPU::S_CMP_LG_F16:
+ case AMDGPU::S_CMP_GE_F16:
+ case AMDGPU::S_CMP_O_F16:
+ case AMDGPU::S_CMP_U_F16:
+ case AMDGPU::S_CMP_NGE_F16:
+ case AMDGPU::S_CMP_NLG_F16:
+ case AMDGPU::S_CMP_NGT_F16:
+ case AMDGPU::S_CMP_NLE_F16:
+ case AMDGPU::S_CMP_NEQ_F16:
+ case AMDGPU::S_CMP_NLT_F16: {
Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
- MachineInstr *NewInstr =
- BuildMI(*MBB, Inst, Inst.getDebugLoc(), NewDesc, CondReg)
- .add(Inst.getOperand(0))
- .add(Inst.getOperand(1));
+ auto NewInstr =
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg)
+ .setMIFlags(Inst.getFlags());
+ if (AMDGPU::getNamedOperandIdx(NewOpcode,
+ AMDGPU::OpName::src0_modifiers) >= 0) {
+ NewInstr
+ .addImm(0) // src0_modifiers
+ .add(Inst.getOperand(0)) // src0
+ .addImm(0) // src1_modifiers
+ .add(Inst.getOperand(1)) // src1
+ .addImm(0); // clamp
+ } else {
+ NewInstr
+ .add(Inst.getOperand(0))
+ .add(Inst.getOperand(1));
+ }
legalizeOperands(*NewInstr, MDT);
int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC);
MachineOperand SCCOp = Inst.getOperand(SCCIdx);
addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
Inst.eraseFromParent();
+ return;
}
+ case AMDGPU::S_CVT_HI_F32_F16: {
+ const DebugLoc &DL = Inst.getDebugLoc();
+ Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
+ .addImm(16)
+ .add(Inst.getOperand(1));
+ BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
+ .addImm(0) // src0_modifiers
+ .addReg(TmpReg)
+ .addImm(0) // clamp
+ .addImm(0); // omod
+
+ MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
+ addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
+ Inst.eraseFromParent();
return;
}
+ }
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
// We cannot move this instruction to the VALU, so we should try to
@@ -6723,8 +6850,61 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
// Use the new VALU Opcode.
auto NewInstr = BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode))
.setMIFlags(Inst.getFlags());
- for (const MachineOperand &Op : Inst.explicit_operands())
- NewInstr->addOperand(Op);
+ if (isVOP3(NewOpcode)) {
+ // Intersperse VOP3 modifiers among the SALU operands.
+ NewInstr->addOperand(Inst.getOperand(0));
+ if (AMDGPU::getNamedOperandIdx(NewOpcode,
+ AMDGPU::OpName::src0_modifiers) >= 0)
+ NewInstr.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0) >= 0)
+ NewInstr->addOperand(Inst.getOperand(1));
+
+ if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
+ // We are converting these to a BFE, so we need to add the missing
+ // operands for the size and offset.
+ unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
+ NewInstr.addImm(0);
+ NewInstr.addImm(Size);
+ } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
+ // The VALU version adds the second operand to the result, so insert an
+ // extra 0 operand.
+ NewInstr.addImm(0);
+ } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
+ const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
+ // If we need to move this to VGPRs, we need to unpack the second
+ // operand back into the 2 separate ones for bit offset and width.
+ assert(OffsetWidthOp.isImm() &&
+ "Scalar BFE is only implemented for constant width and offset");
+ uint32_t Imm = OffsetWidthOp.getImm();
+
+ uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
+ uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
+ NewInstr.addImm(Offset);
+ NewInstr.addImm(BitWidth);
+ } else {
+ if (AMDGPU::getNamedOperandIdx(NewOpcode,
+ AMDGPU::OpName::src1_modifiers) >= 0)
+ NewInstr.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
+ NewInstr->addOperand(Inst.getOperand(2));
+ if (AMDGPU::getNamedOperandIdx(NewOpcode,
+ AMDGPU::OpName::src2_modifiers) >= 0)
+ NewInstr.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
+ NewInstr->addOperand(Inst.getOperand(3));
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
+ NewInstr.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
+ NewInstr.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
+ NewInstr.addImm(0);
+ }
+ } else {
+ // Just copy the SALU operands.
+ for (const MachineOperand &Op : Inst.explicit_operands())
+ NewInstr->addOperand(Op);
+ }
+
// Remove any references to SCC. Vector instructions can't read from it, and
// We're just about to add the implicit use / defs of VCC, and we don't want
// both.
@@ -6748,30 +6928,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
NewDstReg = MRI.createVirtualRegister(NewDstRC);
MRI.replaceRegWith(DstReg, NewDstReg);
}
- if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
- // We are converting these to a BFE, so we need to add the missing
- // operands for the size and offset.
- unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
- NewInstr.addImm(0);
- NewInstr.addImm(Size);
- } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
- // The VALU version adds the second operand to the result, so insert an
- // extra 0 operand.
- NewInstr.addImm(0);
- }
- if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
- const MachineOperand &OffsetWidthOp = NewInstr->getOperand(2);
- // If we need to move this to VGPRs, we need to unpack the second operand
- // back into the 2 separate ones for bit offset and width.
- assert(OffsetWidthOp.isImm() &&
- "Scalar BFE is only implemented for constant width and offset");
- uint32_t Imm = OffsetWidthOp.getImm();
- uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
- uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
- NewInstr->removeOperand(2);
- NewInstr.addImm(Offset);
- NewInstr.addImm(BitWidth);
- }
fixImplicitOperands(*NewInstr);
// Legalize the operands
legalizeOperands(*NewInstr, MDT);
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 08ab81848710d5e..8c7d511ea4dc8a5 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -401,29 +401,33 @@ let SubtargetPredicate = isGFX11Plus in {
}
} // End SubtargetPredicate = isGFX11Plus
+class SOP1_F32_Inst<string opName, SDPatternOperator Op, ValueType vt0=f32,
+ ValueType vt1=vt0> :
+ SOP1_32<opName, [(set vt0:$sdst, (UniformUnaryFrag<Op> vt1:$src0))]>;
+
let SubtargetPredicate = HasSALUFloatInsts, Uses = [MODE],
SchedRW = [WriteSFPU], isReMaterializable = 1 in {
- def S_CVT_F32_I32 : SOP1_32<"s_cvt_f32_i32">;
- def S_CVT_F32_U32 : SOP1_32<"s_cvt_f32_u32">;
+ def S_CVT_F32_I32 : SOP1_F32_Inst<"s_cvt_f32_i32", sint_to_fp, f32, i32>;
+ def S_CVT_F32_U32 : SOP1_F32_Inst<"s_cvt_f32_u32", uint_to_fp, f32, i32>;
let mayRaiseFPException = 1 in {
- def S_CVT_I32_F32 : SOP1_32<"s_cvt_i32_f32">;
- def S_CVT_U32_F32 : SOP1_32<"s_cvt_u32_f32">;
- def S_CVT_F32_F16 : SOP1_32<"s_cvt_f32_f16">;
+ def S_CVT_I32_F32 : SOP1_F32_Inst<"s_cvt_i32_f32", fp_to_sint, i32, f32>;
+ def S_CVT_U32_F32 : SOP1_F32_Inst<"s_cvt_u32_f32", fp_to_uint, i32, f32>;
+ def S_CVT_F32_F16 : SOP1_F32_Inst<"s_cvt_f32_f16", fpextend, f32, f16>;
def S_CVT_HI_F32_F16 : SOP1_32<"s_cvt_hi_f32_f16">;
- def S_CEIL_F32 : SOP1_32<"s_ceil_f32">;
- def S_FLOOR_F32 : SOP1_32<"s_floor_f32">;
- def S_TRUNC_F32 : SOP1_32<"s_trunc_f32">;
- def S_RNDNE_F32 : SOP1_32<"s_rndne_f32">;
+ def S_CEIL_F32 : SOP1_F32_Inst<"s_ceil_f32", fceil>;
+ def S_FLOOR_F32 : SOP1_F32_Inst<"s_floor_f32", ffloor>;
+ def S_TRUNC_F32 : SOP1_F32_Inst<"s_trunc_f32", ftrunc>;
+ def S_RNDNE_F32 : SOP1_F32_Inst<"s_rndne_f32", frint>;
let FPDPRounding = 1 in
- def S_CVT_F16_F32 : SOP1_32<"s_cvt_f16_f32">;
+ def S_CVT_F16_F32 : SOP1_F32_Inst<"s_cvt_f16_f32", fpround, f16, f32>;
- def S_CEIL_F16 : SOP1_32<"s_ceil_f16">;
- def S_FLOOR_F16 : SOP1_32<"s_floor_f16">;
- def S_TRUNC_F16 : SOP1_32<"s_trunc_f16">;
- def S_RNDNE_F16 : SOP1_32<"s_rndne_f16">;
+ def S_CEIL_F16 : SOP1_F32_Inst<"s_ceil_f16", fceil, f16>;
+ def S_FLOOR_F16 : SOP1_F32_Inst<"s_floor_f16", ffloor, f16>;
+ def S_TRUNC_F16 : SOP1_F32_Inst<"s_trunc_f16", ftrunc, f16>;
+ def S_RNDNE_F16 : SOP1_F32_Inst<"s_rndne_f16", frint, f16>;
} // End mayRaiseFPException = 1
} // End SubtargetPredicate = HasSALUFloatInsts, Uses = [MODE]
// SchedRW = [WriteSFPU], isReMaterializable = 1
@@ -756,14 +760,22 @@ let SubtargetPredicate = isGFX11Plus in {
def S_PACK_HL_B32_B16 : SOP2_32<"s_pack_hl_b32_b16">;
} // End SubtargetPredicate = isGFX11Plus
+class SOP2_F32_Inst<string opName, SDPatternOperator Op, ValueType dstVt=f32> :
+ SOP2_F32<opName,
+ [(set dstVt:$sdst, (UniformBinFrag<Op> SSrc_f32:$src0, SSrc_f32:$src1))]>;
+
+class SOP2_F16_Inst<string opName, SDPatternOperator Op> :
+ SOP2_F16<opName,
+ [(set f16:$sdst, (UniformBinFrag<Op> SSrc_f16:$src0, SSrc_f16:$src1))]>;
+
let SubtargetPredicate = HasSALUFloatInsts, mayRaiseFPException = 1,
Uses = [MODE], SchedRW = [WriteSFPU] in {
let isReMaterializable = 1 in {
let isCommutable = 1 in {
- def S_ADD_F32 : SOP2_F32<"s_add_f32">;
- def S_MIN_F32 : SOP2_F32<"s_min_f32">;
- def S_MAX_F32 : SOP2_F32<"s_max_f32">;
- def S_MUL_F32 : SOP2_F32<"s_mul_f32">;
+ def S_ADD_F32 : SOP2_F32_Inst<"s_add_f32", any_fadd>;
+ def S_MIN_F32 : SOP2_F32_Inst<"s_min_f32", fminnum_like>;
+ def S_MAX_F32 : SOP2_F32_Inst<"s_max_f32", fmaxnum_like>;
+ def S_MUL_F32 : SOP2_F32_Inst<"s_mul_f32", any_fmul>;
let FixedSize = 1 in
def S_FMAAK_F32 : SOP2_Pseudo<
@@ -773,19 +785,20 @@ let SubtargetPredicate = HasSALUFloatInsts, mayRaiseFPException = 1,
>;
let FPDPRounding = 1 in {
- def S_ADD_F16 : SOP2_F16<"s_add_f16">;
- def S_MUL_F16 : SOP2_F16<"s_mul_f16">;
+ def S_ADD_F16 : SOP2_F16_Inst<"s_add_f16", any_fadd>;
+ def S_MUL_F16 : SOP2_F16_Inst<"s_mul_f16", any_fmul>;
} // End FPDPRounding
- def S_MIN_F16 : SOP2_F16<"s_min_f16">;
- def S_MAX_F16 : SOP2_F16<"s_max_f16">;
+ def S_MIN_F16 : SOP2_F16_Inst<"s_min_f16", fminnum_like>;
+ def S_MAX_F16 : SOP2_F16_Inst<"s_max_f16", fmaxnum_like>;
} // End isCommutable = 1
let FPDPRounding = 1 in
- def S_SUB_F16 : SOP2_F16<"s_sub_f16">;
+ def S_SUB_F16 : SOP2_F16_Inst<"s_sub_f16", any_fsub>;
- def S_SUB_F32 : SOP2_F32<"s_sub_f32">;
- def S_CVT_PK_RTZ_F16_F32 : SOP2_F32<"s_cvt_pk_rtz_f16_f32">;
+ def S_SUB_F32 : SOP2_F32_Inst<"s_sub_f32", any_fsub>;
+ def S_CVT_PK_RTZ_F16_F32 : SOP2_F32_Inst<"s_cvt_pk_rtz_f16_f32",
+ AMDGPUpkrtz_f16_f32, v2f16>;
let FixedSize = 1 in
def S_FMAMK_F32 : SOP2_Pseudo<
@@ -796,20 +809,22 @@ let SubtargetPredicate = HasSALUFloatInsts, mayRaiseFPException = 1,
} // End isReMaterializable = 1
let Constraints = "$sdst = $src2", DisableEncoding="$src2",
- isCommutable = 1 in {
+ isCommutable = 1, AddedComplexity = 20 in {
def S_FMAC_F32 : SOP2_Pseudo<
"s_fmac_f32", (outs SReg_32:$sdst),
(ins SSrc_f32:$src0, SSrc_f32:$src1, SReg_32:$src2),
- "$sdst, $src0, $src1"
+ "$sdst, $src0, $src1",
+ [(set f32:$sdst, (UniformTernaryFrag<any_fma> SSrc_f32:$src0, SSrc_f32:$src1, SReg_32:$src2))]
>;
def S_FMAC_F16 : SOP2_Pseudo<
"s_fmac_f16", (outs SReg_32:$sdst),
(ins SSrc_f16:$src0, SSrc_f16:$src1, SReg_32:$src2),
- "$sdst, $src0, $src1"
+ "$sdst, $src0, $src1",
+ [(set f16:$sdst, (UniformTernaryFrag<any_fma> SSrc_f16:$src0, SSrc_f16:$src1, SReg_32:$src2))]
>;
} // End Constraints = "$sdst = $src2", DisableEncoding="$src2",
- // isCommutable = 1
+ // isCommutable = 1, AddedComplexity = 20
} // End SubtargetPredicate = HasSALUFloatInsts, mayRaiseFPException = 1,
// Uses = [MODE], SchedRW = [WriteSFPU]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir
new file mode 100644
index 000000000000000..bb86413964098ba
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir
@@ -0,0 +1,302 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX1150 %s
+
+---
+name: sitofp_i32_to_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: sitofp_i32_to_f32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[S_CVT_F32_I32_:%[0-9]+]]:sreg_32 = S_CVT_F32_I32 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY [[S_CVT_F32_I32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = G_SITOFP %0(s32)
+ $sgpr0 = COPY %1(s32)
+
+...
+---
+name: uitofp_u32_to_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: uitofp_u32_to_f32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[S_CVT_F32_U32_:%[0-9]+]]:sreg_32 = S_CVT_F32_U32 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY [[S_CVT_F32_U32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = G_UITOFP %0(s32)
+ $sgpr0 = COPY %1(s32)
+
+...
+---
+name: fptosi_f32_to_i32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fptosi_f32_to_i32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_CVT_I32_F32 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %1
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = G_FPTOSI %0(s32)
+ $sgpr0 = COPY %1(s32)
+
+...
+---
+name: fptoui_f32_to_u32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fptoui_f32_to_u32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_CVT_U32_F32 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %1
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = G_FPTOUI %0(s32)
+ $sgpr0 = COPY %1(s32)
+
+...
+---
+name: fpext_f16_to_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fpext_f16_to_f32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_CVT_F32_F16 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = G_FPEXT %1(s16)
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: fpext_hif16_to_32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fpext_hif16_to_32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[S_CVT_HI_F32_F16_:%[0-9]+]]:sreg_32 = S_CVT_HI_F32_F16 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY [[S_CVT_HI_F32_F16_]]
+ %0:sgpr(<2 x s16>) = COPY $sgpr0
+ %2:sgpr(s32) = G_BITCAST %0(<2 x s16>)
+ %3:sgpr(s32) = G_CONSTANT i32 16
+ %4:sgpr(s32) = G_LSHR %2, %3(s32)
+ %5:sgpr(s16) = G_TRUNC %4(s32)
+ %6:sgpr(s32) = G_FPEXT %5(s16)
+ $sgpr0 = COPY %6(s32)
+
+...
+---
+name: fptrunc_f32_to_f16
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fptrunc_f32_to_f16
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_CVT_F16_F32 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %1
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_FPTRUNC %0(s32)
+ %2:sgpr(s32) = G_ANYEXT %1(s16)
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: fceil_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fceil_f32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_CEIL_F32 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %1
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = G_FCEIL %0
+ $sgpr0 = COPY %1(s32)
+
+...
+---
+name: ffloor_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: ffloor_f32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_FLOOR_F32 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %1
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = G_FFLOOR %0
+ $sgpr0 = COPY %1(s32)
+
+...
+---
+name: ftrunc_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: ftrunc_f32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_TRUNC_F32 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %1
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = G_INTRINSIC_TRUNC %0
+ $sgpr0 = COPY %1(s32)
+
+...
+---
+name: frint_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: frint_f32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_RNDNE_F32 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %1
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = G_FRINT %0
+ $sgpr0 = COPY %1(s32)
+
+...
+---
+name: fceil_f16
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fceil_f16
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_CEIL_F16 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s16) = G_FCEIL %1
+ %3:sgpr(s32) = G_ANYEXT %2(s16)
+ $sgpr0 = COPY %3(s32)
+
+...
+---
+name: ffloor_f16
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: ffloor_f16
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_FLOOR_F16 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s16) = G_FFLOOR %1
+ %3:sgpr(s32) = G_ANYEXT %2(s16)
+ $sgpr0 = COPY %3(s32)
+
+...
+---
+name: ftrunc_f16
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: ftrunc_f16
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_TRUNC_F16 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s16) = G_INTRINSIC_TRUNC %1
+ %3:sgpr(s32) = G_ANYEXT %2(s16)
+ $sgpr0 = COPY %3(s32)
+
+...
+---
+name: frint_f16
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: frint_f16
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_RNDNE_F16 [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s16) = G_FRINT %1
+ %3:sgpr(s32) = G_ANYEXT %2(s16)
+ $sgpr0 = COPY %3(s32)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop2.mir
new file mode 100644
index 000000000000000..48b4534c871ee0c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop2.mir
@@ -0,0 +1,294 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX1150 %s
+
+---
+name: fadd_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: fadd_f32
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_ADD_F32 [[COPY]], [[COPY1]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FADD %0, %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: fsub_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: fsub_f32
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_SUB_F32 [[COPY]], [[COPY1]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FSUB %0, %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: fmul_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: fmul_f32
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_MUL_F32 [[COPY]], [[COPY1]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FMUL %0, %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: fmin_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: fmin_f32
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_MIN_F32 [[COPY]], [[COPY1]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FMINNUM %0, %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: fmax_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: fmax_f32
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_MAX_F32 [[COPY]], [[COPY1]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FMAXNUM %0, %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: fadd_f16
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: fadd_f16
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: %4:sreg_32 = nofpexcept S_ADD_F16 [[COPY]], [[COPY1]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %4
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s16) = G_FADD %1, %3
+ %5:sgpr(s32) = G_ANYEXT %4(s16)
+ $sgpr0 = COPY %5(s32)
+
+...
+---
+name: fsub_f16
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: fsub_f16
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: %4:sreg_32 = nofpexcept S_SUB_F16 [[COPY]], [[COPY1]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %4
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s16) = G_FSUB %1, %3
+ %5:sgpr(s32) = G_ANYEXT %4(s16)
+ $sgpr0 = COPY %5(s32)
+
+...
+---
+name: fmul_f16
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: fmul_f16
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: %4:sreg_32 = nofpexcept S_MUL_F16 [[COPY]], [[COPY1]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %4
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s16) = G_FMUL %1, %3
+ %5:sgpr(s32) = G_ANYEXT %4(s16)
+ $sgpr0 = COPY %5(s32)
+
+...
+---
+name: fmin_f16
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: fmin_f16
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: %4:sreg_32 = nofpexcept S_MIN_F16 [[COPY]], [[COPY1]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %4
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s16) = G_FMINNUM %1, %3
+ %5:sgpr(s32) = G_ANYEXT %4(s16)
+ $sgpr0 = COPY %5(s32)
+
+...
+---
+# Uniform s16 G_FMAXNUM should select S_MAX_F16. The result is copied to
+# $sgpr0 so the instructions under test stay live and are actually checked.
+name: fmax_f16
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: fmax_f16
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: %4:sreg_32 = nofpexcept S_MAX_F16 [[COPY]], [[COPY1]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %4
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s16) = G_FMAXNUM %1, %3
+ %5:sgpr(s32) = G_ANYEXT %4(s16)
+ $sgpr0 = COPY %5(s32)
+
+...
+---
+name: s_cvt_pkrtz_v2f16_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: s_cvt_pkrtz_v2f16_f32
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: %2:sreg_32 = nofpexcept S_CVT_PK_RTZ_F16_F32 [[COPY]], [[COPY1]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %2
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0(s32), %1(s32)
+ $sgpr0 = COPY %2(<2 x s16>)
+
+...
+---
+name: fmac_f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+
+ ; GFX1150-LABEL: name: fmac_f32
+ ; GFX1150: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX1150-NEXT: %3:sreg_32 = nofpexcept S_FMAC_F32 [[COPY1]], [[COPY2]], [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %3
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = COPY $sgpr2
+ %3:sgpr(s32) = G_FMA %1, %2, %0
+ $sgpr0 = COPY %3(s32)
+
+...
+---
+name: fmac_f16
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+
+ ; GFX1150-LABEL: name: fmac_f16
+ ; GFX1150: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX1150-NEXT: %6:sreg_32 = nofpexcept S_FMAC_F16 [[COPY1]], [[COPY2]], [[COPY]], implicit $mode
+ ; GFX1150-NEXT: $sgpr0 = COPY %6
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = COPY $sgpr2
+ %5:sgpr(s16) = G_TRUNC %4(s32)
+ %6:sgpr(s16) = G_FMA %3, %5, %1
+ %7:sgpr(s32) = G_ANYEXT %6(s16)
+ $sgpr0 = COPY %7(s32)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sopc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sopc.mir
new file mode 100644
index 000000000000000..3ef974135d2f156
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sopc.mir
@@ -0,0 +1,647 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX1150 %s
+
+---
+name: f32_olt
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_olt
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_LT_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(olt), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_oeq
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_oeq
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_EQ_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(oeq), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_ole
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_ole
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_LE_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(ole), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_ogt
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_ogt
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_GT_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(ogt), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_one
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_one
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_LG_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(one), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_oge
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_oge
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_GE_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(oge), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_ord
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_ord
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_O_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(ord), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_uno
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_uno
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_U_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(uno), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_ult
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_ult
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_NGE_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(ult), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_ueq
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_ueq
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_NLG_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(ueq), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_ule
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_ule
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_NGT_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(ule), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_ugt
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_ugt
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_NLE_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(ugt), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_une
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_une
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_NEQ_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(une), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f32_uge
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_uge
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_NLT_F32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = G_FCMP floatpred(uge), %0(s32), %1
+ $sgpr0 = COPY %2(s32)
+
+...
+---
+name: f16_olt
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_olt
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_LT_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(olt), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_oeq
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_oeq
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_EQ_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(oeq), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_ole
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_ole
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_LE_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(ole), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_ogt
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_ogt
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_GT_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(ogt), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_one
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_one
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_LG_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(one), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_oge
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_oge
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_GE_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(oge), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_ord
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_ord
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_O_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(ord), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_uno
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_uno
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_U_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(uno), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_ult
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_ult
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_NGE_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(ult), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_ueq
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_ueq
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_NLG_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(ueq), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_ule
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_ule
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_NGT_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(ule), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_ugt
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_ugt
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_NLE_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(ugt), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_une
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_une
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_NEQ_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(une), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
+---
+name: f16_uge
+legalized: true
+regBankSelected: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_uge
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX1150-NEXT: S_CMP_NLT_F16 [[COPY]], [[COPY1]], implicit-def $scc, implicit $mode
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX1150-NEXT: $sgpr0 = COPY [[COPY2]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s16) = G_TRUNC %0(s32)
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s16) = G_TRUNC %2(s32)
+ %4:sgpr(s32) = G_FCMP floatpred(uge), %1(s16), %3
+ $sgpr0 = COPY %4(s32)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp-s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp-s32.mir
new file mode 100644
index 000000000000000..bd4acfe00a0b283
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp-s32.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -march=amdgcn -mcpu=gfx1150 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX1150 %s
+
+---
+name: f32_olt
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f32_olt
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX1150-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(olt), [[COPY]](s32), [[COPY1]]
+ ; GFX1150-NEXT: $sgpr0 = COPY [[FCMP]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = G_FCMP floatpred(olt), %0(s32), %1
+ $sgpr0 = COPY %2
+
+...
+
+---
+name: f16_olt
+body: |
+ bb.1.entry:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: f16_olt
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX1150-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX1150-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(olt), [[TRUNC]](s16), [[TRUNC1]]
+ ; GFX1150-NEXT: $sgpr0 = COPY [[FCMP]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s16) = G_TRUNC %0(s32)
+ %2:_(s32) = COPY $sgpr1
+ %3:_(s16) = G_TRUNC %2(s32)
+ %4:_(s32) = G_FCMP floatpred(olt), %1(s16), %3
+ $sgpr0 = COPY %4
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
index bf93a3d242e0150..d25a3fdfa3c3098 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
@@ -4,6 +4,7 @@
# RUN: llc -O0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -O0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -O0 -march=amdgcn -mcpu=gfx1150 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
---
name: test_fcmp_s32
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir
index f599c869d114d36..8f09618207aa118 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir
@@ -1,6 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=GCN,GFX803 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck --check-prefixes=GCN,GFX803 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=GCN,GFX1150 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck --check-prefixes=GCN,GFX1150 %s
---
name: fcmp_ss
@@ -9,14 +11,21 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1
- ; CHECK-LABEL: name: fcmp_ss
- ; CHECK: liveins: $sgpr0, $sgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY2]](s32), [[COPY3]]
+ ; GFX803-LABEL: name: fcmp_ss
+ ; GFX803: liveins: $sgpr0, $sgpr1
+ ; GFX803-NEXT: {{ $}}
+ ; GFX803-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX803-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX803-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GFX803-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; GFX803-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY2]](s32), [[COPY3]]
+ ; GFX1150-LABEL: name: fcmp_ss
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX1150-NEXT: [[FCMP:%[0-9]+]]:sgpr(s32) = G_FCMP floatpred(uge), [[COPY]](s32), [[COPY1]]
+ ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[FCMP]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_FCMP floatpred(uge), %0(s32), %1
@@ -29,13 +38,13 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0
- ; CHECK-LABEL: name: fcmp_sv
- ; CHECK: liveins: $sgpr0, $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY2]](s32), [[COPY1]]
+ ; GCN-LABEL: name: fcmp_sv
+ ; GCN: liveins: $sgpr0, $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY2]](s32), [[COPY1]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s1) = G_FCMP floatpred(uge), %0, %1
@@ -48,13 +57,13 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0
- ; CHECK-LABEL: name: fcmp_vs
- ; CHECK: liveins: $sgpr0, $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY1]](s32), [[COPY2]]
+ ; GCN-LABEL: name: fcmp_vs
+ ; GCN: liveins: $sgpr0, $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY1]](s32), [[COPY2]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s1) = G_FCMP floatpred(uge), %1, %0
@@ -67,12 +76,12 @@ legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
- ; CHECK-LABEL: name: fcmp_vv
- ; CHECK: liveins: $vgpr0, $vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP floatpred(uge), [[COPY]](s32), [[COPY1]]
+ ; GCN-LABEL: name: fcmp_vv
+ ; GCN: liveins: $vgpr0, $vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP floatpred(uge), [[COPY]](s32), [[COPY1]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP floatpred(uge), %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-salu-float.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-salu-float.mir
new file mode 100644
index 000000000000000..dba206af69fe77c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-salu-float.mir
@@ -0,0 +1,246 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=GFX1150 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck --check-prefixes=GFX1150 %s
+
+---
+name: fadd_f32
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX1150-LABEL: name: fadd_f32
+ ; GFX1150: liveins: $sgpr0, $sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX1150-NEXT: [[FADD:%[0-9]+]]:sgpr(s32) = G_FADD [[COPY]], [[COPY1]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = G_FADD %0, %1
+...
+
+---
+name: fptosi_f32_to_i32
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fptosi_f32_to_i32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[FPTOSI:%[0-9]+]]:sgpr(s32) = G_FPTOSI [[COPY]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = G_FPTOSI %0(s32)
+...
+
+---
+name: fptoui_f32_to_u32
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fptoui_f32_to_u32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(s32) = G_FPTOUI [[COPY]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = G_FPTOUI %0(s32)
+...
+
+---
+name: sitofp_i32_to_f32
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: sitofp_i32_to_f32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[SITOFP:%[0-9]+]]:sgpr(s32) = G_SITOFP [[COPY]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = G_SITOFP %0(s32)
+...
+
+---
+name: uitofp_u32_to_f32
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: uitofp_u32_to_f32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[UITOFP:%[0-9]+]]:sgpr(s32) = G_UITOFP [[COPY]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = G_UITOFP %0(s32)
+...
+
+---
+name: fptrunc_f32_to_f16
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fptrunc_f32_to_f16
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[FPTRUNC:%[0-9]+]]:sgpr(s16) = G_FPTRUNC [[COPY]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s16) = G_FPTRUNC %0(s32)
+...
+
+---
+name: fpext_f16_to_f32
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fpext_f16_to_f32
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
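+ ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX1150-NEXT: [[FPEXT:%[0-9]+]]:sgpr(s32) = G_FPEXT [[TRUNC]](s16)
+ %1:_(s32) = COPY $sgpr0
+ %0:_(s16) = G_TRUNC %1(s32)
+ %2:_(s32) = G_FPEXT %0(s16)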
+...
+
+# Tests below should not select scalar registers
+
+---
+name: fadd_f64
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+ ; GFX1150-LABEL: name: fadd_f64
+ ; GFX1150: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+ ; GFX1150-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; GFX1150-NEXT: [[FADD:%[0-9]+]]:vgpr(s64) = G_FADD [[COPY2]], [[COPY3]]
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_FADD %0, %1
+...
+
+---
+name: fptosi_f64_to_i32
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; GFX1150-LABEL: name: fptosi_f64_to_i32
+ ; GFX1150: liveins: $sgpr0_sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+ ; GFX1150-NEXT: [[FPTOSI:%[0-9]+]]:vgpr(s32) = G_FPTOSI [[COPY1]](s64)
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s32) = G_FPTOSI %0(s64)
+...
+
+---
+name: fptoui_f16_to_u16
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fptoui_f16_to_u16
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16)
+ ; GFX1150-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s16) = G_FPTOUI [[COPY1]](s16)
+ %1:_(s32) = COPY $sgpr0
+ %0:_(s16) = G_TRUNC %1(s32)
+ %2:_(s16) = G_FPTOUI %0(s16)
+...
+
+---
+name: sitofp_i32_to_f64
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: sitofp_i32_to_f64
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GFX1150-NEXT: [[SITOFP:%[0-9]+]]:vgpr(s64) = G_SITOFP [[COPY1]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s64) = G_SITOFP %0(s32)
+...
+
+---
+name: uitofp_u16_to_f16
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: uitofp_u16_to_f16
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16)
+ ; GFX1150-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s16) = G_UITOFP [[COPY1]](s16)
+ %1:_(s32) = COPY $sgpr0
+ %0:_(s16) = G_TRUNC %1(s32)
+ %2:_(s16) = G_UITOFP %0(s16)
+...
+
+---
+name: fptrunc_f64_to_f32
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; GFX1150-LABEL: name: fptrunc_f64_to_f32
+ ; GFX1150: liveins: $sgpr0_sgpr1
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+ ; GFX1150-NEXT: [[FPTRUNC:%[0-9]+]]:vgpr(s32) = G_FPTRUNC [[COPY1]](s64)
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s32) = G_FPTRUNC %0(s64)
+...
+
+---
+name: fpext_f32_to_f64
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX1150-LABEL: name: fpext_f32_to_f64
+ ; GFX1150: liveins: $sgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GFX1150-NEXT: [[FPEXT:%[0-9]+]]:vgpr(s64) = G_FPEXT [[COPY1]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s64) = G_FPEXT %0(s32)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
index 7e76569606ef991..aa24cc32047292a 100644
--- a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
+++ b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
@@ -1,6 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
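-; RUN: llc -march=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX11,GFX1100 %s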
+; RUN: llc -march=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX11,GFX1150 %s
declare float @llvm.fabs.f32(float)
declare float @llvm.fma.f32(float, float, float)
@@ -311,3 +312,39 @@ define float @v_fma_k_f32_src_mods(float %x, float %y) {
; GFX9: codeLenInByte = 24
; GFX10: codeLenInByte = 20
; GFX11: codeLenInByte = 20
+
+define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) {
+; GFX9-LABEL: s_fmaak_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v0, s1 ; encoding: [0x01,0x02,0x00,0x7e]
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x43800000 ; encoding: [0xff,0x02,0x02,0x7e,0x00,0x00,0x80,0x43]
+; GFX9-NEXT: v_fma_f32 v0, s0, v0, v1 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x00,0x06,0x04]
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: s_fmaak_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_e32 v0, 0x43800000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x00,0x80,0x43]
+; GFX10-NEXT: v_fmac_f32_e64 v0, s0, s1 ; encoding: [0x00,0x00,0x2b,0xd5,0x00,0x02,0x00,0x00]
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX1100-LABEL: s_fmaak_f32:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: v_mov_b32_e32 v0, 0x43800000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x00,0x80,0x43]
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; encoding: [0x01,0x00,0x87,0xbf]
+; GFX1100-NEXT: v_fmac_f32_e64 v0, s0, s1 ; encoding: [0x00,0x00,0x2b,0xd5,0x00,0x02,0x00,0x00]
+; GFX1100-NEXT: ; return to shader part epilog
+;
+; GFX1150-LABEL: s_fmaak_f32:
+; GFX1150: ; %bb.0:
+; GFX1150-NEXT: s_fmaak_f32 s0, s0, s1, 0x43800000 ; encoding: [0x00,0x01,0x80,0xa2,0x00,0x00,0x80,0x43]
+; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf]
+; GFX1150-NEXT: v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e]
+; GFX1150-NEXT: ; return to shader part epilog
+ %fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
+ ret float %fma
+}
+
+; GFX9: codeLenInByte = 20
+; GFX10: codeLenInByte = 16
+; GFX1100: codeLenInByte = 20
+; GFX1150: codeLenInByte = 16
diff --git a/llvm/test/CodeGen/AMDGPU/commute-compares-scalar-float.ll b/llvm/test/CodeGen/AMDGPU/commute-compares-scalar-float.ll
new file mode 100644
index 000000000000000..e996fda4c9fd6ca
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/commute-compares-scalar-float.ll
@@ -0,0 +1,515 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1150 -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SDAG %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1150 -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL %s
+
+define amdgpu_vs void @fcmp_f32_olt_to_ogt(ptr addrspace(1) inreg %out, float inreg %a) {
+; SDAG-LABEL: fcmp_f32_olt_to_ogt:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_gt_f32 s2, 2.0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f32_olt_to_ogt:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_gt_f32 s2, 2.0
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp olt float 2.0, %a
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f32_ogt_to_olt(ptr addrspace(1) inreg %out, float inreg %a) {
+; SDAG-LABEL: fcmp_f32_ogt_to_olt:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_lt_f32 s2, 2.0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f32_ogt_to_olt:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_lt_f32 s2, 2.0
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ogt float 2.0, %a
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f32_ole_to_oge(ptr addrspace(1) inreg %out, float inreg %a) {
+; SDAG-LABEL: fcmp_f32_ole_to_oge:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_ge_f32 s2, 2.0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f32_ole_to_oge:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_ge_f32 s2, 2.0
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ole float 2.0, %a
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f32_oge_to_ole(ptr addrspace(1) inreg %out, float inreg %a) {
+; SDAG-LABEL: fcmp_f32_oge_to_ole:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_le_f32 s2, 2.0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f32_oge_to_ole:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_le_f32 s2, 2.0
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp oge float 2.0, %a
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f32_ult_to_ugt(ptr addrspace(1) inreg %out, float inreg %a) {
+; SDAG-LABEL: fcmp_f32_ult_to_ugt:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nle_f32 s2, 2.0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f32_ult_to_ugt:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nle_f32 s2, 2.0
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ult float 2.0, %a
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f32_ugt_to_ult(ptr addrspace(1) inreg %out, float inreg %a) {
+; SDAG-LABEL: fcmp_f32_ugt_to_ult:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nge_f32 s2, 2.0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f32_ugt_to_ult:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nge_f32 s2, 2.0
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ugt float 2.0, %a
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f32_ule_to_uge(ptr addrspace(1) inreg %out, float inreg %a) {
+; SDAG-LABEL: fcmp_f32_ule_to_uge:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nlt_f32 s2, 2.0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f32_ule_to_uge:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nlt_f32 s2, 2.0
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ule float 2.0, %a
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f32_uge_to_ule(ptr addrspace(1) inreg %out, float inreg %a) {
+; SDAG-LABEL: fcmp_f32_uge_to_ule:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_ngt_f32 s2, 2.0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f32_uge_to_ule:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_ngt_f32 s2, 2.0
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp uge float 2.0, %a
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f16_olt_to_ogt(ptr addrspace(1) inreg %out, half inreg %a) {
+; SDAG-LABEL: fcmp_f16_olt_to_ogt:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_gt_f16 s2, 0x4000
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f16_olt_to_ogt:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_gt_f16 s2, 0x4000
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp olt half 2.0, %a
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f16_ogt_to_olt(ptr addrspace(1) inreg %out, half inreg %a) {
+; SDAG-LABEL: fcmp_f16_ogt_to_olt:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_lt_f16 s2, 0x4000
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f16_ogt_to_olt:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_lt_f16 s2, 0x4000
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ogt half 2.0, %a ; constant on the LHS: both selectors are expected to commute the compare to s_cmp_lt_f16 with %a first and 0x4000 (half 2.0) second
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f16_ole_to_oge(ptr addrspace(1) inreg %out, half inreg %a) {
+; SDAG-LABEL: fcmp_f16_ole_to_oge:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_ge_f16 s2, 0x4000
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f16_ole_to_oge:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_ge_f16 s2, 0x4000
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ole half 2.0, %a ; constant on the LHS: both selectors are expected to commute the compare to s_cmp_ge_f16 with %a first and 0x4000 (half 2.0) second
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f16_oge_to_ole(ptr addrspace(1) inreg %out, half inreg %a) {
+; SDAG-LABEL: fcmp_f16_oge_to_ole:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_le_f16 s2, 0x4000
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f16_oge_to_ole:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_le_f16 s2, 0x4000
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp oge half 2.0, %a ; constant on the LHS: both selectors are expected to commute the compare to s_cmp_le_f16 with %a first and 0x4000 (half 2.0) second
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f16_ult_to_ugt(ptr addrspace(1) inreg %out, half inreg %a) {
+; SDAG-LABEL: fcmp_f16_ult_to_ugt:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nle_f16 s2, 0x4000
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f16_ult_to_ugt:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nle_f16 s2, 0x4000
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ult half 2.0, %a ; constant on the LHS: commuting gives "%a ugt 2.0", which both selectors are expected to emit as s_cmp_nle_f16 (not less-or-equal) against 0x4000
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f16_ugt_to_ult(ptr addrspace(1) inreg %out, half inreg %a) {
+; SDAG-LABEL: fcmp_f16_ugt_to_ult:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nge_f16 s2, 0x4000
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f16_ugt_to_ult:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nge_f16 s2, 0x4000
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ugt half 2.0, %a ; constant on the LHS: commuting gives "%a ult 2.0", which both selectors are expected to emit as s_cmp_nge_f16 (not greater-or-equal) against 0x4000
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f16_ule_to_uge(ptr addrspace(1) inreg %out, half inreg %a) {
+; SDAG-LABEL: fcmp_f16_ule_to_uge:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nlt_f16 s2, 0x4000
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f16_ule_to_uge:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nlt_f16 s2, 0x4000
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ule half 2.0, %a ; constant on the LHS: commuting gives "%a uge 2.0", which both selectors are expected to emit as s_cmp_nlt_f16 (not less-than) against 0x4000
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @fcmp_f16_uge_to_ule(ptr addrspace(1) inreg %out, half inreg %a) {
+; SDAG-LABEL: fcmp_f16_uge_to_ule:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_ngt_f16 s2, 0x4000
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: fcmp_f16_uge_to_ule:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_ngt_f16 s2, 0x4000
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp uge half 2.0, %a ; constant on the LHS: commuting gives "%a ule 2.0", which both selectors are expected to emit as s_cmp_ngt_f16 (not greater-than) against 0x4000
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir
index 1315c227ecde385..36fa95c4c3ab5bf 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir
@@ -126,3 +126,102 @@ body: |
%3:sreg_32 = COPY %1:vgpr_32
%4:sreg_32 = S_CSELECT_B32 killed %2:sreg_32, killed %3:sreg_32, implicit undef $scc
---
+
+---
+name: cmp_f32
+body: |
+ bb.0:
+ ; GCN-LABEL: name: cmp_f32
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: %6:sreg_64_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[V_CVT_F32_U32_e64_]], 0, [[DEF1]], 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %6, implicit $exec
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %3:sreg_32 = COPY %2:vgpr_32 ; VGPR -> SGPR copy that feeds the scalar f32 compare below
+ nofpexcept S_CMP_LT_F32 killed %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $mode ; expected to be rewritten to V_CMP_LT_F32_e64 reading the V_CVT result directly (nofpexcept is kept)
+ %4:sreg_64_xexec = COPY $scc ; expected to disappear: the V_CMP result feeds V_CNDMASK instead of this $scc copy
+ %5:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %4, implicit $exec
+...
+
+---
+name: cmp_f16
+body: |
+ bb.0.entry:
+ ; GCN-LABEL: name: cmp_f16
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CVT_F16_U16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[V_CVT_F16_U16_e64_]], 0, [[DEF1]], 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_t16_e64_]], implicit $exec
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:vgpr_32 = V_CVT_F16_U16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %3:sreg_32 = COPY %2:vgpr_32 ; VGPR -> SGPR copy that feeds the scalar f16 compare below
+ nofpexcept S_CMP_LT_F16 killed %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $mode ; expected to be rewritten to V_CMP_LT_F16_t16_e64 reading the V_CVT result directly (nofpexcept is kept)
+ %4:sreg_64_xexec = COPY $scc ; expected to disappear: the V_CMP result feeds V_CNDMASK instead of this $scc copy
+ %5:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %4, implicit $exec
+...
+
+# Moving S_CVT_HI_F32_F16 to the VALU needs an extra shift (V_LSHRREV_B32 by 16) to select the high 16 bits before the convert
+---
+name: cvt_hi_f32_f16
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: cvt_hi_f32_f16
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CVT_F16_U16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_CVT_F16_U16_e64_]], implicit $exec
+ ; GCN-NEXT: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_t16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:vgpr_32 = V_CVT_F16_U16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %2:sreg_32 = COPY %1:vgpr_32 ; VGPR -> SGPR copy that feeds the scalar convert below
+ %3:sreg_32 = S_CVT_HI_F32_F16 %2:sreg_32, implicit $mode ; expected to become V_LSHRREV_B32_e64 16 followed by V_CVT_F32_F16_t16_e64 on the shifted value
+...
+
+# Test to ensure that src2 of fmac (an SGPR here) is copied to a VGPR when the instruction is moved to the VALU
+---
+name: fmac_f32
+body: |
+ bb.0:
+ ; GCN-LABEL: name: fmac_f32
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
+ ; GCN-NEXT: %6:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[V_CVT_F32_U32_e64_]], 0, [[DEF1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:sreg_32 = IMPLICIT_DEF
+ %3:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %4:sreg_32 = COPY %3:vgpr_32 ; VGPR -> SGPR copy that feeds src0 of the scalar fmac below
+ %5:sreg_32 = nofpexcept S_FMAC_F32 killed %4:sreg_32, %1:sreg_32, %2:sreg_32, implicit $mode ; expected to become V_FMAC_F32_e64 with src0 = V_CVT result, src1 = %1 (still an SGPR) and src2 = a new VGPR copy of %2
+...
+
+---
+name: fmac_f16
+body: |
+ bb.0:
+ ; GCN-LABEL: name: fmac_f16
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
+ ; GCN-NEXT: %6:vgpr_32 = nofpexcept V_FMAC_F16_t16_e64 0, killed [[DEF1]], 0, [[COPY]], 0, [[V_CVT_F32_U32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:sreg_32 = IMPLICIT_DEF
+ %3:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %4:sreg_32 = COPY %3:vgpr_32 ; VGPR -> SGPR copy that feeds src2 of the scalar fmac below
+ %5:sreg_32 = nofpexcept S_FMAC_F16 killed %1:sreg_32, %2:sreg_32, %4:sreg_32, implicit $mode ; expected to become V_FMAC_F16_t16_e64 with src0 = %1 (still an SGPR), src1 = a new VGPR copy of %2 and src2 = the V_CVT result
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir
new file mode 100644
index 000000000000000..39511a95cebc7de
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir
@@ -0,0 +1,238 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
+
+---
+name: fmac_fold_inlinable_src0_to_fmamk
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: fmac_fold_inlinable_src0_to_fmamk
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1056964608, [[COPY1]], implicit $mode
+ ; CHECK-NEXT: $sgpr0 = COPY %fma
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = COPY $sgpr1
+ %inlinable:sreg_32 = S_MOV_B32 1056964608 ; 0x3F000000, the bit pattern of 0.5f
+ %fma:sreg_32 = nofpexcept S_FMAC_F32 %inlinable, %0, %1, implicit $mode ; immediate feeds src0: expected result is S_FMAMK_F32 with %0 moved to the first operand and the immediate in the middle
+ $sgpr0 = COPY %fma
+...
+---
+name: fmac_fold_inlinable_src1_to_fmamk
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: fmac_fold_inlinable_src1_to_fmamk
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1056964608, [[COPY1]], implicit $mode
+ ; CHECK-NEXT: $sgpr0 = COPY %fma
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = COPY $sgpr1
+ %inlinable:sreg_32 = S_MOV_B32 1056964608 ; 0x3F000000, the bit pattern of 0.5f
+ %fma:sreg_32 = nofpexcept S_FMAC_F32 %0, %inlinable, %1, implicit $mode ; immediate feeds src1: expected result is S_FMAMK_F32 with the immediate as the middle operand
+ $sgpr0 = COPY %fma
+...
+---
+name: fmac_fold_inlinable_src2_to_fmaak
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: fmac_fold_inlinable_src2_to_fmaak
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], [[COPY1]], 1056964608, implicit $mode
+ ; CHECK-NEXT: $sgpr0 = COPY %fma
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = COPY $sgpr1
+ %inlinable:sreg_32 = S_MOV_B32 1056964608 ; 0x3F000000, the bit pattern of 0.5f
+ %fma:sreg_32 = nofpexcept S_FMAC_F32 %0, %1, %inlinable, implicit $mode ; immediate feeds src2 (the addend): expected result is S_FMAAK_F32 with the immediate as the last operand
+ $sgpr0 = COPY %fma
+...
+---
+name: fmac_fold_noninlinable_src0_to_fmamk
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: fmac_fold_noninlinable_src0_to_fmamk
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1234567890, [[COPY1]], implicit $mode
+ ; CHECK-NEXT: $sgpr0 = COPY %fma
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = COPY $sgpr1
+ %noninlinable:sreg_32 = S_MOV_B32 1234567890 ; 0x499602D2, used by this file as its 32-bit literal that is not an inline constant
+ %fma:sreg_32 = nofpexcept S_FMAC_F32 %noninlinable, %0, %1, implicit $mode ; literal feeds src0: expected result is S_FMAMK_F32 with %0 moved to the first operand and the literal in the middle
+ $sgpr0 = COPY %fma
+...
+---
+name: fmac_fold_noninlinable_src1_to_fmamk
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: fmac_fold_noninlinable_src1_to_fmamk
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1234567890, [[COPY1]], implicit $mode
+ ; CHECK-NEXT: $sgpr0 = COPY %fma
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = COPY $sgpr1
+ %noninlinable:sreg_32 = S_MOV_B32 1234567890 ; 0x499602D2, used by this file as its 32-bit literal that is not an inline constant
+ %fma:sreg_32 = nofpexcept S_FMAC_F32 %0, %noninlinable, %1, implicit $mode ; literal feeds src1: expected result is S_FMAMK_F32 with the literal as the middle operand
+ $sgpr0 = COPY %fma
+...
+---
+name: fmac_fold_noninlinable_src2_to_fmaak
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: fmac_fold_noninlinable_src2_to_fmaak
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], [[COPY1]], 1234567890, implicit $mode
+ ; CHECK-NEXT: $sgpr0 = COPY %fma
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = COPY $sgpr1
+ %noninlinable:sreg_32 = S_MOV_B32 1234567890 ; 0x499602D2, used by this file as its 32-bit literal that is not an inline constant
+ %fma:sreg_32 = nofpexcept S_FMAC_F32 %0, %1, %noninlinable, implicit $mode ; literal feeds src2 (the addend): expected result is S_FMAAK_F32 with the literal as the last operand
+ $sgpr0 = COPY %fma
+...
+
+---
+name: fmac_fold_inlinable_src2_to_fmaak_noninlinable_src0_to_fmamk
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: fmac_fold_inlinable_src2_to_fmaak_noninlinable_src0_to_fmamk
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1234567890, 1056964608, implicit $mode
+ ; CHECK-NEXT: $sgpr0 = COPY %fma
+ %0:sreg_32 = COPY $sgpr0
+ %inlinable:sreg_32 = S_MOV_B32 1056964608 ; 0x3F000000, the bit pattern of 0.5f
+ %noninlinable:sreg_32 = S_MOV_B32 1234567890 ; 0x499602D2, the literal that is not an inline constant
+ %fma:sreg_32 = nofpexcept S_FMAC_F32 %noninlinable, %0, %inlinable, implicit $mode ; literal in src0 plus 0.5f in src2: both are expected to fold into one S_FMAMK_F32 (register, literal, 0.5f)
+ $sgpr0 = COPY %fma
+...
+---
+name: fmac_fold_inlinable_src2_to_fmaak_noninlinable_src1_to_fmamk
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: fmac_fold_inlinable_src2_to_fmaak_noninlinable_src1_to_fmamk
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1234567890, 1056964608, implicit $mode
+ ; CHECK-NEXT: $sgpr0 = COPY %fma
+ %0:sreg_32 = COPY $sgpr0
+ %inlinable:sreg_32 = S_MOV_B32 1056964608 ; 0x3F000000, the bit pattern of 0.5f
+ %noninlinable:sreg_32 = S_MOV_B32 1234567890 ; 0x499602D2, the literal that is not an inline constant
+ %fma:sreg_32 = nofpexcept S_FMAC_F32 %0, %noninlinable, %inlinable, implicit $mode ; literal in src1 plus 0.5f in src2: both are expected to fold into one S_FMAMK_F32 (register, literal, 0.5f)
+ $sgpr0 = COPY %fma
+...
+---
+name: fmac_fold_noninlinable_src2_to_fmaak_inlinable_src1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: fmac_fold_noninlinable_src2_to_fmaak_inlinable_src1
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], 1056964608, 1234567890, implicit $mode
+ ; CHECK-NEXT: $sgpr0 = COPY %fma
+ %0:sreg_32 = COPY $sgpr0
+ %noninlinable:sreg_32 = S_MOV_B32 1234567890 ; 0x499602D2, the literal that is not an inline constant
+ %inlinable:sreg_32 = S_MOV_B32 1056964608 ; 0x3F000000, the bit pattern of 0.5f
+ %fma:sreg_32 = nofpexcept S_FMAC_F32 %0, %inlinable, %noninlinable, implicit $mode ; 0.5f in src1 plus literal in src2: both are expected to fold into one S_FMAAK_F32 (register, 0.5f, literal)
+ $sgpr0 = COPY %fma
+...
+---
+name: fmac_fold_noninlinable_src2_to_fmaak_dont_fold_other_noninlinable
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: fmac_fold_noninlinable_src2_to_fmaak_dont_fold_other_noninlinable
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK-NEXT: %noninlinable2:sreg_32 = S_MOV_B32 1234567891
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], %noninlinable2, 1234567890, implicit $mode
+ ; CHECK-NEXT: $sgpr0 = COPY %fma
+ %0:sreg_32 = COPY $sgpr0
+ %noninlinable:sreg_32 = S_MOV_B32 1234567890 ; first literal; expected to be folded into the fma
+ %noninlinable2:sreg_32 = S_MOV_B32 1234567891 ; second, different literal; expected to stay in its register
+ %fma:sreg_32 = nofpexcept S_FMAC_F32 %0, %noninlinable2, %noninlinable, implicit $mode ; two distinct literals: only the src2 one is expected to fold (S_FMAAK_F32), src1 keeps using %noninlinable2
+ $sgpr0 = COPY %fma
+...
+---
+name: fmac_fold_inlinable_src1_to_fmamk_noninlinable_src2_to_fmaak
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: fmac_fold_inlinable_src1_to_fmamk_noninlinable_src2_to_fmaak
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], 1056964608, 1234567890, implicit $mode
+ ; CHECK-NEXT: $sgpr0 = COPY %fma
+ %0:sreg_32 = COPY $sgpr0
+ %inlinable:sreg_32 = S_MOV_B32 1056964608 ; 0x3F000000, the bit pattern of 0.5f; defined before the literal in this variant
+ %noninlinable:sreg_32 = S_MOV_B32 1234567890 ; 0x499602D2, the literal that is not an inline constant
+ %fma:sreg_32 = nofpexcept S_FMAC_F32 %0, %inlinable, %noninlinable, implicit $mode ; 0.5f in src1 plus literal in src2: expected result is still one S_FMAAK_F32 (register, 0.5f, literal)
+ $sgpr0 = COPY %fma
+...
+---
+name: fmac_fold_same_noninlinable_src0_and_src1_to_fmamk
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: fmac_fold_same_noninlinable_src0_and_src1_to_fmamk
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 1234567890, 1234567890, [[COPY]], implicit $mode
+ ; CHECK-NEXT: $sgpr0 = COPY %fma
+ %0:sreg_32 = COPY $sgpr0
+ %noninlinable:sreg_32 = S_MOV_B32 1234567890 ; 0x499602D2, the literal that is not an inline constant
+ %fma:sreg_32 = nofpexcept S_FMAC_F32 %noninlinable, %noninlinable, %0, implicit $mode ; the same literal feeds src0 and src1: both uses are expected to fold into one S_FMAMK_F32
+ $sgpr0 = COPY %fma
+...
diff --git a/llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll b/llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll
new file mode 100644
index 000000000000000..5fb5a8b1b0350ab
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll
@@ -0,0 +1,174 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx1150 -global-isel -verify-machineinstrs < %s | FileCheck %s
+
+define amdgpu_vs float @sitofp_i32_to_f32(i32 inreg %val) {
+; CHECK-LABEL: sitofp_i32_to_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_cvt_f32_i32 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = sitofp i32 %val to float ; signed i32 -> f32 on an SGPR input; expected to select s_cvt_f32_i32
+ ret float %res
+}
+
+define amdgpu_vs float @uitofp_u32_to_f32(i32 inreg %val) {
+; CHECK-LABEL: uitofp_u32_to_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_cvt_f32_u32 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = uitofp i32 %val to float ; unsigned i32 -> f32 on an SGPR input; expected to select s_cvt_f32_u32
+ ret float %res
+}
+
+define amdgpu_vs i32 @fptosi_f32_to_i32(float inreg %val) {
+; CHECK-LABEL: fptosi_f32_to_i32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_cvt_i32_f32 s0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = fptosi float %val to i32 ; f32 -> signed i32 on an SGPR input; expected to select s_cvt_i32_f32 with the result returned in s0
+ ret i32 %res
+}
+
+define amdgpu_vs i32 @fptoui_f32_to_u32(float inreg %val) {
+; CHECK-LABEL: fptoui_f32_to_u32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_cvt_u32_f32 s0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = fptoui float %val to i32 ; f32 -> unsigned i32 on an SGPR input; expected to select s_cvt_u32_f32 with the result returned in s0
+ ret i32 %res
+}
+
+define amdgpu_vs float @fpext_f16_to_f32(half inreg %val) {
+; CHECK-LABEL: fpext_f16_to_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_cvt_f32_f16 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = fpext half %val to float ; f16 -> f32 on an SGPR input; expected to select s_cvt_f32_f16
+ ret float %res
+}
+
+define amdgpu_vs float @fpext_hif16_to_32(<2 x half> inreg %val) {
+; CHECK-LABEL: fpext_hif16_to_32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_cvt_hi_f32_f16 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %hielt = extractelement <2 x half> %val, i32 1 ; element 1 of the packed <2 x half> argument
+ %res = fpext half %hielt to float ; extract + extend is expected to select the single instruction s_cvt_hi_f32_f16, with no separate shift
+ ret float %res
+}
+
+define amdgpu_vs half @fptrunc_f32_to_f16(float inreg %val) {
+; CHECK-LABEL: fptrunc_f32_to_f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_cvt_f16_f32 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = fptrunc float %val to half ; f32 -> f16 on an SGPR input; expected to select s_cvt_f16_f32
+ ret half %res
+}
+
+define amdgpu_vs float @fceil_f32(float inreg %val) {
+; CHECK-LABEL: fceil_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_ceil_f32 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.ceil.f32(float %val) ; llvm.ceil on an SGPR f32; expected to select s_ceil_f32
+ ret float %res
+}
+
+define amdgpu_vs float @ffloor_f32(float inreg %val) {
+; CHECK-LABEL: ffloor_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_floor_f32 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.floor.f32(float %val) ; llvm.floor on an SGPR f32; expected to select s_floor_f32
+ ret float %res
+}
+
+define amdgpu_vs float @ftrunc_f32(float inreg %val) {
+; CHECK-LABEL: ftrunc_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_trunc_f32 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.trunc.f32(float %val) ; llvm.trunc on an SGPR f32; expected to select s_trunc_f32
+ ret float %res
+}
+
+define amdgpu_vs float @frint_f32(float inreg %val) {
+; CHECK-LABEL: frint_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_rndne_f32 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.rint.f32(float %val) ; llvm.rint on an SGPR f32; expected to select s_rndne_f32
+ ret float %res
+}
+
+define amdgpu_vs half @fceil_f16(half inreg %val) {
+; CHECK-LABEL: fceil_f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_ceil_f16 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call half @llvm.ceil.f16(half %val) ; llvm.ceil on an SGPR f16; expected to select s_ceil_f16
+ ret half %res
+}
+
+define amdgpu_vs half @ffloor_f16(half inreg %val) {
+; CHECK-LABEL: ffloor_f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_floor_f16 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call half @llvm.floor.f16(half %val) ; llvm.floor on an SGPR f16; expected to select s_floor_f16
+ ret half %res
+}
+
+define amdgpu_vs half @ftrunc_f16(half inreg %val) {
+; CHECK-LABEL: ftrunc_f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_trunc_f16 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call half @llvm.trunc.f16(half %val) ; llvm.trunc on an SGPR f16; expected to select s_trunc_f16
+ ret half %res
+}
+
+define amdgpu_vs half @frint_f16(half inreg %val) {
+; CHECK-LABEL: frint_f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_rndne_f16 s0, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call half @llvm.rint.f16(half %val) ; llvm.rint on an SGPR f16; expected to select s_rndne_f16
+ ret half %res
+}
+
+declare float @llvm.ceil.f32(float)
+declare float @llvm.floor.f32(float)
+declare float @llvm.trunc.f32(float)
+declare float @llvm.rint.f32(float)
+declare half @llvm.ceil.f16(half)
+declare half @llvm.floor.f16(half)
+declare half @llvm.trunc.f16(half)
+declare half @llvm.rint.f16(half)
diff --git a/llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll b/llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll
new file mode 100644
index 000000000000000..d736606a2aaa567
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx1150 -global-isel -verify-machineinstrs < %s | FileCheck %s
+
+define amdgpu_vs float @fadd_f32(float inreg %a, float inreg %b) {
+; CHECK-LABEL: fadd_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_add_f32 s0, s0, s1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %add = fadd float %a, %b ; both operands arrive in SGPRs; expected to select s_add_f32
+ ret float %add
+}
+
+define amdgpu_vs float @fsub_f32(float inreg %a, float inreg %b) {
+; CHECK-LABEL: fsub_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_sub_f32 s0, s0, s1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %sub = fsub float %a, %b ; both operands arrive in SGPRs; expected to select s_sub_f32
+ ret float %sub
+}
+
+define amdgpu_vs float @fmul_f32(float inreg %a, float inreg %b) {
+; CHECK-LABEL: fmul_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_mul_f32 s0, s0, s1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %mul = fmul float %a, %b ; both operands arrive in SGPRs; expected to select s_mul_f32
+ ret float %mul
+}
+
+define amdgpu_vs float @fmin_f32(float inreg %a, float inreg %b) {
+; CHECK-LABEL: fmin_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_min_f32 s0, s0, s1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %min = call float @llvm.minnum.f32(float %a, float %b) ; llvm.minnum on two SGPR f32 values; expected to select s_min_f32
+ ret float %min
+}
+
+define amdgpu_vs float @fmax_f32(float inreg %a, float inreg %b) {
+; CHECK-LABEL: fmax_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_max_f32 s0, s0, s1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %max = call float @llvm.maxnum.f32(float %a, float %b) ; llvm.maxnum on two SGPR f32 values; expected to select s_max_f32
+ ret float %max
+}
+
+define amdgpu_vs half @fadd_f16(half inreg %a, half inreg %b) {
+; CHECK-LABEL: fadd_f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_add_f16 s0, s0, s1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %add = fadd half %a, %b ; both operands arrive in SGPRs; expected to select s_add_f16
+ ret half %add
+}
+
+define amdgpu_vs half @fsub_f16(half inreg %a, half inreg %b) {
+; CHECK-LABEL: fsub_f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_sub_f16 s0, s0, s1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %sub = fsub half %a, %b ; both operands arrive in SGPRs; expected to select s_sub_f16
+ ret half %sub
+}
+
+define amdgpu_vs half @fmul_f16(half inreg %a, half inreg %b) {
+; CHECK-LABEL: fmul_f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_mul_f16 s0, s0, s1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %mul = fmul half %a, %b ; both operands arrive in SGPRs; expected to select s_mul_f16
+ ret half %mul
+}
+
+define amdgpu_vs half @fmin_f16(half inreg %a, half inreg %b) {
+; CHECK-LABEL: fmin_f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_min_f16 s0, s0, s1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %min = call half @llvm.minnum.f16(half %a, half %b) ; llvm.minnum on two SGPR f16 values; expected to select s_min_f16
+ ret half %min
+}
+
+define amdgpu_vs half @fmax_f16(half inreg %a, half inreg %b) {
+; CHECK-LABEL: fmax_f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_max_f16 s0, s0, s1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %max = call half @llvm.maxnum.f16(half %a, half %b) ; llvm.maxnum on two SGPR f16 values; expected to select s_max_f16
+ ret half %max
+}
+
+define amdgpu_vs <2 x half> @s_cvt_pkrtz_v2f16_f32(float inreg %x, float inreg %y) {
+; CHECK-LABEL: s_cvt_pkrtz_v2f16_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_cvt_pk_rtz_f16_f32 s0, s0, s1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y) ; cvt.pkrtz intrinsic on two SGPR f32 values; expected to select the scalar s_cvt_pk_rtz_f16_f32
+ ret <2 x half> %result
+}
+
+define amdgpu_vs float @fmac_f32(float inreg %a, float inreg %b, float inreg %c) {
+; CHECK-LABEL: fmac_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_fmac_f32 s0, s1, s2
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.fma.f32(float %b, float %c, float %a) ; the addend is %a (s0) and has no other use, so s_fmac_f32 is expected to accumulate into s0 with no extra copy
+ ret float %res
+}
+
+; Check selection of mov + fmac if src2 (the addend) of fmac has a use later on
+define amdgpu_vs float @fmac_f32_with_mov(float inreg %a, float inreg %b, float inreg %c) {
+; CHECK-LABEL: fmac_f32_with_mov:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_mov_b32 s3, s2
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; CHECK-NEXT: s_fmac_f32 s3, s0, s1
+; CHECK-NEXT: s_add_f32 s0, s3, s2
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %fma = call float @llvm.fma.f32(float %a, float %b, float %c) ; the addend %c (s2) is copied to s3 first because s_fmac_f32 overwrites its accumulator
+ %res = fadd float %fma, %c ; second use of %c: this is what keeps s2 live across the fmac
+ ret float %res
+}
+
+define amdgpu_vs half @fmac_f16(half inreg %a, half inreg %b, half inreg %c) {
+; CHECK-LABEL: fmac_f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_fmac_f16 s0, s1, s2
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call half @llvm.fma.f16(half %b, half %c, half %a) ; the addend is %a (s0) and has no other use, so s_fmac_f16 is expected to accumulate into s0 with no extra copy
+ ret half %res
+}
+
+; Check selection of mov + fmac if src2 (the addend) of fmac has a use later on
+define amdgpu_vs half @fmac_f16_with_mov(half inreg %a, half inreg %b, half inreg %c) {
+; CHECK-LABEL: fmac_f16_with_mov:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_mov_b32 s3, s2
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; CHECK-NEXT: s_fmac_f16 s3, s0, s1
+; CHECK-NEXT: s_add_f16 s0, s3, s2
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %fma = call half @llvm.fma.f16(half %a, half %b, half %c) ; the addend %c (s2) is copied to s3 first because s_fmac_f16 overwrites its accumulator
+ %res = fadd half %fma, %c ; second use of %c: this is what keeps s2 live across the fmac
+ ret half %res
+}
+
+; Regression test for a crash in SIFoldOperands (fmul by a constant feeding a contracted fadd)
+define amdgpu_ps float @_amdgpu_ps_main() {
+; CHECK-LABEL: _amdgpu_ps_main:
+; CHECK: ; %bb.0: ; %bb
+; CHECK-NEXT: s_mov_b32 s0, 0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-NEXT: s_mov_b32 s1, s0
+; CHECK-NEXT: s_mov_b32 s2, s0
+; CHECK-NEXT: s_mov_b32 s3, s0
+; CHECK-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_fmamk_f32 s0, s1, 0x40800000, s0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ; return to shader part epilog
+bb:
+ %i = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> zeroinitializer, i32 0, i32 0) ; dword at offset 0; expected to merge with the offset-4 load into one s_buffer_load_b64
+ %i1 = bitcast i32 %i to float
+ %i2 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> zeroinitializer, i32 4, i32 0) ; dword at offset 4
+ %i3 = bitcast i32 %i2 to float
+ %i4 = fmul contract float %i3, 4.0 ; together with the contract fadd below this is expected to become one s_fmamk_f32 with literal 0x40800000 (4.0f)
+ %i5 = fadd contract float %i4, %i1
+ ret float %i5
+}
+
+declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
+declare float @llvm.minnum.f32(float, float)
+declare float @llvm.maxnum.f32(float, float)
+declare half @llvm.minnum.f16(half, half)
+declare half @llvm.maxnum.f16(half, half)
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float)
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare half @llvm.fma.f16(half, half, half) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/scalar-float-sopc.ll b/llvm/test/CodeGen/AMDGPU/scalar-float-sopc.ll
new file mode 100644
index 000000000000000..19e50be155a9646
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/scalar-float-sopc.ll
@@ -0,0 +1,899 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck -check-prefix=SDAG %s
+; RUN: llc -march=amdgcn -mcpu=gfx1150 -global-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL %s
+
+define amdgpu_vs void @f32_olt(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_olt:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_lt_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_olt:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_lt_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp olt float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_oeq(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_oeq:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_eq_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_oeq:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_eq_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp oeq float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_ole(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_ole:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_le_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_ole:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_le_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ole float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_ogt(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_ogt:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_gt_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_ogt:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_gt_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ogt float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_one(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_one:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_lg_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_one:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_lg_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp one float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_oge(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_oge:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_ge_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_oge:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_ge_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp oge float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_ord(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_ord:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_o_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_ord:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_o_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ord float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_uno(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_uno:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_u_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_uno:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_u_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp uno float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_ult(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_ult:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nge_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_ult:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nge_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ult float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_ueq(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_ueq:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nlg_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_ueq:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nlg_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ueq float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_ule(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_ule:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_ngt_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_ule:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_ngt_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ule float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_ugt(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_ugt:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nle_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_ugt:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nle_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ugt float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_une(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_une:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_neq_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_une:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_neq_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp une float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f32_uge(ptr addrspace(1) inreg %out, float inreg %a, float inreg %b) {
+; SDAG-LABEL: f32_uge:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nlt_f32 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f32_uge:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nlt_f32 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp uge float %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_olt(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_olt:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_lt_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_olt:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_lt_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp olt half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_oeq(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_oeq:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_eq_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_oeq:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_eq_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp oeq half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_ole(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_ole:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_le_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_ole:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_le_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ole half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_ogt(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_ogt:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_gt_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_ogt:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_gt_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ogt half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_one(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_one:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_lg_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_one:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_lg_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp one half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_oge(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_oge:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_ge_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_oge:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_ge_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp oge half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_ord(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_ord:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_o_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_ord:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_o_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ord half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_uno(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_uno:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_u_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_uno:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_u_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp uno half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_ult(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_ult:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nge_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_ult:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nge_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ult half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_ueq(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_ueq:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nlg_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_ueq:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nlg_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ueq half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_ule(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_ule:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_ngt_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_ule:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_ngt_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ule half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_ugt(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_ugt:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nle_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_ugt:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nle_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp ugt half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_une(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_une:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_neq_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_une:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_neq_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp une half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_vs void @f16_uge(ptr addrspace(1) inreg %out, half inreg %a, half inreg %b) {
+; SDAG-LABEL: f16_uge:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_cmp_nlt_f16 s2, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: f16_uge:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_cmp_nlt_f16 s2, s3
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
+entry:
+ %0 = fcmp uge half %a, %b
+ %1 = sext i1 %0 to i32
+ store i32 %1, ptr addrspace(1) %out
+ ret void
+}
More information about the llvm-commits
mailing list