[llvm] 587747d - [AMDGPU] G_IS_FPCLASS lower() support for IEEE fp types
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 7 03:53:16 PST 2022
Author: Janek van Oirschot
Date: 2022-12-07T11:53:09Z
New Revision: 587747d8d142828be31a6835d29e75e7cb657a51
URL: https://github.com/llvm/llvm-project/commit/587747d8d142828be31a6835d29e75e7cb657a51
DIFF: https://github.com/llvm/llvm-project/commit/587747d8d142828be31a6835d29e75e7cb657a51.diff
LOG: [AMDGPU] G_IS_FPCLASS lower() support for IEEE fp types
Simplified globalisel version of sdag's expandIS_FPCLASS.
Reviewed By: arsenm, #amdgpu
Differential Revision: https://reviews.llvm.org/D139128
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index caa6346a40dbb..a019bc9876bda 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -379,6 +379,8 @@ class LegalizerHelper {
LegalizeResult lowerFPTRUNC(MachineInstr &MI);
LegalizeResult lowerFPOWI(MachineInstr &MI);
+ LegalizeResult lowerISFPCLASS(MachineInstr &MI);
+
LegalizeResult lowerMinMax(MachineInstr &MI);
LegalizeResult lowerFCopySign(MachineInstr &MI);
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 7c27edd8e6064..1fb78f5a1e35a 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3525,6 +3525,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerAbsToAddXor(MI);
case G_SELECT:
return lowerSelect(MI);
+ case G_IS_FPCLASS:
+ return lowerISFPCLASS(MI);
case G_SDIVREM:
case G_UDIVREM:
return lowerDIVREM(MI);
@@ -7236,6 +7238,166 @@ LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
return Legalized;
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+ uint64_t Mask = MI.getOperand(2).getImm();
+
+ if (Mask == 0) {
+ MIRBuilder.buildConstant(DstReg, 0);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ if ((Mask & fcAllFlags) == fcAllFlags) {
+ MIRBuilder.buildConstant(DstReg, 1);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ unsigned BitSize = SrcTy.getScalarSizeInBits();
+ const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
+
+ LLT IntTy = LLT::scalar(BitSize);
+ if (SrcTy.isVector())
+ IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
+ auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
+
+ // Various masks.
+ APInt SignBit = APInt::getSignMask(BitSize);
+ APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
+ APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
+ APInt ExpMask = Inf;
+ APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
+ APInt QNaNBitMask =
+ APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
+ APInt InvertionMask = APInt::getAllOnesValue(DstTy.getScalarSizeInBits());
+
+ auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
+ auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
+ auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
+ auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
+ auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
+
+ auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
+ auto Sign =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
+
+ auto Res = MIRBuilder.buildConstant(DstTy, 0);
+ const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
+ Res = MIRBuilder.buildOr(DstTy, Res, ToAppend);
+ };
+
+ // Tests that involve more than one class should be processed first.
+ if ((Mask & fcFinite) == fcFinite) {
+ // finite(V) ==> abs(V) u< exp_mask
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
+ ExpMaskC));
+ Mask &= ~fcFinite;
+ } else if ((Mask & fcFinite) == fcPosFinite) {
+ // finite(V) && V > 0 ==> V u< exp_mask
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
+ ExpMaskC));
+ Mask &= ~fcPosFinite;
+ } else if ((Mask & fcFinite) == fcNegFinite) {
+ // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
+ auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
+ ExpMaskC);
+ auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
+ appendToRes(And);
+ Mask &= ~fcNegFinite;
+ }
+
+ // Check for individual classes.
+ if (unsigned PartialCheck = Mask & fcZero) {
+ if (PartialCheck == fcPosZero)
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ AsInt, ZeroC));
+ else if (PartialCheck == fcZero)
+ appendToRes(
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
+ else // fcNegZero
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ AsInt, SignBitC));
+ }
+
+ if (unsigned PartialCheck = Mask & fcInf) {
+ if (PartialCheck == fcPosInf)
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ AsInt, InfC));
+ else if (PartialCheck == fcInf)
+ appendToRes(
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
+ else { // fcNegInf
+ APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
+ auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ AsInt, NegInfC));
+ }
+ }
+
+ if (unsigned PartialCheck = Mask & fcNan) {
+ auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
+ if (PartialCheck == fcNan) {
+ // isnan(V) ==> abs(V) u> int(inf)
+ appendToRes(
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
+ } else if (PartialCheck == fcQNan) {
+ // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
+ InfWithQnanBitC));
+ } else { // fcSNan
+ // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
+ // abs(V) u< (unsigned(Inf) | quiet_bit)
+ auto IsNan =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
+ auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
+ Abs, InfWithQnanBitC);
+ appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
+ }
+ }
+
+ if (unsigned PartialCheck = Mask & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
+ auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
+ auto OneC = MIRBuilder.buildConstant(IntTy, 1);
+ auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
+ auto SubnormalRes =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
+ MIRBuilder.buildConstant(IntTy, AllOneMantissa));
+ if (PartialCheck == fcNegSubnormal)
+ SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
+ appendToRes(SubnormalRes);
+ }
+
+ if (unsigned PartialCheck = Mask & fcNormal) {
+ // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
+ // (max_exp-1))
+ APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
+ auto ExpMinusOne = MIRBuilder.buildSub(
+ IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
+ APInt MaxExpMinusOne = ExpMask - ExpLSB;
+ auto NormalRes =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
+ MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
+ if (PartialCheck == fcNegNormal)
+ NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
+ else if (PartialCheck == fcPosNormal) {
+ auto PosSign = MIRBuilder.buildXor(
+ DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
+ NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
+ }
+ appendToRes(NormalRes);
+ }
+
+ MIRBuilder.buildCopy(DstReg, Res);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
// Implement vector G_SELECT in terms of XOR, AND, OR.
Register DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index b48d8a1bb6afa..4b3e396d70f9a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -978,10 +978,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.widenScalarToNextPow2(0, 32);
- getActionDefinitionsBuilder(G_IS_FPCLASS)
- .legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
- .widenScalarToNextPow2(1)
- .scalarize(0);
+ // If no 16 bit instr is available, lower into
diff erent instructions.
+ if (ST.has16BitInsts())
+ getActionDefinitionsBuilder(G_IS_FPCLASS)
+ .legalForCartesianProduct({S1}, FPTypes16)
+ .widenScalarToNextPow2(1)
+ .scalarize(0)
+ .lower();
+ else
+ getActionDefinitionsBuilder(G_IS_FPCLASS)
+ .legalForCartesianProduct({S1}, FPTypesBase)
+ .lowerFor({S1, S16})
+ .widenScalarToNextPow2(1)
+ .scalarize(0)
+ .lower();
// The hardware instructions return a
diff erent result on 0 than the generic
// instructions expect. The hardware produces -1, but these produce the
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index dcee704026a59..e074778e2dee9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -1,4 +1,5 @@
-; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx704 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7SELDAG %s
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx704 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7SELDAG,GFX7CHECK %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx704 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7GLISEL,GFX7CHECK %s
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8SELDAG,GFX8CHECK %s
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8GLISEL,GFX8CHECK %s
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9SELDAG,GFX9CHECK %s
@@ -8,6 +9,7 @@
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11SELDAG,GFX11CHECK %s
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11GLISEL,GFX11CHECK %s
+define amdgpu_kernel void @sgpr_isnan_f16(i32 addrspace(1)* %out, half %x) {
; GFX7SELDAG-LABEL: sgpr_isnan_f16:
; GFX7SELDAG: ; %bb.0:
; GFX7SELDAG-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -22,6 +24,22 @@
; GFX7SELDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7SELDAG-NEXT: s_endpgm
;
+; GFX7GLISEL-LABEL: sgpr_isnan_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_load_dword s3, s[0:1], 0xb
+; GFX7GLISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7GLISEL-NEXT: s_mov_b32 s2, -1
+; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 0x7fff
+; GFX7GLISEL-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX7GLISEL-NEXT: s_cmpk_gt_u32 s3, 0x7c00
+; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0
+; GFX7GLISEL-NEXT: s_bfe_i32 s3, s3, 0x10000
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3
+; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000
+; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7GLISEL-NEXT: s_endpgm
+;
; GFX8CHECK-LABEL: sgpr_isnan_f16:
; GFX8CHECK: ; %bb.0:
; GFX8CHECK-NEXT: s_load_dword s2, s[0:1], 0x2c
@@ -70,13 +88,800 @@
; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11CHECK-NEXT: s_endpgm
-define amdgpu_kernel void @sgpr_isnan_f16(i32 addrspace(1)* %out, half %x) {
- %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan
+ %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3)
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
}
+define i1 @zeromask_f16(half %x) nounwind {
+; GFX7CHECK-LABEL: zeromask_f16:
+; GFX7CHECK: ; %bb.0:
+; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7CHECK-NEXT: v_mov_b32_e32 v0, 0
+; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: zeromask_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 0
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: zeromask_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 0
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: zeromask_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: zeromask_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 0)
+ ret i1 %1
+}
+
+define i1 @allflags_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: allflags_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_mov_b32_e32 v0, 1
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: allflags_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, -1
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: allflags_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff
+; GFX8CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: allflags_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff
+; GFX9CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: allflags_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x3ff
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: allflags_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x3ff
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 1023) ; 0x3ff
+ ret i1 %1
+}
+
+define i1 @snan_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: snan_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7e00
+; GFX7SELDAG-NEXT: s_movk_i32 s5, 0x7c00
+; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT: v_cmp_lt_i32_e64 s[4:5], s5, v0
+; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: snan_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1
+; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: snan_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 1
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: snan_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 1
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: snan_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 1
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: snan_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 1
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 1) ; 0x001
+ ret i1 %1
+}
+
+define i1 @qnan_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: qnan_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7dff
+; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: qnan_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00
+; GFX7GLISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: qnan_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 2
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: qnan_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 2
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: qnan_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 2
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: qnan_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 2
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 2) ; 0x002
+ ret i1 %1
+}
+
+define i1 @posinf_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: posinf_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
+; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: posinf_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: posinf_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x200
+; GFX8CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: posinf_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x200
+; GFX9CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: posinf_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x200
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: posinf_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x200
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 512) ; 0x200
+ ret i1 %1
+}
+
+define i1 @neginf_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: neginf_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7SELDAG-NEXT: s_mov_b32 s4, 0xfc00
+; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: neginf_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0xfc00
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: neginf_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 4
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: neginf_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 4
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: neginf_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 4
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: neginf_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 4
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 4) ; 0x004
+ ret i1 %1
+}
+
+define i1 @posnormal_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: posnormal_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800
+; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16
+; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7SELDAG-NEXT: v_cmp_lt_i32_e64 s[4:5], -1, v1
+; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
+; GFX7SELDAG-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: posnormal_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: posnormal_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x100
+; GFX8CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: posnormal_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x100
+; GFX9CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: posnormal_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x100
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: posnormal_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x100
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 256) ; 0x100
+ ret i1 %1
+}
+
+define i1 @negnormal_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: negnormal_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800
+; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16
+; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7SELDAG-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v1
+; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
+; GFX7SELDAG-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: negnormal_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16
+; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: negnormal_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 8
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: negnormal_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 8
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: negnormal_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 8
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: negnormal_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 8
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 8) ; 0x008
+ ret i1 %1
+}
+
+define i1 @possubnormal_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: possubnormal_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff
+; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, -1, v0
+; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: possubnormal_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 1, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: possubnormal_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_mov_b32_e32 v1, 0x80
+; GFX8CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: possubnormal_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 0x80
+; GFX9CHECK-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: possubnormal_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x80
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: possubnormal_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x80
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 128) ; 0x080
+ ret i1 %1
+}
+
+define i1 @negsubnormal_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: negsubnormal_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16
+; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7SELDAG-NEXT: v_add_i32_e64 v0, s[4:5], -1, v0
+; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff
+; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
+; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v0
+; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: negsubnormal_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16
+; GFX7GLISEL-NEXT: v_cmp_ne_u32_e32 vcc, v0, v2
+; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v1
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1
+; GFX7GLISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: negsubnormal_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 16
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: negsubnormal_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 16
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: negsubnormal_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 16
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: negsubnormal_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 16
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 16) ; 0x010
+ ret i1 %1
+}
+
+define i1 @poszero_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: poszero_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: poszero_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: poszero_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 64
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: poszero_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 64
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: poszero_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 64
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: poszero_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 64
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 64) ; 0x040
+ ret i1 %1
+}
+
+define i1 @negzero_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: negzero_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7SELDAG-NEXT: s_mov_b32 s4, 0x8000
+; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: negzero_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x8000
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: negzero_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 32
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: negzero_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 32
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: negzero_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 32
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: negzero_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 32
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 32) ; 0x020
+ ret i1 %1
+}
+
+define i1 @posfinite_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: posfinite_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
+; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: posfinite_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8SELDAG-LABEL: posfinite_f16:
+; GFX8SELDAG: ; %bb.0:
+; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, 0x1c0
+; GFX8SELDAG-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
+; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9SELDAG-LABEL: posfinite_f16:
+; GFX9SELDAG: ; %bb.0:
+; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9SELDAG-NEXT: v_mov_b32_e32 v1, 0x1c0
+; GFX9SELDAG-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
+; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10SELDAG-LABEL: posfinite_f16:
+; GFX10SELDAG: ; %bb.0:
+; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10SELDAG-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s4, v0, 0x1c0
+; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-LABEL: posfinite_f16:
+; GFX11SELDAG: ; %bb.0:
+; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11SELDAG-NEXT: v_cmp_class_f16_e64 s0, v0, 0x1c0
+; GFX11SELDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 448) ; 0x1c0
+ ret i1 %1
+}
+
+define i1 @negfinite_f16(half %x) nounwind {
+; GFX7SELDAG-LABEL: negfinite_f16:
+; GFX7SELDAG: ; %bb.0:
+; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
+; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16
+; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
+; GFX7SELDAG-NEXT: v_cmp_gt_i32_e64 s[4:5], s4, v0
+; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc
+; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7GLISEL-LABEL: negfinite_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX7GLISEL-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v0
+; GFX7GLISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8CHECK-LABEL: negfinite_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 56
+; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9CHECK-LABEL: negfinite_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 56
+; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10CHECK-LABEL: negfinite_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 56
+; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11CHECK-LABEL: negfinite_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 56
+; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+ %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 56) ; 0x038
+ ret i1 %1
+}
+
define i1 @isnan_f16(half %x) nounwind {
; GFX7SELDAG-LABEL: isnan_f16:
; GFX7SELDAG: ; %bb.0:
@@ -88,6 +893,16 @@ define i1 @isnan_f16(half %x) nounwind {
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX7GLISEL-LABEL: isnan_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8CHECK-LABEL: isnan_f16:
; GFX8CHECK: ; %bb.0:
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -137,6 +952,20 @@ define <2 x i1> @isnan_v2f16(<2 x half> %x) nounwind {
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX7GLISEL-LABEL: isnan_v2f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v1
+; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8CHECK-LABEL: isnan_v2f16:
; GFX8CHECK: ; %bb.0:
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -205,6 +1034,24 @@ define <3 x i1> @isnan_v3f16(<3 x half> %x) nounwind {
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX7GLISEL-LABEL: isnan_v3f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v1
+; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0x7fff, v2
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v2, v2, 0, 16
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v2
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8SELDAG-LABEL: isnan_v3f16:
; GFX8SELDAG: ; %bb.0:
; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -343,6 +1190,28 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX7GLISEL-LABEL: isnan_v4f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v1
+; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0x7fff, v2
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v2, v2, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v3, 0x7fff, v3
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v1
+; GFX7GLISEL-NEXT: v_bfe_u32 v3, v3, 0, 16
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v2
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v3
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8SELDAG-LABEL: isnan_v4f16:
; GFX8SELDAG: ; %bb.0:
; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -472,6 +1341,16 @@ define i1 @isnan_f16_strictfp(half %x) strictfp nounwind {
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX7GLISEL-LABEL: isnan_f16_strictfp:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8CHECK-LABEL: isnan_f16_strictfp:
; GFX8CHECK: ; %bb.0:
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -517,6 +1396,16 @@ define i1 @isinf_f16(half %x) nounwind {
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX7GLISEL-LABEL: isinf_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8CHECK-LABEL: isinf_f16:
; GFX8CHECK: ; %bb.0:
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -564,6 +1453,16 @@ define i1 @isfinite_f16(half %x) nounwind {
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX7GLISEL-LABEL: isfinite_f16:
+; GFX7GLISEL: ; %bb.0:
+; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8CHECK-LABEL: isfinite_f16:
; GFX8CHECK: ; %bb.0:
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
More information about the llvm-commits
mailing list