[llvm] 12c1022 - [AArch64] Lowering and legalization of strict FP16
John Brawn via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 14 08:51:43 PDT 2022
Author: John Brawn
Date: 2022-04-14T16:51:22+01:00
New Revision: 12c1022679d40e2442c9f6020281c5a010e27dee
URL: https://github.com/llvm/llvm-project/commit/12c1022679d40e2442c9f6020281c5a010e27dee
DIFF: https://github.com/llvm/llvm-project/commit/12c1022679d40e2442c9f6020281c5a010e27dee.diff
LOG: [AArch64] Lowering and legalization of strict FP16
For strict FP16 to work correctly, some changes are needed in lowering
and legalization:
* SelectionDAGLegalize::PromoteNode was missing handling for some
strict fp opcodes.
* Some of the custom lowering of strict fp operations needed to be
adjusted to work with FP16.
* Custom lowering needed to be added for round-to-int operations.
With this, and the previous patches for the rest of the strict fp
isel, we can set IsStrictFPEnabled = true, so strict nodes are kept
through instruction selection rather than being mutated to their
non-strict equivalents (which is why the tests below no longer need
-disable-strictnode-mutation).
Differential Revision: https://reviews.llvm.org/D115620
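
For context, the PromoteNode and lowering changes below all follow the
same shape: extend the f16 value to f32 (STRICT_FP_EXTEND), perform the
operation at f32, then round the result back (STRICT_FP_ROUND), threading
the chain through every node so floating-point exception ordering is
preserved. A simplified sketch of that pattern (not the exact patch code;
NVT and OVT are the promoted and original types, as in LegalizeDAG.cpp):

  SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
                            {Node->getOperand(0), Node->getOperand(1)});
  SDValue Res = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
                            {Ext.getValue(1), Ext});
  Res = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
                    {Res.getValue(1), Res, DAG.getIntPtrConstant(0, dl)});
  Results.push_back(Res);             // the rounded f16 result
  Results.push_back(Res.getValue(1)); // the output chain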
Added:
llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b5ca8589ebb09..e1353c5ba464f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4714,6 +4714,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp3, DAG.getIntPtrConstant(0, dl)));
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FREM:
case ISD::STRICT_FPOW:
Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
@@ -4738,6 +4744,22 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
DAG.getIntPtrConstant(0, dl)));
break;
+ case ISD::STRICT_FMA:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(2)});
+ Tmp3 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(3)});
+ Tmp4 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1),
+ Tmp2.getValue(1), Tmp3.getValue(1));
+ Tmp4 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp4, Tmp1, Tmp2, Tmp3});
+ Tmp4 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp4.getValue(1), Tmp4, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp4);
+ Results.push_back(Tmp4.getValue(1));
+ break;
case ISD::FCOPYSIGN:
case ISD::FPOWI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
@@ -4754,6 +4776,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
break;
}
+ case ISD::STRICT_FPOWI:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp1.getValue(1), Tmp1, Node->getOperand(2)});
+ Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp3);
+ Results.push_back(Tmp3.getValue(1));
+ break;
case ISD::FFLOOR:
case ISD::FCEIL:
case ISD::FRINT:
@@ -4778,12 +4810,19 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
case ISD::STRICT_FROUND:
+ case ISD::STRICT_FROUNDEVEN:
+ case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FSQRT:
case ISD::STRICT_FSIN:
case ISD::STRICT_FCOS:
case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG2:
case ISD::STRICT_FLOG10:
case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
{Node->getOperand(0), Node->getOperand(1)});
Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 32a35124ceb79..c722a44141fa9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -539,64 +539,41 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
else
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
- setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FREM, MVT::v4f16, Expand);
- setOperationAction(ISD::FREM, MVT::v8f16, Expand);
- setOperationAction(ISD::FPOW, MVT::f16, Promote);
- setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
- setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
- setOperationAction(ISD::FPOWI, MVT::f16, Promote);
- setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
- setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
- setOperationAction(ISD::FCOS, MVT::f16, Promote);
- setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
- setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
- setOperationAction(ISD::FSIN, MVT::f16, Promote);
- setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
- setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
- setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
- setOperationAction(ISD::FEXP, MVT::f16, Promote);
- setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
- setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
- setOperationAction(ISD::FLOG, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
- setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
+ for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
+ ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
+ ISD::FEXP, ISD::FEXP2, ISD::FLOG,
+ ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
+ ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
+ ISD::STRICT_FSIN, ISD::STRICT_FEXP, ISD::STRICT_FEXP2,
+ ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) {
+ setOperationAction(Op, MVT::f16, Promote);
+ setOperationAction(Op, MVT::v4f16, Expand);
+ setOperationAction(Op, MVT::v8f16, Expand);
+ }
if (!Subtarget->hasFullFP16()) {
- setOperationAction(ISD::SELECT, MVT::f16, Promote);
- setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
- setOperationAction(ISD::SETCC, MVT::f16, Promote);
- setOperationAction(ISD::BR_CC, MVT::f16, Promote);
- setOperationAction(ISD::FADD, MVT::f16, Promote);
- setOperationAction(ISD::FSUB, MVT::f16, Promote);
- setOperationAction(ISD::FMUL, MVT::f16, Promote);
- setOperationAction(ISD::FDIV, MVT::f16, Promote);
- setOperationAction(ISD::FMA, MVT::f16, Promote);
- setOperationAction(ISD::FNEG, MVT::f16, Promote);
- setOperationAction(ISD::FABS, MVT::f16, Promote);
- setOperationAction(ISD::FCEIL, MVT::f16, Promote);
- setOperationAction(ISD::FSQRT, MVT::f16, Promote);
- setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
- setOperationAction(ISD::FRINT, MVT::f16, Promote);
- setOperationAction(ISD::FROUND, MVT::f16, Promote);
- setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
- setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
- setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
- setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
+ for (auto Op :
+ {ISD::SELECT, ISD::SELECT_CC, ISD::SETCC,
+ ISD::BR_CC, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FDIV, ISD::FMA,
+ ISD::FNEG, ISD::FABS, ISD::FCEIL,
+ ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
+ ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
+ ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
+ ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
+ ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
+ ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
+ ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
+ ISD::STRICT_FMAXIMUM})
+ setOperationAction(Op, MVT::f16, Promote);
+
+ // Round-to-integer operations need custom lowering for fp16, as Promote
+ // doesn't work because the result type is integer.
+ for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
+ ISD::STRICT_LLRINT})
+ setOperationAction(Op, MVT::f16, Custom);
// promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
@@ -1402,6 +1379,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
+
+ IsStrictFPEnabled = true;
}
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
@@ -2592,7 +2571,18 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
bool IsSignaling) {
EVT VT = LHS.getValueType();
assert(VT != MVT::f128);
- assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
+
+ const bool FullFP16 =
+ static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+
+ if (VT == MVT::f16 && !FullFP16) {
+ LHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
+ {Chain, LHS});
+ RHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
+ {LHS.getValue(1), RHS});
+ Chain = RHS.getValue(1);
+ VT = MVT::f32;
+ }
unsigned Opcode =
IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
@@ -3468,8 +3458,7 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
VT.getVectorNumElements());
if (IsStrict) {
- SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
- {ExtVT, MVT::Other},
+ SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {ExtVT, MVT::Other},
{Op.getOperand(0), Op.getOperand(1)});
return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
{Ext.getValue(1), Ext.getValue(0)});
@@ -3506,8 +3495,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
- assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
SDLoc dl(Op);
+ if (IsStrict) {
+ SDValue Ext =
+ DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
+ {Op.getOperand(0), SrcVal});
+ return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
+ {Ext.getValue(1), Ext.getValue(0)});
+ }
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
@@ -3730,10 +3725,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
// f16 conversions are promoted to f32 when full fp16 is not supported.
- if (Op.getValueType() == MVT::f16 &&
- !Subtarget->hasFullFP16()) {
- assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
+ if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
SDLoc dl(Op);
+ if (IsStrict) {
+ SDValue Val = DAG.getNode(Op.getOpcode(), dl, {MVT::f32, MVT::Other},
+ {Op.getOperand(0), SrcVal});
+ return DAG.getNode(
+ ISD::STRICT_FP_ROUND, dl, {MVT::f16, MVT::Other},
+ {Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)});
+ }
return DAG.getNode(
ISD::FP_ROUND, dl, MVT::f16,
DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
@@ -5367,6 +5367,18 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerCTTZ(Op, DAG);
case ISD::VECTOR_SPLICE:
return LowerVECTOR_SPLICE(Op, DAG);
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT: {
+ assert(Op.getOperand(1).getValueType() == MVT::f16 &&
+ "Expected custom lowering of rounding operations only for f16");
+ SDLoc DL(Op);
+ SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
+ {Op.getOperand(0), Op.getOperand(1)});
+ return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
+ {Ext.getValue(1), Ext.getValue(0)});
+ }
}
}
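
The STRICT_LROUND/STRICT_LLROUND/STRICT_LRINT/STRICT_LLRINT hunk above is
the custom lowering referred to by the round-to-integer comment earlier in
this file: Promote cannot be used because these nodes produce an integer,
so there is no FP result to round back to f16; only the f16 operand is
extended and the node is rebuilt at f32. (The emitStrictFPComparison
change uses the same idea, chaining the RHS extend on the LHS extend's
output chain so the two extensions stay ordered.) As an isolated sketch of
the pattern, using a hypothetical helper name that is not in the patch:

  // Strict round-to-int on f16 without full fp16: widen only the operand;
  // the integer result type is unchanged, so no STRICT_FP_ROUND is needed.
  static SDValue lowerStrictF16RoundToInt(SDValue Op, SelectionDAG &DAG) {
    SDLoc DL(Op);
    SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL,
                              {MVT::f32, MVT::Other},
                              {Op.getOperand(0), Op.getOperand(1)});
    return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
                       {Ext.getValue(1), Ext.getValue(0)});
  }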
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
new file mode 100644
index 0000000000000..0a9fa6c849bbf
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
@@ -0,0 +1,1173 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+; Check that constrained fp intrinsics are correctly lowered.
+
+
+; Half-precision intrinsics
+
+define half @add_f16(half %x, half %y) #0 {
+; CHECK-NOFP16-LABEL: add_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fadd s0, s0, s1
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: add_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fadd h0, h0, h1
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @sub_f16(half %x, half %y) #0 {
+; CHECK-NOFP16-LABEL: sub_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fsub s0, s0, s1
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: sub_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fsub h0, h0, h1
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @mul_f16(half %x, half %y) #0 {
+; CHECK-NOFP16-LABEL: mul_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fmul s0, s0, s1
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: mul_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fmul h0, h0, h1
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @div_f16(half %x, half %y) #0 {
+; CHECK-NOFP16-LABEL: div_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fdiv s0, s0, s1
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: div_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fdiv h0, h0, h1
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.fdiv.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @frem_f16(half %x, half %y) #0 {
+; CHECK-LABEL: frem_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl fmodf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %val = call half @llvm.experimental.constrained.frem.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @fma_f16(half %x, half %y, half %z) #0 {
+; CHECK-NOFP16-LABEL: fma_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s2, h2
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fmadd s0, s0, s1, s2
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fma_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define i32 @fptosi_i32_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: fptosi_i32_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtzs w0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fptosi_i32_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtzs w0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") #0
+ ret i32 %val
+}
+
+define i32 @fptoui_i32_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: fptoui_i32_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtzu w0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fptoui_i32_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtzu w0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0
+ ret i32 %val
+}
+
+define i64 @fptosi_i64_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: fptosi_i64_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtzs x0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fptosi_i64_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtzs x0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict") #0
+ ret i64 %val
+}
+
+define i64 @fptoui_i64_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: fptoui_i64_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtzu x0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fptoui_i64_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtzu x0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict") #0
+ ret i64 %val
+}
+
+define half @sitofp_f16_i32(i32 %x) #0 {
+; CHECK-NOFP16-LABEL: sitofp_f16_i32:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: scvtf s0, w0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: sitofp_f16_i32:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: scvtf h0, w0
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @uitofp_f16_i32(i32 %x) #0 {
+; CHECK-NOFP16-LABEL: uitofp_f16_i32:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: ucvtf s0, w0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: uitofp_f16_i32:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: ucvtf h0, w0
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @sitofp_f16_i64(i64 %x) #0 {
+; CHECK-NOFP16-LABEL: sitofp_f16_i64:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: scvtf s0, x0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: sitofp_f16_i64:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: scvtf h0, x0
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @uitofp_f16_i64(i64 %x) #0 {
+; CHECK-NOFP16-LABEL: uitofp_f16_i64:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: ucvtf s0, x0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: uitofp_f16_i64:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: ucvtf h0, x0
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @sitofp_f16_i128(i128 %x) #0 {
+; CHECK-NOFP16-LABEL: sitofp_f16_i128:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NOFP16-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NOFP16-NEXT: .cfi_offset w30, -16
+; CHECK-NOFP16-NEXT: bl __floattisf
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: sitofp_f16_i128:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-FP16-NEXT: .cfi_def_cfa_offset 16
+; CHECK-FP16-NEXT: .cfi_offset w30, -16
+; CHECK-FP16-NEXT: bl __floattihf
+; CHECK-FP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.sitofp.f16.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @uitofp_f16_i128(i128 %x) #0 {
+; CHECK-NOFP16-LABEL: uitofp_f16_i128:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NOFP16-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NOFP16-NEXT: .cfi_offset w30, -16
+; CHECK-NOFP16-NEXT: bl __floatuntisf
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: uitofp_f16_i128:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-FP16-NEXT: .cfi_def_cfa_offset 16
+; CHECK-FP16-NEXT: .cfi_offset w30, -16
+; CHECK-FP16-NEXT: bl __floatuntihf
+; CHECK-FP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.uitofp.f16.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @sqrt_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: sqrt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fsqrt s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: sqrt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fsqrt h0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.sqrt.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @powi_f16(half %x, i32 %y) #0 {
+; CHECK-LABEL: powi_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl __powisf2
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %val = call half @llvm.experimental.constrained.powi.f16(half %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @sin_f16(half %x) #0 {
+; CHECK-LABEL: sin_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sinf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %val = call half @llvm.experimental.constrained.sin.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @cos_f16(half %x) #0 {
+; CHECK-LABEL: cos_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl cosf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %val = call half @llvm.experimental.constrained.cos.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @pow_f16(half %x, half %y) #0 {
+; CHECK-LABEL: pow_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl powf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %val = call half @llvm.experimental.constrained.pow.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @log_f16(half %x) #0 {
+; CHECK-LABEL: log_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl logf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %val = call half @llvm.experimental.constrained.log.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @log10_f16(half %x) #0 {
+; CHECK-LABEL: log10_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl log10f
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %val = call half @llvm.experimental.constrained.log10.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @log2_f16(half %x) #0 {
+; CHECK-LABEL: log2_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl log2f
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %val = call half @llvm.experimental.constrained.log2.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @exp_f16(half %x) #0 {
+; CHECK-LABEL: exp_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl expf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %val = call half @llvm.experimental.constrained.exp.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @exp2_f16(half %x) #0 {
+; CHECK-LABEL: exp2_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl exp2f
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %val = call half @llvm.experimental.constrained.exp2.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @rint_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: rint_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintx s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: rint_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintx h0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.rint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @nearbyint_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: nearbyint_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frinti s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: nearbyint_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frinti h0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.nearbyint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define i32 @lrint_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: lrint_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintx s0, s0
+; CHECK-NOFP16-NEXT: fcvtzs w0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: lrint_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintx h0, h0
+; CHECK-FP16-NEXT: fcvtzs w0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.lrint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret i32 %val
+}
+
+define i64 @llrint_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: llrint_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintx s0, s0
+; CHECK-NOFP16-NEXT: fcvtzs x0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: llrint_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintx h0, h0
+; CHECK-FP16-NEXT: fcvtzs x0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.llrint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret i64 %val
+}
+
+define half @maxnum_f16(half %x, half %y) #0 {
+; CHECK-NOFP16-LABEL: maxnum_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: maxnum_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fmaxnm h0, h0, h1
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.maxnum.f16(half %x, half %y, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @minnum_f16(half %x, half %y) #0 {
+; CHECK-NOFP16-LABEL: minnum_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fminnm s0, s0, s1
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: minnum_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fminnm h0, h0, h1
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.minnum.f16(half %x, half %y, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @ceil_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: ceil_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintp s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: ceil_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintp h0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.ceil.f16(half %x, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @floor_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: floor_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintm s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: floor_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintm h0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.floor.f16(half %x, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define i32 @lround_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: lround_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtas w0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: lround_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtas w0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call i32 @llvm.experimental.constrained.lround.f16(half %x, metadata !"fpexcept.strict") #0
+ ret i32 %val
+}
+
+define i64 @llround_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: llround_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtas x0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: llround_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtas x0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call i64 @llvm.experimental.constrained.llround.f16(half %x, metadata !"fpexcept.strict") #0
+ ret i64 %val
+}
+
+define half @round_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: round_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frinta s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: round_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frinta h0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.round.f16(half %x, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @roundeven_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: roundeven_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintn s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: roundeven_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintn h0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.roundeven.f16(half %x, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define half @trunc_f16(half %x) #0 {
+; CHECK-NOFP16-LABEL: trunc_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintz s0, s0
+; CHECK-NOFP16-NEXT: fcvt h0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: trunc_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: frintz h0, h0
+; CHECK-FP16-NEXT: ret
+ %val = call half @llvm.experimental.constrained.trunc.f16(half %x, metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define i32 @fcmp_olt_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_olt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, mi
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_olt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, mi
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_ole_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_ole_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, ls
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_ole_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, ls
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_ogt_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_ogt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, gt
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_ogt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, gt
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_oge_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_oge_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, ge
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_oge_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, ge
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_oeq_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_oeq_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, eq
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_oeq_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, eq
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_one_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_one_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w8, mi
+; CHECK-NOFP16-NEXT: csinc w0, w8, wzr, le
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_one_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w8, mi
+; CHECK-FP16-NEXT: csinc w0, w8, wzr, le
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_ult_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_ult_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, lt
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_ult_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, lt
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_ule_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_ule_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, le
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_ule_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, le
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_ugt_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_ugt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, hi
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_ugt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, hi
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_uge_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_uge_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, pl
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_uge_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, pl
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_ueq_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_ueq_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w8, eq
+; CHECK-NOFP16-NEXT: csinc w0, w8, wzr, vc
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_ueq_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w8, eq
+; CHECK-FP16-NEXT: csinc w0, w8, wzr, vc
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmp_une_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmp_une_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmp s0, s1
+; CHECK-NOFP16-NEXT: cset w0, ne
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmp_une_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmp h0, h1
+; CHECK-FP16-NEXT: cset w0, ne
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_olt_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_olt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, mi
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_olt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, mi
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_ole_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_ole_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, ls
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_ole_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, ls
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_ogt_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_ogt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, gt
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_ogt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, gt
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_oge_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_oge_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, ge
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_oge_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, ge
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_oeq_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_oeq_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, eq
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_oeq_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, eq
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_one_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_one_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w8, mi
+; CHECK-NOFP16-NEXT: csinc w0, w8, wzr, le
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_one_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w8, mi
+; CHECK-FP16-NEXT: csinc w0, w8, wzr, le
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_ult_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_ult_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, lt
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_ult_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, lt
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_ule_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_ule_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, le
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_ule_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, le
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_ugt_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_ugt_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, hi
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_ugt_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, hi
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_uge_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_uge_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, pl
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_uge_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, pl
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_ueq_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_ueq_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w8, eq
+; CHECK-NOFP16-NEXT: csinc w0, w8, wzr, vc
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_ueq_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w8, eq
+; CHECK-FP16-NEXT: csinc w0, w8, wzr, vc
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @fcmps_une_f16(half %a, half %b) #0 {
+; CHECK-NOFP16-LABEL: fcmps_une_f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvt s1, h1
+; CHECK-NOFP16-NEXT: fcmpe s0, s1
+; CHECK-NOFP16-NEXT: cset w0, ne
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcmps_une_f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmpe h0, h1
+; CHECK-FP16-NEXT: cset w0, ne
+; CHECK-FP16-NEXT: ret
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") #0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+
+; Intrinsics to convert between floating-point types
+
+define half @fptrunc_f16_f32(float %x) #0 {
+; CHECK-LABEL: fptrunc_f16_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+ %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
+define float @fpext_f32_f16(half %x) #0 {
+; CHECK-LABEL: fpext_f32_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: ret
+ %val = call float @llvm.experimental.constrained.fpext.f32.f16(half %x, metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
+
+attributes #0 = { strictfp }
+
+declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fdiv.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.frem.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i128(i128, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i128(i128, metadata, metadata)
+declare half @llvm.experimental.constrained.sqrt.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.powi.f16(half, i32, metadata, metadata)
+declare half @llvm.experimental.constrained.sin.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.cos.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.pow.f16(half, half, metadata, metadata)
+declare half @llvm.experimental.constrained.log.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.log10.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.log2.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.exp.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.exp2.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.f16(half, metadata, metadata)
+declare i64 @llvm.experimental.constrained.llrint.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.maxnum.f16(half, half, metadata)
+declare half @llvm.experimental.constrained.minnum.f16(half, half, metadata)
+declare half @llvm.experimental.constrained.ceil.f16(half, metadata)
+declare half @llvm.experimental.constrained.floor.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.lround.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.llround.f16(half, metadata)
+declare half @llvm.experimental.constrained.round.f16(half, metadata)
+declare half @llvm.experimental.constrained.roundeven.f16(half, metadata)
+declare half @llvm.experimental.constrained.trunc.f16(half, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f16(half, half, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata)
+
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
index a434332270ebb..29aeb204bf7e8 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-eabi %s -disable-strictnode-mutation -o - | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 -disable-strictnode-mutation %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s
; Check that constrained fp intrinsics are correctly lowered.