[llvm] 8e17c96 - [AArch64] Add some missing strict FP vector lowering
John Brawn via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 17 08:11:12 PST 2022
Author: John Brawn
Date: 2022-02-17T16:10:31Z
New Revision: 8e17c9613f36f23b5a9d2720f330a37e54c6924f
URL: https://github.com/llvm/llvm-project/commit/8e17c9613f36f23b5a9d2720f330a37e54c6924f
DIFF: https://github.com/llvm/llvm-project/commit/8e17c9613f36f23b5a9d2720f330a37e54c6924f.diff
LOG: [AArch64] Add some missing strict FP vector lowering
Also add a test for the codegen of strict FP vector operations so
these changes get tested.
Differential Revision: https://reviews.llvm.org/D117795
Added:
llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 810cdb748b3b..58a853c2ddf8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3404,7 +3404,8 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
- EVT InVT = Op.getOperand(0).getValueType();
+ bool IsStrict = Op->isStrictFPOpcode();
+ EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
EVT VT = Op.getValueType();
if (VT.isScalableVector()) {
@@ -3424,6 +3425,12 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
!Subtarget->hasFullFP16()) {
MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
SDLoc dl(Op);
+ if (IsStrict) {
+ SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NewVT, MVT::Other},
+ {Op.getOperand(0), Op.getOperand(1)});
+ return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
+ {Ext.getValue(1), Ext.getValue(0)});
+ }
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
@@ -3433,6 +3440,13 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
SDLoc dl(Op);
+ if (IsStrict) {
+ InVT = InVT.changeVectorElementTypeToInteger();
+ SDValue Cv = DAG.getNode(Op.getOpcode(), dl, {InVT, MVT::Other},
+ {Op.getOperand(0), Op.getOperand(1)});
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
+ return DAG.getMergeValues({Trunc, Cv.getValue(1)}, dl);
+ }
SDValue Cv =
DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
Op.getOperand(0));
@@ -3444,10 +3458,32 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
MVT ExtVT =
MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
VT.getVectorNumElements());
+ if (IsStrict) {
+ SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
+ {ExtVT, MVT::Other},
+ {Op.getOperand(0), Op.getOperand(1)});
+ return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
+ {Ext.getValue(1), Ext.getValue(0)});
+ }
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
+ // Use a scalar operation for conversions between single-element vectors of
+ // the same size.
+ if (NumElts == 1) {
+ SDLoc dl(Op);
+ SDValue Extract = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(),
+ Op.getOperand(IsStrict ? 1 : 0), DAG.getConstant(0, dl, MVT::i64));
+ EVT ScalarVT = VT.getScalarType();
+ SDValue ScalarCvt;
+ if (IsStrict)
+ return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
+ {Op.getOperand(0), Extract});
+ return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
+ }
+
// Type changing conversions are illegal.
return Op;
}
@@ -3610,9 +3646,10 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
+ bool IsStrict = Op->isStrictFPOpcode();
EVT VT = Op.getValueType();
SDLoc dl(Op);
- SDValue In = Op.getOperand(0);
+ SDValue In = Op.getOperand(IsStrict ? 1 : 0);
EVT InVT = In.getValueType();
unsigned Opc = Op.getOpcode();
bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
@@ -3640,6 +3677,13 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
MVT CastVT =
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
+ if (IsStrict) {
+ In = DAG.getNode(Opc, dl, {CastVT, MVT::Other},
+ {Op.getOperand(0), In});
+ return DAG.getNode(
+ ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other},
+ {In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)});
+ }
In = DAG.getNode(Opc, dl, CastVT, In);
return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
}
@@ -3648,9 +3692,24 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = VT.changeVectorElementTypeToInteger();
In = DAG.getNode(CastOpc, dl, CastVT, In);
+ if (IsStrict)
+ return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op.getOperand(0), In});
return DAG.getNode(Opc, dl, VT, In);
}
+ // Use a scalar operation for conversions between single-element vectors of
+ // the same size.
+ if (VT.getVectorNumElements() == 1) {
+ SDValue Extract = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(),
+ In, DAG.getConstant(0, dl, MVT::i64));
+ EVT ScalarVT = VT.getScalarType();
+ if (IsStrict)
+ return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
+ {Op.getOperand(0), Extract});
+ return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
+ }
+
return Op;
}
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
new file mode 100644
index 000000000000..b78798531e01
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
@@ -0,0 +1,886 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-eabi %s -disable-strictnode-mutation -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel=true -global-isel-abort=2 -disable-strictnode-mutation %s -o - | FileCheck %s
+
+; Check that constrained fp vector intrinsics are correctly lowered.
+
+
+; Single-precision intrinsics
+
+define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: add_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: sub_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fsub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: mul_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: div_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @fma_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 {
+; CHECK-LABEL: fma_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x i32> @fptosi_v4i32_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fptosi_v4i32_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %val = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x i32> %val
+}
+
+define <4 x i32> @fptoui_v4i32_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fptoui_v4i32_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %val = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x i32> %val
+}
+
+define <4 x i64> @fptosi_v4i64_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fptosi_v4i64_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-NEXT: ret
+ %val = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x i64> %val
+}
+
+define <4 x i64> @fptoui_v4i64_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fptoui_v4i64_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: fcvtzu v1.2d, v1.2d
+; CHECK-NEXT: ret
+ %val = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x i64> %val
+}
+
+define <4 x float> @sitofp_v4f32_v4i32(<4 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v4f32_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v4f32_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v4f32_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf v0.2d, v0.2d
+; CHECK-NEXT: scvtf v1.2d, v1.2d
+; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v4f32_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: ucvtf v1.2d, v1.2d
+; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @sqrt_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: sqrt_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fsqrt v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @rint_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: rint_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintx v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @nearbyint_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: nearbyint_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinti v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @maxnum_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: maxnum_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float> %x, <4 x float> %y, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @minnum_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: minnum_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float> %x, <4 x float> %y, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @ceil_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: ceil_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @floor_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: floor_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @round_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: round_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @roundeven_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: roundeven_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @trunc_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: trunc_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %val = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x i1> @fcmp_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: fcmp_v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov s2, v1.s[1]
+; CHECK-NEXT: mov s3, v0.s[1]
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: mov s4, v1.s[2]
+; CHECK-NEXT: mov s5, v0.s[2]
+; CHECK-NEXT: mov s1, v1.s[3]
+; CHECK-NEXT: mov s0, v0.s[3]
+; CHECK-NEXT: csetm w8, eq
+; CHECK-NEXT: fcmp s3, s2
+; CHECK-NEXT: fmov s2, w8
+; CHECK-NEXT: csetm w8, eq
+; CHECK-NEXT: fcmp s5, s4
+; CHECK-NEXT: mov v2.s[1], w8
+; CHECK-NEXT: csetm w8, eq
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: mov v2.s[2], w8
+; CHECK-NEXT: csetm w8, eq
+; CHECK-NEXT: mov v2.s[3], w8
+; CHECK-NEXT: xtn v0.4h, v2.4s
+; CHECK-NEXT: ret
+entry:
+ %val = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <4 x i1> %val
+}
+
+define <4 x i1> @fcmps_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: fcmps_v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov s2, v1.s[1]
+; CHECK-NEXT: mov s3, v0.s[1]
+; CHECK-NEXT: fcmpe s0, s1
+; CHECK-NEXT: mov s4, v1.s[2]
+; CHECK-NEXT: mov s5, v0.s[2]
+; CHECK-NEXT: mov s1, v1.s[3]
+; CHECK-NEXT: mov s0, v0.s[3]
+; CHECK-NEXT: csetm w8, eq
+; CHECK-NEXT: fcmpe s3, s2
+; CHECK-NEXT: fmov s2, w8
+; CHECK-NEXT: csetm w8, eq
+; CHECK-NEXT: fcmpe s5, s4
+; CHECK-NEXT: mov v2.s[1], w8
+; CHECK-NEXT: csetm w8, eq
+; CHECK-NEXT: fcmpe s0, s1
+; CHECK-NEXT: mov v2.s[2], w8
+; CHECK-NEXT: csetm w8, eq
+; CHECK-NEXT: mov v2.s[3], w8
+; CHECK-NEXT: xtn v0.4h, v2.4s
+; CHECK-NEXT: ret
+entry:
+ %val = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <4 x i1> %val
+}
+
+
+; Double-precision intrinsics
+
+define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: add_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: sub_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fsub v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: mul_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: div_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @fma_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) #0 {
+; CHECK-LABEL: fma_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmla v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x i32> @fptosi_v2i32_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: fptosi_v2i32_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x i32> %val
+}
+
+define <2 x i32> @fptoui_v2i32_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: fptoui_v2i32_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x i32> %val
+}
+
+define <2 x i64> @fptosi_v2i64_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: fptosi_v2i64_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x i64> %val
+}
+
+define <2 x i64> @fptoui_v2i64_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: fptoui_v2i64_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x i64> %val
+}
+
+define <2 x double> @sitofp_v2f64_v2i32(<2 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v2f64_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-NEXT: scvtf v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @uitofp_v2f64_v2i32(<2 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v2f64_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @sitofp_v2f64_v2i64(<2 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v2f64_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v2f64_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @sqrt_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: sqrt_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fsqrt v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @rint_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: rint_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintx v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @nearbyint_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: nearbyint_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinti v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @maxnum_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: maxnum_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double> %x, <2 x double> %y, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @minnum_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: minnum_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fminnm v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double> %x, <2 x double> %y, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @ceil_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: ceil_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @floor_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: floor_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @round_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: round_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @roundeven_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: roundeven_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @trunc_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: trunc_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x i1> @fcmp_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: fcmp_v2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov d2, v1.d[1]
+; CHECK-NEXT: mov d3, v0.d[1]
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: csetm x8, eq
+; CHECK-NEXT: fcmp d3, d2
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: csetm x8, eq
+; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %val = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %x, <2 x double> %y, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <2 x i1> %val
+}
+
+define <2 x i1> @fcmps_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: fcmps_v2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov d2, v1.d[1]
+; CHECK-NEXT: mov d3, v0.d[1]
+; CHECK-NEXT: fcmpe d0, d1
+; CHECK-NEXT: csetm x8, eq
+; CHECK-NEXT: fcmpe d3, d2
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: csetm x8, eq
+; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %val = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %x, <2 x double> %y, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <2 x i1> %val
+}
+
+
+; Double-precision single element intrinsics
+
+define <1 x double> @add_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: add_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.fadd.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @sub_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: sub_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fsub d0, d0, d1
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.fsub.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @mul_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: mul_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.fmul.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @div_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: div_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv d0, d0, d1
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.fdiv.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @fma_v1f64(<1 x double> %x, <1 x double> %y, <1 x double> %z) #0 {
+; CHECK-LABEL: fma_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmadd d0, d0, d1, d2
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> %x, <1 x double> %y, <1 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x i32> @fptosi_v1i32_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: fptosi_v1i32_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %val = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x i32> %val
+}
+
+define <1 x i32> @fptoui_v1i32_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: fptoui_v1i32_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %val = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x i32> %val
+}
+
+define <1 x i64> @fptosi_v1i64_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: fptosi_v1i64_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs x8, d0
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %val = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x i64> %val
+}
+
+define <1 x i64> @fptoui_v1i64_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: fptoui_v1i64_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu x8, d0
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %val = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x i64> %val
+}
+
+define <1 x double> @sitofp_v1f64_v1i32(<1 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v1f64_v1i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: scvtf d0, w8
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @uitofp_v1f64_v1i32(<1 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v1f64_v1i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: ucvtf d0, w8
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @sitofp_v1f64_v1i64(<1 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v1f64_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: scvtf d0, x8
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v1f64_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @sqrt_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: sqrt_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fsqrt d0, d0
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.sqrt.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @rint_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: rint_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintx d0, d0
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.rint.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @nearbyint_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: nearbyint_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinti d0, d0
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.nearbyint.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @maxnum_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: maxnum_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmaxnm d0, d0, d1
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.maxnum.v1f64(<1 x double> %x, <1 x double> %y, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @minnum_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: minnum_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fminnm d0, d0, d1
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.minnum.v1f64(<1 x double> %x, <1 x double> %y, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @ceil_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: ceil_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp d0, d0
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @floor_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: floor_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm d0, d0
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @round_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: round_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta d0, d0
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @roundeven_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: roundeven_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn d0, d0
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @trunc_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: trunc_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz d0, d0
+; CHECK-NEXT: ret
+ %val = call <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x i1> @fcmp_v1f61(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: fcmp_v1f61:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+entry:
+ %val = call <1 x i1> @llvm.experimental.constrained.fcmp.v1f64(<1 x double> %x, <1 x double> %y, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <1 x i1> %val
+}
+
+define <1 x i1> @fcmps_v1f61(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: fcmps_v1f61:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmpe d0, d1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+entry:
+ %val = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> %x, <1 x double> %y, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <1 x i1> %val
+}
+
+
+; Intrinsics to convert between floating-point types
+
+define <2 x float> @fptrunc_v2f32_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: fptrunc_v2f32_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+ %val = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x float> %val
+}
+
+define <2 x double> @fpext_v2f64_v2f32(<2 x float> %x) #0 {
+; CHECK-LABEL: fpext_v2f64_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: ret
+ %val = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float> %x, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+
+attributes #0 = { strictfp }
+
+declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
+declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata)
+declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata)
+declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float>, metadata)
+declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float>, metadata)
+declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float>, <4 x float>, metadata)
+declare <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float>, <4 x float>, metadata)
+declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata)
+declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata)
+declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata)
+declare <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float>, metadata)
+declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata)
+declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x float>, <4 x float>, metadata, metadata)
+declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x float>, <4 x float>, metadata, metadata)
+
+declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
+declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata)
+declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata)
+declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
+declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
+declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata)
+declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata)
+declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata)
+declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata)
+declare <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double>, metadata)
+declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata)
+declare <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+declare <1 x double> @llvm.experimental.constrained.fadd.v1f64(<1 x double>, <1 x double>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.fsub.v1f64(<1 x double>, <1 x double>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.fmul.v1f64(<1 x double>, <1 x double>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.fdiv.v1f64(<1 x double>, <1 x double>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double>, <1 x double>, <1 x double>, metadata, metadata)
+declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double>, metadata)
+declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double>, metadata)
+declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double>, metadata)
+declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double>, metadata)
+declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.sqrt.v1f64(<1 x double>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.rint.v1f64(<1 x double>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.nearbyint.v1f64(<1 x double>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.maxnum.v1f64(<1 x double>, <1 x double>, metadata)
+declare <1 x double> @llvm.experimental.constrained.minnum.v1f64(<1 x double>, <1 x double>, metadata)
+declare <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double>, metadata)
+declare <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double>, metadata)
+declare <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double>, metadata)
+declare <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double>, metadata)
+declare <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double>, metadata)
+declare <1 x i1> @llvm.experimental.constrained.fcmp.v1f64(<1 x double>, <1 x double>, metadata, metadata)
+declare <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double>, <1 x double>, metadata, metadata)
+
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
More information about the llvm-commits
mailing list