[llvm] [ARM] Disable strict node mutation and use correct lowering for several strict ops (PR #170136)
Erik Enikeev via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 1 05:34:39 PST 2025
https://github.com/Varnike created https://github.com/llvm/llvm-project/pull/170136
None
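For context (this is illustrative only, not part of the patch): the change affects how constrained FP intrinsics are selected on ARM. A minimal, hypothetical IR example exercising one of the newly Legal strict ops (f16 STRICT_FADD with +fullfp16) might look like the following; the function name and shape are assumptions for illustration, and the new fp-intrinsics-vector.ll test below shows the actual expected codegen.

define half @strict_fadd_f16(half %a, half %b) #0 {
  ; Sketch: with IsStrictFPEnabled set and STRICT_FADD marked Legal for f16,
  ; the strict node should be selected directly rather than being mutated
  ; into a non-strict fadd during SelectionDAG construction.
  %r = call half @llvm.experimental.constrained.fadd.f16(half %a, half %b,
            metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  ret half %r
}

attributes #0 = { strictfp }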
>From e12cf62d8574f8a6ec9d4573c9464c56971d6e4a Mon Sep 17 00:00:00 2001
From: Erik Enikeev <evonatarius at gmail.com>
Date: Fri, 14 Nov 2025 03:52:39 +0300
Subject: [PATCH] [ARM] Disable strict node mutation and use correct lowering
for several strict ops
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 64 +-
llvm/lib/Target/ARM/ARMInstrVFP.td | 8 +-
llvm/test/CodeGen/ARM/fp-intrinsics-vector.ll | 1499 +++++++++++++++++
llvm/test/CodeGen/ARM/fp16-fullfp16.ll | 8 +-
4 files changed, 1543 insertions(+), 36 deletions(-)
create mode 100644 llvm/test/CodeGen/ARM/fp-intrinsics-vector.ll
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 32f3e5fa3c842..1a8c470600394 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -546,16 +546,24 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
setOperationAction(Op, MVT::f64, Legal);
+
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
}
}
if (Subtarget->hasFullFP16()) {
+ for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
+ setOperationAction(Op, MVT::f16, Legal);
+
addRegisterClass(MVT::f16, &ARM::HPRRegClass);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Legal);
}
if (Subtarget->hasBF16()) {
@@ -865,13 +873,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
}
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
+
if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
@@ -879,11 +888,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
}
+ } else {
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
}
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
+ } else {
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
}
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -1223,16 +1237,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
- setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, LibCall);
- setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, LibCall);
+ setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Expand);
}
// fp16 is a special v7 extension that adds f16 <-> f32 conversions.
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
- setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, LibCall);
- setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, LibCall);
+ setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Expand);
}
// Strict floating-point comparisons need custom lowering.
@@ -1248,34 +1262,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// FP-ARMv8 implements a lot of rounding-like FP operations.
- if (Subtarget->hasFPARMv8Base()) {
- setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FROUND, MVT::f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FROUNDEVEN, MVT::f32, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+ if (Subtarget->hasFPARMv8Base()) {
+ for (auto Op :
+ {ISD::FFLOOR, ISD::FCEIL, ISD::FROUND,
+ ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT,
+ ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FROUND,
+ ISD::STRICT_FTRUNC, ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
+ ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM}) {
+ setOperationAction(Op, MVT::f32, Legal);
+
+ if (Subtarget->hasFP64())
+ setOperationAction(Op, MVT::f64, Legal);
+ }
+
if (Subtarget->hasNEON()) {
setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
}
-
- if (Subtarget->hasFP64()) {
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FROUND, MVT::f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FROUNDEVEN, MVT::f64, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
- }
}
// FP16 often need to be promoted to call lib functions
@@ -1430,6 +1436,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
+
+ IsStrictFPEnabled = true;
}
bool ARMTargetLowering::useSoftFloat() const {
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 65c61c259d465..5f5f703fbabf1 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -814,7 +814,7 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
def : FP16Pat<(f32 (any_fpextend (f16 HPR:$Sm))),
(VCVTBHS (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>;
-def : FP16Pat<(f16_to_fp GPR:$a),
+def : FP16Pat<(any_f16_to_fp GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
let hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPSCR_RM] in
@@ -826,7 +826,7 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda,
def : FP16Pat<(f16 (any_fpround SPR:$Sm)),
(COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>;
-def : FP16Pat<(fp_to_f16 SPR:$a),
+def : FP16Pat<(any_fp_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>;
def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_even:$lane),
(v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
@@ -891,7 +891,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
def : FullFP16Pat<(f64 (any_fpextend (f16 HPR:$Sm))),
(VCVTBHD (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
-def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
+def : FP16Pat<(f64 (any_f16_to_fp GPR:$a)),
(VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
@@ -917,7 +917,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
def : FullFP16Pat<(f16 (any_fpround DPR:$Dm)),
(COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>,
Requires<[HasFPARMv8, HasDPVFP]>;
-def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
+def : FP16Pat<(any_fp_to_f16 (f64 DPR:$a)),
(i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics-vector.ll b/llvm/test/CodeGen/ARM/fp-intrinsics-vector.ll
new file mode 100644
index 0000000000000..d4b94b97acad8
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/fp-intrinsics-vector.ll
@@ -0,0 +1,1499 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=armv7a-none-eabihf -mattr=+neon,+vfp4 %s -o - | FileCheck %s
+
+define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: add_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vadd.f32 s11, s3, s7
+; CHECK-NEXT: vadd.f32 s10, s2, s6
+; CHECK-NEXT: vadd.f32 s9, s1, s5
+; CHECK-NEXT: vadd.f32 s8, s0, s4
+; CHECK-NEXT: vorr q0, q2, q2
+; CHECK-NEXT: bx lr
+ %val = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: sub_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vsub.f32 s11, s3, s7
+; CHECK-NEXT: vsub.f32 s10, s2, s6
+; CHECK-NEXT: vsub.f32 s9, s1, s5
+; CHECK-NEXT: vsub.f32 s8, s0, s4
+; CHECK-NEXT: vorr q0, q2, q2
+; CHECK-NEXT: bx lr
+ %val = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: mul_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmul.f32 s11, s3, s7
+; CHECK-NEXT: vmul.f32 s10, s2, s6
+; CHECK-NEXT: vmul.f32 s9, s1, s5
+; CHECK-NEXT: vmul.f32 s8, s0, s4
+; CHECK-NEXT: vorr q0, q2, q2
+; CHECK-NEXT: bx lr
+ %val = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: div_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vdiv.f32 s11, s3, s7
+; CHECK-NEXT: vdiv.f32 s10, s2, s6
+; CHECK-NEXT: vdiv.f32 s9, s1, s5
+; CHECK-NEXT: vdiv.f32 s8, s0, s4
+; CHECK-NEXT: vorr q0, q2, q2
+; CHECK-NEXT: bx lr
+ %val = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @fma_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 {
+; CHECK-LABEL: fma_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vfma.f32 s11, s3, s7
+; CHECK-NEXT: vfma.f32 s10, s2, s6
+; CHECK-NEXT: vfma.f32 s9, s1, s5
+; CHECK-NEXT: vfma.f32 s8, s0, s4
+; CHECK-NEXT: vorr q0, q2, q2
+; CHECK-NEXT: bx lr
+ %val = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x i32> @fptosi_v4i32_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fptosi_v4i32_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcvt.s32.f32 s4, s2
+; CHECK-NEXT: vcvt.s32.f32 s6, s0
+; CHECK-NEXT: vcvt.s32.f32 s0, s1
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vcvt.s32.f32 s4, s3
+; CHECK-NEXT: vmov.32 d17[0], r0
+; CHECK-NEXT: vmov r0, s6
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov.32 d17[1], r0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vorr q0, q8, q8
+; CHECK-NEXT: bx lr
+ %val = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x i32> %val
+}
+
+define <4 x i32> @fptoui_v4i32_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fptoui_v4i32_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcvt.u32.f32 s4, s2
+; CHECK-NEXT: vcvt.u32.f32 s6, s0
+; CHECK-NEXT: vcvt.u32.f32 s0, s1
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vcvt.u32.f32 s4, s3
+; CHECK-NEXT: vmov.32 d17[0], r0
+; CHECK-NEXT: vmov r0, s6
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov.32 d17[1], r0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vorr q0, q8, q8
+; CHECK-NEXT: bx lr
+ %val = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x i32> %val
+}
+
+define <4 x i64> @fptosi_v4i64_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fptosi_v4i64_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: vmov r1, s16
+; CHECK-NEXT: vmov r5, s17
+; CHECK-NEXT: vmov r6, s18
+; CHECK-NEXT: vmov.32 d9[0], r0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: vmov.32 d10[0], r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: vmov.32 d11[0], r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: vmov.32 d8[0], r0
+; CHECK-NEXT: vmov.32 d11[1], r5
+; CHECK-NEXT: vmov.32 d9[1], r4
+; CHECK-NEXT: vmov.32 d10[1], r7
+; CHECK-NEXT: vmov.32 d8[1], r1
+; CHECK-NEXT: vorr q0, q5, q5
+; CHECK-NEXT: vorr q1, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %val = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x i64> %val
+}
+
+define <4 x i64> @fptoui_v4i64_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fptoui_v4i64_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: vmov r1, s16
+; CHECK-NEXT: vmov r5, s17
+; CHECK-NEXT: vmov r6, s18
+; CHECK-NEXT: vmov.32 d9[0], r0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: vmov.32 d10[0], r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: vmov.32 d11[0], r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: vmov.32 d8[0], r0
+; CHECK-NEXT: vmov.32 d11[1], r5
+; CHECK-NEXT: vmov.32 d9[1], r4
+; CHECK-NEXT: vmov.32 d10[1], r7
+; CHECK-NEXT: vmov.32 d8[1], r1
+; CHECK-NEXT: vorr q0, q5, q5
+; CHECK-NEXT: vorr q1, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %val = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x i64> %val
+}
+
+define <4 x float> @sitofp_v4f32_v4i32(<4 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v4f32_v4i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .pad #32
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: vmov r12, r1, d0
+; CHECK-NEXT: movw r0, #0
+; CHECK-NEXT: vmov r2, r3, d1
+; CHECK-NEXT: movt r0, #17200
+; CHECK-NEXT: str r0, [sp, #20]
+; CHECK-NEXT: vldr d16, .LCPI9_0
+; CHECK-NEXT: eor r1, r1, #-2147483648
+; CHECK-NEXT: str r1, [sp, #16]
+; CHECK-NEXT: str r0, [sp, #12]
+; CHECK-NEXT: eor r1, r2, #-2147483648
+; CHECK-NEXT: vldr d17, [sp, #16]
+; CHECK-NEXT: stmib sp, {r0, r1}
+; CHECK-NEXT: eor r1, r3, #-2147483648
+; CHECK-NEXT: vsub.f64 d17, d17, d16
+; CHECK-NEXT: vldr d18, [sp, #8]
+; CHECK-NEXT: str r1, [sp]
+; CHECK-NEXT: str r0, [sp, #28]
+; CHECK-NEXT: eor r0, r12, #-2147483648
+; CHECK-NEXT: vldr d19, [sp]
+; CHECK-NEXT: str r0, [sp, #24]
+; CHECK-NEXT: vsub.f64 d18, d18, d16
+; CHECK-NEXT: vsub.f64 d19, d19, d16
+; CHECK-NEXT: vldr d20, [sp, #24]
+; CHECK-NEXT: vcvt.f32.f64 s3, d19
+; CHECK-NEXT: vsub.f64 d16, d20, d16
+; CHECK-NEXT: vcvt.f32.f64 s2, d18
+; CHECK-NEXT: vcvt.f32.f64 s1, d17
+; CHECK-NEXT: vcvt.f32.f64 s0, d16
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI9_0:
+; CHECK-NEXT: .long 2147483648 @ double 4503601774854144
+; CHECK-NEXT: .long 1127219200
+ %val = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v4f32_v4i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .pad #32
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: vmov r0, r1, d1
+; CHECK-NEXT: movw r2, #0
+; CHECK-NEXT: vmov r12, r3, d0
+; CHECK-NEXT: movt r2, #17200
+; CHECK-NEXT: stm sp, {r1, r2}
+; CHECK-NEXT: vldr d17, [sp]
+; CHECK-NEXT: vldr d16, .LCPI10_0
+; CHECK-NEXT: str r2, [sp, #12]
+; CHECK-NEXT: vsub.f64 d17, d17, d16
+; CHECK-NEXT: vcvt.f32.f64 s3, d17
+; CHECK-NEXT: str r0, [sp, #8]
+; CHECK-NEXT: vldr d18, [sp, #8]
+; CHECK-NEXT: str r2, [sp, #20]
+; CHECK-NEXT: str r3, [sp, #16]
+; CHECK-NEXT: vsub.f64 d18, d18, d16
+; CHECK-NEXT: vldr d19, [sp, #16]
+; CHECK-NEXT: str r2, [sp, #28]
+; CHECK-NEXT: vcvt.f32.f64 s2, d18
+; CHECK-NEXT: str r12, [sp, #24]
+; CHECK-NEXT: vldr d20, [sp, #24]
+; CHECK-NEXT: vsub.f64 d19, d19, d16
+; CHECK-NEXT: vsub.f64 d16, d20, d16
+; CHECK-NEXT: vcvt.f32.f64 s1, d19
+; CHECK-NEXT: vcvt.f32.f64 s0, d16
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI10_0:
+; CHECK-NEXT: .long 0 @ double 4503599627370496
+; CHECK-NEXT: .long 1127219200
+ %val = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v4f32_v4i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q1, q1
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: bl __aeabi_l2f
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl __aeabi_l2f
+; CHECK-NEXT: vmov r2, r1, d11
+; CHECK-NEXT: vmov s19, r0
+; CHECK-NEXT: vmov r5, r6, d10
+; CHECK-NEXT: vmov s18, r4
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: bl __aeabi_l2f
+; CHECK-NEXT: vmov s17, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl __aeabi_l2f
+; CHECK-NEXT: vmov s16, r0
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, r5, r6, pc}
+ %val = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v4f32_v4i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q1, q1
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: bl __aeabi_ul2f
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl __aeabi_ul2f
+; CHECK-NEXT: vmov r2, r1, d11
+; CHECK-NEXT: vmov s19, r0
+; CHECK-NEXT: vmov r5, r6, d10
+; CHECK-NEXT: vmov s18, r4
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: bl __aeabi_ul2f
+; CHECK-NEXT: vmov s17, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl __aeabi_ul2f
+; CHECK-NEXT: vmov s16, r0
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, r5, r6, pc}
+ %val = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @sqrt_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: sqrt_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vsqrt.f32 s7, s3
+; CHECK-NEXT: vsqrt.f32 s6, s2
+; CHECK-NEXT: vsqrt.f32 s5, s1
+; CHECK-NEXT: vsqrt.f32 s4, s0
+; CHECK-NEXT: vorr q0, q1, q1
+; CHECK-NEXT: bx lr
+ %val = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @rint_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: rint_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vmov.f32 s0, s23
+; CHECK-NEXT: bl rintf
+; CHECK-NEXT: vmov.f32 s19, s0
+; CHECK-NEXT: vmov.f32 s0, s22
+; CHECK-NEXT: bl rintf
+; CHECK-NEXT: vmov.f32 s18, s0
+; CHECK-NEXT: vmov.f32 s0, s21
+; CHECK-NEXT: bl rintf
+; CHECK-NEXT: vmov.f32 s17, s0
+; CHECK-NEXT: vmov.f32 s0, s20
+; CHECK-NEXT: bl rintf
+; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @nearbyint_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: nearbyint_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vmov.f32 s0, s23
+; CHECK-NEXT: bl nearbyintf
+; CHECK-NEXT: vmov.f32 s19, s0
+; CHECK-NEXT: vmov.f32 s0, s22
+; CHECK-NEXT: bl nearbyintf
+; CHECK-NEXT: vmov.f32 s18, s0
+; CHECK-NEXT: vmov.f32 s0, s21
+; CHECK-NEXT: bl nearbyintf
+; CHECK-NEXT: vmov.f32 s17, s0
+; CHECK-NEXT: vmov.f32 s0, s20
+; CHECK-NEXT: bl nearbyintf
+; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @maxnum_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: maxnum_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vorr q4, q1, q1
+; CHECK-NEXT: vmov.f32 s0, s23
+; CHECK-NEXT: vmov.f32 s1, s19
+; CHECK-NEXT: bl fmaxf
+; CHECK-NEXT: vmov.f32 s27, s0
+; CHECK-NEXT: vmov.f32 s0, s22
+; CHECK-NEXT: vmov.f32 s1, s18
+; CHECK-NEXT: bl fmaxf
+; CHECK-NEXT: vmov.f32 s26, s0
+; CHECK-NEXT: vmov.f32 s0, s21
+; CHECK-NEXT: vmov.f32 s1, s17
+; CHECK-NEXT: bl fmaxf
+; CHECK-NEXT: vmov.f32 s25, s0
+; CHECK-NEXT: vmov.f32 s0, s20
+; CHECK-NEXT: vmov.f32 s1, s16
+; CHECK-NEXT: bl fmaxf
+; CHECK-NEXT: vmov.f32 s24, s0
+; CHECK-NEXT: vorr q0, q6, q6
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float> %x, <4 x float> %y, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @minnum_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: minnum_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vorr q4, q1, q1
+; CHECK-NEXT: vmov.f32 s0, s23
+; CHECK-NEXT: vmov.f32 s1, s19
+; CHECK-NEXT: bl fminf
+; CHECK-NEXT: vmov.f32 s27, s0
+; CHECK-NEXT: vmov.f32 s0, s22
+; CHECK-NEXT: vmov.f32 s1, s18
+; CHECK-NEXT: bl fminf
+; CHECK-NEXT: vmov.f32 s26, s0
+; CHECK-NEXT: vmov.f32 s0, s21
+; CHECK-NEXT: vmov.f32 s1, s17
+; CHECK-NEXT: bl fminf
+; CHECK-NEXT: vmov.f32 s25, s0
+; CHECK-NEXT: vmov.f32 s0, s20
+; CHECK-NEXT: vmov.f32 s1, s16
+; CHECK-NEXT: bl fminf
+; CHECK-NEXT: vmov.f32 s24, s0
+; CHECK-NEXT: vorr q0, q6, q6
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float> %x, <4 x float> %y, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @ceil_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: ceil_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vmov.f32 s0, s23
+; CHECK-NEXT: bl ceilf
+; CHECK-NEXT: vmov.f32 s19, s0
+; CHECK-NEXT: vmov.f32 s0, s22
+; CHECK-NEXT: bl ceilf
+; CHECK-NEXT: vmov.f32 s18, s0
+; CHECK-NEXT: vmov.f32 s0, s21
+; CHECK-NEXT: bl ceilf
+; CHECK-NEXT: vmov.f32 s17, s0
+; CHECK-NEXT: vmov.f32 s0, s20
+; CHECK-NEXT: bl ceilf
+; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @floor_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: floor_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vmov.f32 s0, s23
+; CHECK-NEXT: bl floorf
+; CHECK-NEXT: vmov.f32 s19, s0
+; CHECK-NEXT: vmov.f32 s0, s22
+; CHECK-NEXT: bl floorf
+; CHECK-NEXT: vmov.f32 s18, s0
+; CHECK-NEXT: vmov.f32 s0, s21
+; CHECK-NEXT: bl floorf
+; CHECK-NEXT: vmov.f32 s17, s0
+; CHECK-NEXT: vmov.f32 s0, s20
+; CHECK-NEXT: bl floorf
+; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @round_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: round_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vmov.f32 s0, s23
+; CHECK-NEXT: bl roundf
+; CHECK-NEXT: vmov.f32 s19, s0
+; CHECK-NEXT: vmov.f32 s0, s22
+; CHECK-NEXT: bl roundf
+; CHECK-NEXT: vmov.f32 s18, s0
+; CHECK-NEXT: vmov.f32 s0, s21
+; CHECK-NEXT: bl roundf
+; CHECK-NEXT: vmov.f32 s17, s0
+; CHECK-NEXT: vmov.f32 s0, s20
+; CHECK-NEXT: bl roundf
+; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @roundeven_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: roundeven_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vmov.f32 s0, s23
+; CHECK-NEXT: bl roundevenf
+; CHECK-NEXT: vmov.f32 s19, s0
+; CHECK-NEXT: vmov.f32 s0, s22
+; CHECK-NEXT: bl roundevenf
+; CHECK-NEXT: vmov.f32 s18, s0
+; CHECK-NEXT: vmov.f32 s0, s21
+; CHECK-NEXT: bl roundevenf
+; CHECK-NEXT: vmov.f32 s17, s0
+; CHECK-NEXT: vmov.f32 s0, s20
+; CHECK-NEXT: bl roundevenf
+; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @trunc_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: trunc_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vmov.f32 s0, s23
+; CHECK-NEXT: bl truncf
+; CHECK-NEXT: vmov.f32 s19, s0
+; CHECK-NEXT: vmov.f32 s0, s22
+; CHECK-NEXT: bl truncf
+; CHECK-NEXT: vmov.f32 s18, s0
+; CHECK-NEXT: vmov.f32 s0, s21
+; CHECK-NEXT: bl truncf
+; CHECK-NEXT: vmov.f32 s17, s0
+; CHECK-NEXT: vmov.f32 s0, s20
+; CHECK-NEXT: bl truncf
+; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x i1> @fcmp_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: fcmp_v4f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f32 s3, s7
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s2, s6
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movweq r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mvnne r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: movweq r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s1, s5
+; CHECK-NEXT: vmov.32 d17[0], r2
+; CHECK-NEXT: movweq r3, #1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mvnne r3, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov.32 d16[0], r3
+; CHECK-NEXT: vmov.32 d17[1], r1
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vmovn.i32 d0, q8
+; CHECK-NEXT: bx lr
+entry:
+ %val = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <4 x i1> %val
+}
+
+define <4 x i1> @fcmps_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: fcmps_v4f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmpe.f32 s3, s7
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmpe.f32 s2, s6
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movweq r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mvnne r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmpe.f32 s0, s4
+; CHECK-NEXT: movweq r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmpe.f32 s1, s5
+; CHECK-NEXT: vmov.32 d17[0], r2
+; CHECK-NEXT: movweq r3, #1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mvnne r3, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov.32 d16[0], r3
+; CHECK-NEXT: vmov.32 d17[1], r1
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vmovn.i32 d0, q8
+; CHECK-NEXT: bx lr
+entry:
+ %val = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <4 x i1> %val
+}
+
+
+
+define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: add_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vadd.f64 d17, d1, d3
+; CHECK-NEXT: vadd.f64 d16, d0, d2
+; CHECK-NEXT: vorr q0, q8, q8
+; CHECK-NEXT: bx lr
+ %val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: sub_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vsub.f64 d17, d1, d3
+; CHECK-NEXT: vsub.f64 d16, d0, d2
+; CHECK-NEXT: vorr q0, q8, q8
+; CHECK-NEXT: bx lr
+ %val = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: mul_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmul.f64 d17, d1, d3
+; CHECK-NEXT: vmul.f64 d16, d0, d2
+; CHECK-NEXT: vorr q0, q8, q8
+; CHECK-NEXT: bx lr
+ %val = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: div_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vdiv.f64 d17, d1, d3
+; CHECK-NEXT: vdiv.f64 d16, d0, d2
+; CHECK-NEXT: vorr q0, q8, q8
+; CHECK-NEXT: bx lr
+ %val = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @fma_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) #0 {
+; CHECK-LABEL: fma_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vfma.f64 d5, d1, d3
+; CHECK-NEXT: vfma.f64 d4, d0, d2
+; CHECK-NEXT: vorr q0, q2, q2
+; CHECK-NEXT: bx lr
+ %val = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x i32> @fptosi_v2i32_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: fptosi_v2i32_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcvt.s32.f64 s4, d0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vcvt.s32.f64 s2, d1
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 d0[1], r0
+; CHECK-NEXT: bx lr
+ %val = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x i32> %val
+}
+
+define <2 x i32> @fptoui_v2i32_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: fptoui_v2i32_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcvt.u32.f64 s4, d0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vcvt.u32.f64 s2, d1
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 d0[1], r0
+; CHECK-NEXT: bx lr
+ %val = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x i32> %val
+}
+
+define <2 x i64> @fptosi_v2i64_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: fptosi_v2i64_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: vmov r2, r1, d8
+; CHECK-NEXT: vmov.32 d9[0], r0
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: vmov.32 d8[0], r0
+; CHECK-NEXT: vmov.32 d9[1], r4
+; CHECK-NEXT: vmov.32 d8[1], r1
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, pc}
+ %val = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x i64> %val
+}
+
+define <2 x i64> @fptoui_v2i64_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: fptoui_v2i64_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: vmov r2, r1, d8
+; CHECK-NEXT: vmov.32 d9[0], r0
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: vmov.32 d8[0], r0
+; CHECK-NEXT: vmov.32 d9[1], r4
+; CHECK-NEXT: vmov.32 d8[1], r1
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, pc}
+ %val = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x i64> %val
+}
+
+define <2 x double> @sitofp_v2f64_v2i32(<2 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v2f64_v2i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: vmov.32 r0, d0[1]
+; CHECK-NEXT: movw r2, #0
+; CHECK-NEXT: vmov.32 r1, d0[0]
+; CHECK-NEXT: movt r2, #17200
+; CHECK-NEXT: str r2, [sp, #4]
+; CHECK-NEXT: vldr d16, .LCPI34_0
+; CHECK-NEXT: eor r0, r0, #-2147483648
+; CHECK-NEXT: str r0, [sp]
+; CHECK-NEXT: str r2, [sp, #12]
+; CHECK-NEXT: eor r0, r1, #-2147483648
+; CHECK-NEXT: vldr d17, [sp]
+; CHECK-NEXT: str r0, [sp, #8]
+; CHECK-NEXT: vldr d18, [sp, #8]
+; CHECK-NEXT: vsub.f64 d1, d17, d16
+; CHECK-NEXT: vsub.f64 d0, d18, d16
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI34_0:
+; CHECK-NEXT: .long 2147483648 @ double 4503601774854144
+; CHECK-NEXT: .long 1127219200
+ %val = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @uitofp_v2f64_v2i32(<2 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v2f64_v2i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: movw r0, #0
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: movt r0, #17200
+; CHECK-NEXT: vst1.32 {d0[1]}, [r1:32]
+; CHECK-NEXT: add r1, sp, #8
+; CHECK-NEXT: str r0, [sp, #4]
+; CHECK-NEXT: vldr d17, [sp]
+; CHECK-NEXT: vst1.32 {d0[0]}, [r1:32]
+; CHECK-NEXT: vldr d16, .LCPI35_0
+; CHECK-NEXT: str r0, [sp, #12]
+; CHECK-NEXT: vldr d18, [sp, #8]
+; CHECK-NEXT: vsub.f64 d1, d17, d16
+; CHECK-NEXT: vsub.f64 d0, d18, d16
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI35_0:
+; CHECK-NEXT: .long 0 @ double 4503599627370496
+; CHECK-NEXT: .long 1127219200
+ %val = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @sitofp_v2f64_v2i64(<2 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v2f64_v2i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl __aeabi_l2d
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_l2d
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v2f64_v2i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl __aeabi_ul2d
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_ul2d
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @sqrt_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: sqrt_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vsqrt.f64 d17, d1
+; CHECK-NEXT: vsqrt.f64 d16, d0
+; CHECK-NEXT: vorr q0, q8, q8
+; CHECK-NEXT: bx lr
+ %val = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @rint_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: rint_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: bl rint
+; CHECK-NEXT: vorr d11, d0, d0
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: bl rint
+; CHECK-NEXT: vorr d10, d0, d0
+; CHECK-NEXT: vorr q0, q5, q5
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @nearbyint_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: nearbyint_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: vorr d11, d0, d0
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: vorr d10, d0, d0
+; CHECK-NEXT: vorr q0, q5, q5
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @maxnum_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: maxnum_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vorr q4, q1, q1
+; CHECK-NEXT: vorr d0, d11, d11
+; CHECK-NEXT: vorr d1, d9, d9
+; CHECK-NEXT: bl fmax
+; CHECK-NEXT: vorr d13, d0, d0
+; CHECK-NEXT: vorr d0, d10, d10
+; CHECK-NEXT: vorr d1, d8, d8
+; CHECK-NEXT: bl fmax
+; CHECK-NEXT: vorr d12, d0, d0
+; CHECK-NEXT: vorr q0, q6, q6
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double> %x, <2 x double> %y, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @minnum_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: minnum_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vorr q4, q1, q1
+; CHECK-NEXT: vorr d0, d11, d11
+; CHECK-NEXT: vorr d1, d9, d9
+; CHECK-NEXT: bl fmin
+; CHECK-NEXT: vorr d13, d0, d0
+; CHECK-NEXT: vorr d0, d10, d10
+; CHECK-NEXT: vorr d1, d8, d8
+; CHECK-NEXT: bl fmin
+; CHECK-NEXT: vorr d12, d0, d0
+; CHECK-NEXT: vorr q0, q6, q6
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double> %x, <2 x double> %y, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @ceil_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: ceil_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: bl ceil
+; CHECK-NEXT: vorr d11, d0, d0
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: bl ceil
+; CHECK-NEXT: vorr d10, d0, d0
+; CHECK-NEXT: vorr q0, q5, q5
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @floor_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: floor_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: bl floor
+; CHECK-NEXT: vorr d11, d0, d0
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: bl floor
+; CHECK-NEXT: vorr d10, d0, d0
+; CHECK-NEXT: vorr q0, q5, q5
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @round_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: round_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: bl round
+; CHECK-NEXT: vorr d11, d0, d0
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: bl round
+; CHECK-NEXT: vorr d10, d0, d0
+; CHECK-NEXT: vorr q0, q5, q5
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @roundeven_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: roundeven_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: vorr d11, d0, d0
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: vorr d10, d0, d0
+; CHECK-NEXT: vorr q0, q5, q5
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x double> @trunc_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: trunc_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: bl trunc
+; CHECK-NEXT: vorr d11, d0, d0
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: bl trunc
+; CHECK-NEXT: vorr d10, d0, d0
+; CHECK-NEXT: vorr q0, q5, q5
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double> %x, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+define <2 x i1> @fcmp_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: fcmp_v2f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f64 d0, d2
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f64 d1, d3
+; CHECK-NEXT: movweq r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mvnne r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov.32 d0[0], r1
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vmov.32 d0[1], r0
+; CHECK-NEXT: bx lr
+entry:
+ %val = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %x, <2 x double> %y, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <2 x i1> %val
+}
+
+define <2 x i1> @fcmps_v2f64(<2 x double> %x, <2 x double> %y) #0 {
+; CHECK-LABEL: fcmps_v2f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmpe.f64 d0, d2
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmpe.f64 d1, d3
+; CHECK-NEXT: movweq r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mvnne r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov.32 d0[0], r1
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vmov.32 d0[1], r0
+; CHECK-NEXT: bx lr
+entry:
+ %val = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %x, <2 x double> %y, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <2 x i1> %val
+}
+
+
+
+define <1 x double> @add_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: add_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vadd.f64 d0, d0, d1
+; CHECK-NEXT: bx lr
+ %val = call <1 x double> @llvm.experimental.constrained.fadd.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @sub_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: sub_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vsub.f64 d0, d0, d1
+; CHECK-NEXT: bx lr
+ %val = call <1 x double> @llvm.experimental.constrained.fsub.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @mul_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: mul_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmul.f64 d0, d0, d1
+; CHECK-NEXT: bx lr
+ %val = call <1 x double> @llvm.experimental.constrained.fmul.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @div_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: div_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vdiv.f64 d0, d0, d1
+; CHECK-NEXT: bx lr
+ %val = call <1 x double> @llvm.experimental.constrained.fdiv.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @fma_v1f64(<1 x double> %x, <1 x double> %y, <1 x double> %z) #0 {
+; CHECK-LABEL: fma_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vfma.f64 d2, d0, d1
+; CHECK-NEXT: vmov.f64 d0, d2
+; CHECK-NEXT: bx lr
+ %val = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> %x, <1 x double> %y, <1 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x i32> @fptosi_v1i32_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: fptosi_v1i32_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcvt.s32.f64 s0, d0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+ %val = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x i32> %val
+}
+
+define <1 x i32> @fptoui_v1i32_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: fptoui_v1i32_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcvt.u32.f64 s0, d0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+ %val = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x i32> %val
+}
+
+define <1 x i64> @fptosi_v1i64_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: fptosi_v1i64_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: vmov.32 d0[1], r1
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x i64> %val
+}
+
+define <1 x i64> @fptoui_v1i64_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: fptoui_v1i64_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: vmov.32 d0[1], r1
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x i64> %val
+}
+
+define <1 x double> @sitofp_v1f64_v1i32(<1 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v1f64_v1i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, sp, #8
+; CHECK-NEXT: movw r1, #0
+; CHECK-NEXT: eor r0, r0, #-2147483648
+; CHECK-NEXT: movt r1, #17200
+; CHECK-NEXT: str r0, [sp]
+; CHECK-NEXT: str r1, [sp, #4]
+; CHECK-NEXT: vldr d16, .LCPI59_0
+; CHECK-NEXT: vldr d17, [sp]
+; CHECK-NEXT: vsub.f64 d0, d17, d16
+; CHECK-NEXT: add sp, sp, #8
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI59_0:
+; CHECK-NEXT: .long 2147483648 @ double 4503601774854144
+; CHECK-NEXT: .long 1127219200
+ %val = call <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @uitofp_v1f64_v1i32(<1 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v1f64_v1i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, sp, #8
+; CHECK-NEXT: movw r1, #0
+; CHECK-NEXT: str r0, [sp]
+; CHECK-NEXT: movt r1, #17200
+; CHECK-NEXT: vldr d16, .LCPI60_0
+; CHECK-NEXT: str r1, [sp, #4]
+; CHECK-NEXT: vldr d17, [sp]
+; CHECK-NEXT: vsub.f64 d0, d17, d16
+; CHECK-NEXT: add sp, sp, #8
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI60_0:
+; CHECK-NEXT: .long 0 @ double 4503599627370496
+; CHECK-NEXT: .long 1127219200
+ %val = call <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @sitofp_v1f64_v1i64(<1 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v1f64_v1i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vmov.32 r0, d0[0]
+; CHECK-NEXT: vmov.32 r1, d0[1]
+; CHECK-NEXT: bl __aeabi_l2d
+; CHECK-NEXT: vmov d0, r0, r1
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v1f64_v1i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vmov.32 r0, d0[0]
+; CHECK-NEXT: vmov.32 r1, d0[1]
+; CHECK-NEXT: bl __aeabi_ul2d
+; CHECK-NEXT: vmov d0, r0, r1
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @sqrt_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: sqrt_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vsqrt.f64 d0, d0
+; CHECK-NEXT: bx lr
+ %val = call <1 x double> @llvm.experimental.constrained.sqrt.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @rint_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: rint_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl rint
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x double> @llvm.experimental.constrained.rint.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @nearbyint_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: nearbyint_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x double> @llvm.experimental.constrained.nearbyint.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @maxnum_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: maxnum_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl fmax
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x double> @llvm.experimental.constrained.maxnum.v1f64(<1 x double> %x, <1 x double> %y, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @minnum_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: minnum_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl fmin
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x double> @llvm.experimental.constrained.minnum.v1f64(<1 x double> %x, <1 x double> %y, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @ceil_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: ceil_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl ceil
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @floor_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: floor_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl floor
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @round_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: round_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl round
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @roundeven_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: roundeven_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x double> @trunc_v1f64(<1 x double> %x) #0 {
+; CHECK-LABEL: trunc_v1f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl trunc
+; CHECK-NEXT: pop {r11, pc}
+ %val = call <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double> %x, metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
+define <1 x i1> @fcmp_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: fcmp_v1f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.f64 d0, d1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: bx lr
+entry:
+ %val = call <1 x i1> @llvm.experimental.constrained.fcmp.v1f64(<1 x double> %x, <1 x double> %y, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <1 x i1> %val
+}
+
+define <1 x i1> @fcmps_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: fcmps_v1f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmpe.f64 d0, d1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: bx lr
+entry:
+ %val = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> %x, <1 x double> %y, metadata !"oeq", metadata !"fpexcept.strict")
+ ret <1 x i1> %val
+}
+
+
+
+define <2 x float> @fptrunc_v2f32_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: fptrunc_v2f32_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcvt.f32.f64 s5, d1
+; CHECK-NEXT: vcvt.f32.f64 s4, d0
+; CHECK-NEXT: vmov.f64 d0, d2
+; CHECK-NEXT: bx lr
+ %val = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x float> %val
+}
+
+define <2 x double> @fpext_v2f64_v2f32(<2 x float> %x) #0 {
+; CHECK-LABEL: fpext_v2f64_v2f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcvt.f64.f32 d17, s1
+; CHECK-NEXT: vcvt.f64.f32 d16, s0
+; CHECK-NEXT: vorr q0, q8, q8
+; CHECK-NEXT: bx lr
+ %val = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float> %x, metadata !"fpexcept.strict") #0
+ ret <2 x double> %val
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
index b4060d5fdb574..7b9474313e5bf 100644
--- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -675,8 +675,8 @@ define half @frem_f16(half %x, half %y) #0 {
; CHECK-LABEL: frem_f16:
; CHECK: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
-; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: vcvtb.f32.f16 s1, s1
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: bl fmodf
; CHECK-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-NEXT: pop {r11, pc}
@@ -713,7 +713,7 @@ define i32 @fptosi_i32_f16(half %x) #0 {
define i32 @fptoui_i32_f16(half %x) #0 {
; CHECK-LABEL: fptoui_i32_f16:
-; CHECK: vcvt.s32.f16 s0, s0
+; CHECK: vcvt.u32.f16 s0, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
%val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0
@@ -925,8 +925,8 @@ define half @atan2_f16(half %x, half %y) #0 {
; CHECK-LABEL: atan2_f16:
; CHECK: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
-; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: vcvtb.f32.f16 s1, s1
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: bl atan2f
; CHECK-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-NEXT: pop {r11, pc}
@@ -974,8 +974,8 @@ define half @pow_f16(half %x, half %y) #0 {
; CHECK-LABEL: pow_f16:
; CHECK: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
-; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: vcvtb.f32.f16 s1, s1
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: bl powf
; CHECK-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-NEXT: pop {r11, pc}