[llvm] 8fc6b1a - [AArch64] Add some vcvt tests. NFC.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 17 02:41:58 PDT 2023
Author: David Green
Date: 2023-08-17T10:41:52+01:00
New Revision: 8fc6b1a18f4d9cc4d481c38bbc503a27acc7e461
URL: https://github.com/llvm/llvm-project/commit/8fc6b1a18f4d9cc4d481c38bbc503a27acc7e461
DIFF: https://github.com/llvm/llvm-project/commit/8fc6b1a18f4d9cc4d481c38bbc503a27acc7e461.diff
LOG: [AArch64] Add some vcvt tests. NFC.
See D157679. This also removes some duplication from arm64-vfloatintrinsics.ll,
where the tests now exist elsewhere.
Added:
llvm/test/CodeGen/AArch64/fcvt.ll
Modified:
llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
index eecbc4eb3f2c4b..887f94d41c4a4c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
@@ -14,22 +14,6 @@
%v4f16 = type <4 x half>
-; FALLBACK-NOT: remark{{.*}}test_v4f16.sqrt
-define %v4f16 @test_v4f16.sqrt(%v4f16 %a) {
- ; CHECK-LABEL: test_v4f16.sqrt:
- ; CHECK-NOFP16-COUNT-4: fsqrt s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: fsqrt.4h
- ; CHECK-FP16-NEXT: ret
- ; GISEL-LABEL: test_v4f16.sqrt:
- ; GISEL-NOFP16: fcvt
- ; GISEL-NOFP16: fsqrt.4s
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: fsqrt.4h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.sqrt.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
define %v4f16 @test_v4f16.powi(%v4f16 %a, i32 %b) {
; This operation is expanded, whether with or without +fullfp16.
; CHECK-LABEL: test_v4f16.powi:
@@ -136,132 +120,6 @@ define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) {
ret %v4f16 %1
}
-; FALLBACK-NOT: remark{{.*}}test_v4f16.fabs
-define %v4f16 @test_v4f16.fabs(%v4f16 %a) {
- ; CHECK-LABEL: test_v4f16.fabs:
- ; CHECK-NOFP16-COUNT-4: fabs s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: fabs.4h
- ; CHECK-FP16-NEXT: ret
-
- ; GISEL-LABEL: test_v4f16.fabs:
- ; GISEL-NOFP16: fabs.4s
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: fabs.4h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.fabs.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.floor
-define %v4f16 @test_v4f16.floor(%v4f16 %a) {
- ; CHECK-LABEL: test_v4f16.floor:
- ; CHECK-NOFP16-COUNT-4: frintm s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frintm.4h
- ; CHECK-FP16-NEXT: ret
-
- ; GISEL-LABEL: test_v4f16.floor:
- ; GISEL-NOFP16-COUNT-4: frintm s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frintm.4h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.floor.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-define %v4f16 @test_v4f16.ceil(%v4f16 %a) {
- ; CHECK-LABEL: test_v4f16.ceil:
- ; CHECK-NOFP16-COUNT-4: frintp s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frintp.4h
- ; CHECK-FP16-NEXT: ret
- ; FALLBACK-NOT: remark{{.*}}test_v4f16.ceil:
- ; GISEL-LABEL: test_v4f16.ceil:
- ; GISEL-NOFP16-COUNT-4: frintp s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frintp.4h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.trunc
-define %v4f16 @test_v4f16.trunc(%v4f16 %a) {
- ; CHECK-LABEL: test_v4f16.trunc:
- ; CHECK-NOFP16-COUNT-4: frintz s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frintz.4h
- ; CHECK-FP16-NEXT: ret
- ; GISEL-LABEL: test_v4f16.trunc:
- ; GISEL-NOFP16-COUNT-4: frintz s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frintz.4h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.trunc.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.rint
-define %v4f16 @test_v4f16.rint(%v4f16 %a) {
- ; CHECK-LABEL: test_v4f16.rint:
- ; CHECK-NOFP16-COUNT-4: frintx s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frintx.4h
- ; CHECK-FP16-NEXT: ret
- ; GISEL-LABEL: test_v4f16.rint:
- ; GISEL-NOFP16-COUNT-4: frintx s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frintx.4h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.rint.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.nearbyint
-define %v4f16 @test_v4f16.nearbyint(%v4f16 %a) {
- ; CHECK-LABEL: test_v4f16.nearbyint:
- ; CHECK-NOFP16-COUNT-4: frinti s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frinti.4h
- ; CHECK-FP16-NEXT: ret
- ; GISEL-LABEL: test_v4f16.nearbyint:
- ; GISEL-NOFP16-COUNT-4: frinti s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frinti.4h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.nearbyint.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-define %v4f16 @test_v4f16.round(%v4f16 %a) {
- ; CHECK-LABEL: test_v4f16.round:
- ; CHECK-NOFP16-COUNT-4: frinta s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frinta.4h
- ; CHECK-FP16-NEXT: ret
- ; GISEL-LABEL: test_v4f16.round:
- ; GISEL-NOFP16-COUNT-4: frinta s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frinta.4h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.round.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-define %v4f16 @test_v4f16.roundeven(%v4f16 %a) {
- ; CHECK-LABEL: test_v4f16.roundeven:
- ; CHECK-NOFP16-COUNT-4: frintn s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frintn.4h
- ; CHECK-FP16-NEXT: ret
- ; GISEL-LABEL: test_v4f16.roundeven:
- ; GISEL-NOFP16-COUNT-4: frintn s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frintn.4h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.roundeven.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-
-declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0
declare %v4f16 @llvm.powi.v4f16.i32(%v4f16, i32) #0
declare %v4f16 @llvm.sin.v4f16(%v4f16) #0
declare %v4f16 @llvm.cos.v4f16(%v4f16) #0
@@ -272,35 +130,11 @@ declare %v4f16 @llvm.log.v4f16(%v4f16) #0
declare %v4f16 @llvm.log10.v4f16(%v4f16) #0
declare %v4f16 @llvm.log2.v4f16(%v4f16) #0
declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0
-declare %v4f16 @llvm.fabs.v4f16(%v4f16) #0
-declare %v4f16 @llvm.floor.v4f16(%v4f16) #0
-declare %v4f16 @llvm.ceil.v4f16(%v4f16) #0
-declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0
-declare %v4f16 @llvm.rint.v4f16(%v4f16) #0
-declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0
-declare %v4f16 @llvm.round.v4f16(%v4f16) #0
-declare %v4f16 @llvm.roundeven.v4f16(%v4f16) #0
;;;
%v8f16 = type <8 x half>
-; FALLBACK-NOT: remark{{.*}}test_v8f16.sqrt
-define %v8f16 @test_v8f16.sqrt(%v8f16 %a) {
- ; CHECK-LABEL: test_v8f16.sqrt:
- ; CHECK-NOFP16-COUNT-8: fsqrt s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: fsqrt.8h
- ; CHECK-FP16-NEXT: ret
- ; GISEL-LABEL: test_v8f16.sqrt:
- ; GISEL-NOFP16: fcvt
- ; GISEL-NOFP16-COUNT-2: fsqrt.4s
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: fsqrt.8h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.sqrt.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
define %v8f16 @test_v8f16.powi(%v8f16 %a, i32 %b) {
; This operation is expanded, whether with or without +fullfp16.
; CHECK-LABEL: test_v8f16.powi:
@@ -409,132 +243,6 @@ define %v8f16 @test_v8f16.fma(%v8f16 %a, %v8f16 %b, %v8f16 %c) {
ret %v8f16 %1
}
-; FALLBACK-NOT: remark{{.*}}test_v8f16.fabs
-define %v8f16 @test_v8f16.fabs(%v8f16 %a) {
- ; CHECK-LABEL: test_v8f16.fabs:
- ; CHECK-NOFP16-COUNT-8: fabs s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: fabs.8h
- ; CHECK-FP16-NEXT: ret
-
- ; GISEL-LABEL: test_v8f16.fabs:
- ; GISEL-NOFP16-COUNT-2: fabs.4s
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: fabs.8h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.fabs.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.floor
-define %v8f16 @test_v8f16.floor(%v8f16 %a) {
- ; CHECK-LABEL: test_v8f16.floor:
- ; CHECK-NOFP16-COUNT-8: frintm s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frintm.8h
- ; CHECK-FP16-NEXT: ret
-
- ; GISEL-LABEL: test_v8f16.floor:
- ; GISEL-NOFP16-COUNT-8: frintm s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frintm.8h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.floor.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
-define %v8f16 @test_v8f16.ceil(%v8f16 %a) {
- ; CHECK-LABEL: test_v8f16.ceil:
- ; CHECK-NOFP16-COUNT-8: frintp s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frintp.8h
- ; CHECK-FP16-NEXT: ret
- ; FALLBACK-NOT: remark{{.*}}test_v8f16.ceil:
- ; GISEL-LABEL: test_v8f16.ceil:
- ; GISEL-NOFP16-COUNT-8: frintp s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frintp.8h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.ceil.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.trunc
-define %v8f16 @test_v8f16.trunc(%v8f16 %a) {
- ; CHECK-LABEL: test_v8f16.trunc:
- ; CHECK-NOFP16-COUNT-8: frintz s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frintz.8h
- ; CHECK-FP16-NEXT: ret
- ; GISEL-LABEL: test_v8f16.trunc:
- ; GISEL-NOFP16-COUNT-8: frintz s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frintz.8h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.trunc.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.rint
-define %v8f16 @test_v8f16.rint(%v8f16 %a) {
- ; CHECK-LABEL: test_v8f16.rint:
- ; CHECK-NOFP16-COUNT-8: frintx s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frintx.8h
- ; CHECK-FP16-NEXT: ret
- ; GISEL-LABEL: test_v8f16.rint:
- ; GISEL-NOFP16-COUNT-8: frintx s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frintx.8h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.rint.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.nearbyint
-define %v8f16 @test_v8f16.nearbyint(%v8f16 %a) {
- ; CHECK-LABEL: test_v8f16.nearbyint:
- ; CHECK-NOFP16-COUNT-8: frinti s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frinti.8h
- ; CHECK-FP16-NEXT: ret
- ; GISEL-LABEL: test_v8f16.nearbyint:
- ; GISEL-NOFP16-COUNT-8: frinti s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frinti.8h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.nearbyint.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
-define %v8f16 @test_v8f16.round(%v8f16 %a) {
- ; CHECK-LABEL: test_v8f16.round:
- ; CHECK-NOFP16-COUNT-8: frinta s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frinta.8h
- ; CHECK-FP16-NEXT: ret
- ; GISEL-LABEL: test_v8f16.round:
- ; GISEL-NOFP16-COUNT-8: frinta s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frinta.8h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.round.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
-define %v8f16 @test_v8f16.roundeven(%v8f16 %a) {
- ; CHECK-LABEL: test_v8f16.roundeven:
- ; CHECK-NOFP16-COUNT-8: frintn s{{[0-9]+}}, s{{[0-9]+}}
- ; CHECK-FP16-NOT: fcvt
- ; CHECK-FP16: frintn.8h
- ; CHECK-FP16-NEXT: ret
- ; GISEL-LABEL: test_v8f16.roundeven:
- ; GISEL-NOFP16-COUNT-8: frintn s{{[0-9]+}}, s{{[0-9]+}}
- ; GISEL-FP16-NOT: fcvt
- ; GISEL-FP16: frintn.8h
- ; GISEL-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.roundeven.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
-
-declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0
declare %v8f16 @llvm.powi.v8f16.i32(%v8f16, i32) #0
declare %v8f16 @llvm.sin.v8f16(%v8f16) #0
declare %v8f16 @llvm.cos.v8f16(%v8f16) #0
@@ -545,28 +253,11 @@ declare %v8f16 @llvm.log.v8f16(%v8f16) #0
declare %v8f16 @llvm.log10.v8f16(%v8f16) #0
declare %v8f16 @llvm.log2.v8f16(%v8f16) #0
declare %v8f16 @llvm.fma.v8f16(%v8f16, %v8f16, %v8f16) #0
-declare %v8f16 @llvm.fabs.v8f16(%v8f16) #0
-declare %v8f16 @llvm.floor.v8f16(%v8f16) #0
-declare %v8f16 @llvm.ceil.v8f16(%v8f16) #0
-declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0
-declare %v8f16 @llvm.rint.v8f16(%v8f16) #0
-declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0
-declare %v8f16 @llvm.round.v8f16(%v8f16) #0
-declare %v8f16 @llvm.roundeven.v8f16(%v8f16) #0
;;; Float vectors
%v2f32 = type <2 x float>
-; FALLBACK-NOT: remark{{.*}}test_v2f32.sqrt
-; CHECK-LABEL: test_v2f32.sqrt:
-; GISEL-LABEL: test_v2f32.sqrt:
-define %v2f32 @test_v2f32.sqrt(%v2f32 %a) {
- ; CHECK: fsqrt.2s
- ; GISEL: fsqrt.2s
- %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
; CHECK: test_v2f32.powi:
define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) {
; CHECK: pow
@@ -657,64 +348,6 @@ define %v2f32 @test_v2f32.fma(%v2f32 %a, %v2f32 %b, %v2f32 %c) {
ret %v2f32 %1
}
-; FALLBACK-NOT: remark{{.*}}test_v2f32.fabs
-; CHECK-LABEL: test_v2f32.fabs:
-; GISEL-LABEL: test_v2f32.fabs:
-define %v2f32 @test_v2f32.fabs(%v2f32 %a) {
- ; CHECK: fabs.2s
- ; GISEL: fabs.2s
- %1 = call %v2f32 @llvm.fabs.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f32.floor
-; CHECK-LABEL: test_v2f32.floor:
-; GISEL-LABEL: test_v2f32.floor:
-define %v2f32 @test_v2f32.floor(%v2f32 %a) {
- ; CHECK: frintm.2s
- ; GISEL: frintm.2s
- %1 = call %v2f32 @llvm.floor.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK-LABEL: test_v2f32.ceil:
-; FALLBACK-NOT: remark{{.*}}test_v2f32.ceil
-; GISEL-LABEL: test_v2f32.ceil:
-define %v2f32 @test_v2f32.ceil(%v2f32 %a) {
- ; CHECK: frintp.2s
- ; GISEL: frintp.2s
- %1 = call %v2f32 @llvm.ceil.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK-LABEL: test_v2f32.trunc:
-; FALLBACK-NOT: remark{{.*}}test_v2f32.trunc
-; GISEL-LABEL: test_v2f32.trunc:
-define %v2f32 @test_v2f32.trunc(%v2f32 %a) {
- ; CHECK: frintz.2s
- ; GISEL: frintz.2s
- %1 = call %v2f32 @llvm.trunc.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK-LABEL: test_v2f32.rint:
-; FALLBACK-NOT: remark{{.*}}test_v2f32.rint
-; GISEL-LABEL: test_v2f32.rint:
-define %v2f32 @test_v2f32.rint(%v2f32 %a) {
- ; CHECK: frintx.2s
- ; GISEL: frintx.2s
- %1 = call %v2f32 @llvm.rint.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f32.nearbyint
-; CHECK-LABEL: test_v2f32.nearbyint:
-; GISEL-LABEL: test_v2f32.nearbyint:
-define %v2f32 @test_v2f32.nearbyint(%v2f32 %a) {
- ; CHECK: frinti.2s
- ; GISEL: frinti.2s
- %1 = call %v2f32 @llvm.nearbyint.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-
-declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0
declare %v2f32 @llvm.powi.v2f32.i32(%v2f32, i32) #0
declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
@@ -725,26 +358,11 @@ declare %v2f32 @llvm.log.v2f32(%v2f32) #0
declare %v2f32 @llvm.log10.v2f32(%v2f32) #0
declare %v2f32 @llvm.log2.v2f32(%v2f32) #0
declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0
-declare %v2f32 @llvm.fabs.v2f32(%v2f32) #0
-declare %v2f32 @llvm.floor.v2f32(%v2f32) #0
-declare %v2f32 @llvm.ceil.v2f32(%v2f32) #0
-declare %v2f32 @llvm.trunc.v2f32(%v2f32) #0
-declare %v2f32 @llvm.rint.v2f32(%v2f32) #0
-declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0
;;;
%v4f32 = type <4 x float>
-; FALLBACK-NOT: remark{{.*}}test_v4f32.sqrt
-; CHECK: test_v4f32.sqrt:
-; GISEL: test_v4f32.sqrt:
-define %v4f32 @test_v4f32.sqrt(%v4f32 %a) {
- ; CHECK: fsqrt.4s
- ; GISEL: fsqrt.4s
- %1 = call %v4f32 @llvm.sqrt.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
; CHECK: test_v4f32.powi:
define %v4f32 @test_v4f32.powi(%v4f32 %a, i32 %b) {
; CHECK: pow
@@ -834,64 +452,6 @@ define %v4f32 @test_v4f32.fma(%v4f32 %a, %v4f32 %b, %v4f32 %c) {
ret %v4f32 %1
}
-; FALLBACK-NOT: remark{{.*}}test_v4f32.fabs
-; CHECK: test_v4f32.fabs:
-; GISEL: test_v4f32.fabs:
-define %v4f32 @test_v4f32.fabs(%v4f32 %a) {
- ; CHECK: fabs
- ; GISEL: fabs
- %1 = call %v4f32 @llvm.fabs.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f32.floor
-; CHECK: test_v4f32.floor:
-; GISEL: test_v4f32.floor:
-define %v4f32 @test_v4f32.floor(%v4f32 %a) {
- ; CHECK: frintm.4s
- ; GISEL: frintm.4s
- %1 = call %v4f32 @llvm.floor.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.ceil:
-; FALLBACK-NOT: remark{{.*}}test_v4f32.ceil
-; GISEL-LABEL: test_v4f32.ceil:
-define %v4f32 @test_v4f32.ceil(%v4f32 %a) {
- ; CHECK: frintp.4s
- ; GISEL: frintp.4s
- %1 = call %v4f32 @llvm.ceil.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.trunc:
-; FALLBACK-NOT: remark{{.*}}test_v4f32.trunc
-; GISEL: test_v4f32.trunc:
-define %v4f32 @test_v4f32.trunc(%v4f32 %a) {
- ; CHECK: frintz.4s
- ; GISEL: frintz.4s
- %1 = call %v4f32 @llvm.trunc.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.rint:
-; FALLBACK-NOT: remark{{.*}}test_v4f32.rint
-; GISEL: test_v4f32.rint:
-define %v4f32 @test_v4f32.rint(%v4f32 %a) {
- ; CHECK: frintx.4s
- ; GISEL: frintx.4s
- %1 = call %v4f32 @llvm.rint.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f32.nearbyint
-; CHECK: test_v4f32.nearbyint:
-; GISEL: test_v4f32.nearbyint:
-define %v4f32 @test_v4f32.nearbyint(%v4f32 %a) {
- ; CHECK: frinti.4s
- ; GISEL: frinti.4s
- %1 = call %v4f32 @llvm.nearbyint.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-
-declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0
declare %v4f32 @llvm.powi.v4f32.i32(%v4f32, i32) #0
declare %v4f32 @llvm.sin.v4f32(%v4f32) #0
declare %v4f32 @llvm.cos.v4f32(%v4f32) #0
@@ -902,25 +462,11 @@ declare %v4f32 @llvm.log.v4f32(%v4f32) #0
declare %v4f32 @llvm.log10.v4f32(%v4f32) #0
declare %v4f32 @llvm.log2.v4f32(%v4f32) #0
declare %v4f32 @llvm.fma.v4f32(%v4f32, %v4f32, %v4f32) #0
-declare %v4f32 @llvm.fabs.v4f32(%v4f32) #0
-declare %v4f32 @llvm.floor.v4f32(%v4f32) #0
-declare %v4f32 @llvm.ceil.v4f32(%v4f32) #0
-declare %v4f32 @llvm.trunc.v4f32(%v4f32) #0
-declare %v4f32 @llvm.rint.v4f32(%v4f32) #0
-declare %v4f32 @llvm.nearbyint.v4f32(%v4f32) #0
;;; Double vector
%v2f64 = type <2 x double>
-; FALLBACK-NOT: remark{{.*}}test_v2f64.sqrt
-; CHECK: test_v2f64.sqrt:
-; GISEL: test_v2f64.sqrt:
-define %v2f64 @test_v2f64.sqrt(%v2f64 %a) {
- ; CHECK: fsqrt.2d
- ; GISEL: fsqrt.2d
- %1 = call %v2f64 @llvm.sqrt.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
+
; CHECK: test_v2f64.powi:
define %v2f64 @test_v2f64.powi(%v2f64 %a, i32 %b) {
; CHECK: pow
@@ -1011,64 +557,6 @@ define %v2f64 @test_v2f64.fma(%v2f64 %a, %v2f64 %b, %v2f64 %c) {
ret %v2f64 %1
}
-; FALLBACK-NOT: remark{{.*}}test_v2f64.fabs
-; CHECK: test_v2f64.fabs:
-; GISEL: test_v2f64.fabs:
-define %v2f64 @test_v2f64.fabs(%v2f64 %a) {
- ; CHECK: fabs
- ; GISEL: fabs
- %1 = call %v2f64 @llvm.fabs.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f64.floor
-; CHECK: test_v2f64.floor:
-; GISEL: test_v2f64.floor:
-define %v2f64 @test_v2f64.floor(%v2f64 %a) {
- ; CHECK: frintm.2d
- ; GISEL: frintm.2d
- %1 = call %v2f64 @llvm.floor.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.ceil:
-; FALLBACK-NOT: remark{{.*}}test_v2f64.ceil
-; GISEL-LABEL: test_v2f64.ceil:
-define %v2f64 @test_v2f64.ceil(%v2f64 %a) {
- ; CHECK: frintp.2d
- ; GISEL: frintp.2d
- %1 = call %v2f64 @llvm.ceil.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.trunc:
-; FALLBACK-NOT: remark{{.*}}test_v2f64.trunc
-; GISEL: test_v2f64.trunc:
-define %v2f64 @test_v2f64.trunc(%v2f64 %a) {
- ; CHECK: frintz.2d
- ; GISEL: frintz.2d
- %1 = call %v2f64 @llvm.trunc.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.rint:
-; FALLBACK-NOT: remark{{.*}}test_v2f64.rint
-; GISEL: test_v2f64.rint:
-define %v2f64 @test_v2f64.rint(%v2f64 %a) {
- ; CHECK: frintx.2d
- ; GISEL: frintx.2d
- %1 = call %v2f64 @llvm.rint.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f64.nearbyint
-; CHECK: test_v2f64.nearbyint:
-; GISEL: test_v2f64.nearbyint:
-define %v2f64 @test_v2f64.nearbyint(%v2f64 %a) {
- ; CHECK: frinti.2d
- ; GISEL: frinti.2d
- %1 = call %v2f64 @llvm.nearbyint.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-
-declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0
declare %v2f64 @llvm.powi.v2f64.i32(%v2f64, i32) #0
declare %v2f64 @llvm.sin.v2f64(%v2f64) #0
declare %v2f64 @llvm.cos.v2f64(%v2f64) #0
@@ -1079,11 +567,5 @@ declare %v2f64 @llvm.log.v2f64(%v2f64) #0
declare %v2f64 @llvm.log10.v2f64(%v2f64) #0
declare %v2f64 @llvm.log2.v2f64(%v2f64) #0
declare %v2f64 @llvm.fma.v2f64(%v2f64, %v2f64, %v2f64) #0
-declare %v2f64 @llvm.fabs.v2f64(%v2f64) #0
-declare %v2f64 @llvm.floor.v2f64(%v2f64) #0
-declare %v2f64 @llvm.ceil.v2f64(%v2f64) #0
-declare %v2f64 @llvm.trunc.v2f64(%v2f64) #0
-declare %v2f64 @llvm.rint.v2f64(%v2f64) #0
-declare %v2f64 @llvm.nearbyint.v2f64(%v2f64) #0
attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AArch64/fcvt.ll b/llvm/test/CodeGen/AArch64/fcvt.ll
new file mode 100644
index 00000000000000..aaa9d657c7913e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fcvt.ll
@@ -0,0 +1,4167 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
+
+; CHECK-GI: warning: Instruction selection used fallback path for ceil_v3f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v4f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v3f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v8f32
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for ceil_v7f16
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for ceil_v16f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v3f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v4f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v3f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v8f32
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for floor_v7f16
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for floor_v16f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v3f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v4f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v3f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v8f32
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for nearbyint_v7f16
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for nearbyint_v16f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v2f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v3f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v4f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v2f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v3f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v4f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v8f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v7f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v4f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v8f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v16f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v3f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v4f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v3f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v8f32
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for rint_v7f16
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for rint_v16f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v3f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v4f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v3f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v8f32
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for round_v7f16
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for round_v16f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v3f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v4f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v3f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v8f32
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for trunc_v7f16
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for trunc_v16f16
+
+define double @ceil_f64(double %a) { ; scalar f64 llvm.ceil test
+; CHECK-LABEL: ceil_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintp d0, d0
+; CHECK-NEXT: ret
+entry:
+ %c = call double @llvm.ceil.f64(double %a) ; assertions above use the prefix shared by all four RUN lines: one frintp on d0, then ret
+ ret double %c
+}
+
+define float @ceil_f32(float %a) { ; scalar f32 llvm.ceil test
+; CHECK-LABEL: ceil_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintp s0, s0
+; CHECK-NEXT: ret
+entry:
+ %c = call float @llvm.ceil.f32(float %a) ; assertions above use the prefix shared by all four RUN lines: one frintp on s0, then ret
+ ret float %c
+}
+
+define half @ceil_f16(half %a) { ; scalar f16 llvm.ceil test; expectations are split per RUN configuration
+; CHECK-SD-NOFP16-LABEL: ceil_f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintp s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: ceil_f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintp h0, h0
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: ceil_f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frintp s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: ceil_f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintp h0, h0
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call half @llvm.ceil.f16(half %a) ; without +fullfp16 (either selector): fcvt h0 to s0, frintp, fcvt back; with +fullfp16: frintp on h0 directly
+ ret half %c
+}
+
+define <2 x double> @ceil_v2f64(<2 x double> %a) { ; <2 x double> llvm.ceil test
+; CHECK-LABEL: ceil_v2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintp v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) ; assertions above use the prefix shared by all four RUN lines: one frintp on v0.2d
+ ret <2 x double> %c
+}
+
+define <3 x double> @ceil_v3f64(<3 x double> %a) { ; <3 x double> llvm.ceil test; the expected code reads and writes d0, d1 and d2
+; CHECK-LABEL: ceil_v3f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: frintp v2.2d, v2.2d
+; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT: frintp v0.2d, v0.2d
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x double> @llvm.ceil.v3f64(<3 x double> %a) ; expected: d1 is inserted into v0.d[1], frintp runs on v0.2d and v2.2d, then ext #8 copies v0's upper half out to v1
+ ret <3 x double> %c ; the file header expects both -global-isel runs to warn that this function used the fallback path
+}
+
+define <4 x double> @ceil_v4f64(<4 x double> %a) { ; <4 x double> llvm.ceil test
+; CHECK-LABEL: ceil_v4f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintp v0.2d, v0.2d
+; CHECK-NEXT: frintp v1.2d, v1.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x double> @llvm.ceil.v4f64(<4 x double> %a) ; expected: two frintp instructions, one each on v0.2d and v1.2d
+ ret <4 x double> %c ; the file header expects both -global-isel runs to warn that this function used the fallback path
+}
+
+define <2 x float> @ceil_v2f32(<2 x float> %a) { ; <2 x float> llvm.ceil test
+; CHECK-LABEL: ceil_v2f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintp v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) ; assertions above use the prefix shared by all four RUN lines: one frintp on v0.2s
+ ret <2 x float> %c
+}
+
+define <3 x float> @ceil_v3f32(<3 x float> %a) { ; <3 x float> llvm.ceil test
+; CHECK-LABEL: ceil_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintp v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x float> @llvm.ceil.v3f32(<3 x float> %a) ; expected: one frintp over the 4-lane form v0.4s, the same instruction asserted for ceil_v4f32
+ ret <3 x float> %c ; the file header expects both -global-isel runs to warn that this function used the fallback path
+}
+
+define <4 x float> @ceil_v4f32(<4 x float> %a) { ; <4 x float> llvm.ceil test
+; CHECK-LABEL: ceil_v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintp v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) ; assertions above use the prefix shared by all four RUN lines: one frintp on v0.4s
+ ret <4 x float> %c
+}
+
+define <8 x float> @ceil_v8f32(<8 x float> %a) { ; <8 x float> llvm.ceil test
+; CHECK-LABEL: ceil_v8f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintp v0.4s, v0.4s
+; CHECK-NEXT: frintp v1.4s, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <8 x float> @llvm.ceil.v8f32(<8 x float> %a) ; expected: two frintp instructions, one each on v0.4s and v1.4s
+ ret <8 x float> %c ; the file header expects both -global-isel runs to warn that this function used the fallback path
+}
+
+define <7 x half> @ceil_v7f16(<7 x half> %a) { ; <7 x half> llvm.ceil test; expectations are split per RUN configuration
+; CHECK-SD-NOFP16-LABEL: ceil_v7f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frintp s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintp s5, s1
+; CHECK-SD-NOFP16-NEXT: frintp s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frintp s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintp s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frintp s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintp s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frintp s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: ceil_v7f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintp v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: ceil_v7f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintp s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frintp s5, s1
+; CHECK-GI-NOFP16-NEXT: frintp s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frintp s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frintp s3, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s3
+; CHECK-GI-NOFP16-NEXT: frintp s3, s5
+; CHECK-GI-NOFP16-NEXT: frintp s0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: ceil_v7f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintp v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <7 x half> @llvm.ceil.v7f16(<7 x half> %a) ; without +fullfp16: each lane is extracted, widened with fcvt, rounded with frintp, narrowed and reinserted; with +fullfp16: one frintp on v0.8h
+ ret <7 x half> %c ; without +fullfp16 the default run converts lanes 0-7, while the -global-isel run converts lanes 0-6 and writes the lane-6 result into both h[6] and h[7]; the header expects a fallback warning only for the +fullfp16 -global-isel run
+}
+
+define <4 x half> @ceil_v4f16(<4 x half> %a) { ; Test: llvm.ceil applied to a 4-lane half vector.
+; CHECK-SD-NOFP16-LABEL: ceil_v4f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frintp s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintp s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT: frintp s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: frintp s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: ceil_v4f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintp v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: ceil_v4f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintp s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT: frintp s1, s1
+; CHECK-GI-NOFP16-NEXT: frintp s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s3
+; CHECK-GI-NOFP16-NEXT: frintp s3, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: ceil_v4f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintp v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <4 x half> @llvm.ceil.v4f16(<4 x half> %a) ; NOFP16-labelled expectations round each lane via fcvt h->s, frintp, fcvt s->h; FP16-labelled ones use a single frintp on v0.4h
+ ret <4 x half> %c
+}
+
+define <8 x half> @ceil_v8f16(<8 x half> %a) { ; Test: llvm.ceil applied to an 8-lane half vector.
+; CHECK-SD-NOFP16-LABEL: ceil_v8f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frintp s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintp s5, s1
+; CHECK-SD-NOFP16-NEXT: frintp s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frintp s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintp s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frintp s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintp s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frintp s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: ceil_v8f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintp v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: ceil_v8f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintp s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frintp s5, s1
+; CHECK-GI-NOFP16-NEXT: frintp s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frintp s4, s4
+; CHECK-GI-NOFP16-NEXT: frintp s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frintp s2, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frintp s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: frintp s0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: ceil_v8f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintp v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a) ; NOFP16-labelled expectations round all eight lanes one by one (fcvt h->s, frintp, fcvt s->h) and rebuild the vector in v1; FP16-labelled ones use a single frintp on v0.8h
+ ret <8 x half> %c
+}
+
+define <16 x half> @ceil_v16f16(<16 x half> %a) { ; Test: llvm.ceil applied to a 16-lane half vector (the expectations read it from v0 and v1).
+; CHECK-SD-NOFP16-LABEL: ceil_v16f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintp s5, s5
+; CHECK-SD-NOFP16-NEXT: frintp s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT: frintp s16, s2
+; CHECK-SD-NOFP16-NEXT: frintp s17, s3
+; CHECK-SD-NOFP16-NEXT: frintp s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT: frintp s7, s7
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT: mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT: frintp s17, s17
+; CHECK-SD-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT: frintp s16, s16
+; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintp s5, s5
+; CHECK-SD-NOFP16-NEXT: frintp s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT: mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT: frintp s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: frintp s6, s6
+; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintp s5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frintp s0, s0
+; CHECK-SD-NOFP16-NEXT: frintp s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: frintp s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: ceil_v16f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintp v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: frintp v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: ceil_v16f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frintp s4, s4
+; CHECK-GI-NOFP16-NEXT: frintp s5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frintp s16, s2
+; CHECK-GI-NOFP16-NEXT: frintp s17, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: frintp s6, s6
+; CHECK-GI-NOFP16-NEXT: frintp s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
+; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s6
+; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s7
+; CHECK-GI-NOFP16-NEXT: frintp s16, s16
+; CHECK-GI-NOFP16-NEXT: frintp s17, s17
+; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s17
+; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frintp s5, s5
+; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: frintp s16, s18
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT: frintp s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT: frintp s17, s17
+; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT: frintp s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frintp s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT: frintp s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: frintp s1, s1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: ceil_v16f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintp v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: frintp v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <16 x half> @llvm.ceil.v16f16(<16 x half> %a) ; NOFP16-labelled expectations round the lanes of v0 and v1 one by one (fcvt h->s, frintp, fcvt s->h), building results in v2/v3; FP16-labelled ones use one frintp per 8-lane half
+ ret <16 x half> %c
+}
+
+define double @floor_f64(double %a) { ; Test: llvm.floor on a scalar double.
+; CHECK-LABEL: floor_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintm d0, d0
+; CHECK-NEXT: ret
+entry:
+ %c = call double @llvm.floor.f64(double %a) ; the expectations above want exactly one frintm on d0 followed by ret
+ ret double %c
+}
+
+define float @floor_f32(float %a) { ; Test: llvm.floor on a scalar float.
+; CHECK-LABEL: floor_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintm s0, s0
+; CHECK-NEXT: ret
+entry:
+ %c = call float @llvm.floor.f32(float %a) ; the expectations above want exactly one frintm on s0 followed by ret
+ ret float %c
+}
+
+define half @floor_f16(half %a) { ; Test: llvm.floor on a scalar half.
+; CHECK-SD-NOFP16-LABEL: floor_f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintm s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: floor_f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintm h0, h0
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: floor_f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frintm s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: floor_f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintm h0, h0
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call half @llvm.floor.f16(half %a) ; NOFP16-labelled expectations widen to single (fcvt), frintm, then narrow back; FP16-labelled ones use frintm directly on h0
+ ret half %c
+}
+
+define <2 x double> @floor_v2f64(<2 x double> %a) { ; Test: llvm.floor on a 2-lane double vector.
+; CHECK-LABEL: floor_v2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintm v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x double> @llvm.floor.v2f64(<2 x double> %a) ; the expectations above want a single vector frintm on v0.2d
+ ret <2 x double> %c
+}
+
+define <3 x double> @floor_v3f64(<3 x double> %a) { ; Test: llvm.floor on a 3-lane double vector (the expectations read the lanes from d0, d1 and d2).
+; CHECK-LABEL: floor_v3f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: frintm v2.2d, v2.2d
+; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT: frintm v0.2d, v0.2d
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x double> @llvm.floor.v3f64(<3 x double> %a) ; expected shape: d1 is packed into the high lane of v0, v0.2d and v2.2d are each rounded with a vector frintm, then ext moves v0's high lane back out to d1
+ ret <3 x double> %c
+}
+
+define <4 x double> @floor_v4f64(<4 x double> %a) { ; Test: llvm.floor on a 4-lane double vector.
+; CHECK-LABEL: floor_v4f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintm v0.2d, v0.2d
+; CHECK-NEXT: frintm v1.2d, v1.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x double> @llvm.floor.v4f64(<4 x double> %a) ; the expectations above want two vector frintm instructions, one on v0.2d and one on v1.2d
+ ret <4 x double> %c
+}
+
+define <2 x float> @floor_v2f32(<2 x float> %a) { ; Test: llvm.floor on a 2-lane float vector.
+; CHECK-LABEL: floor_v2f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintm v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x float> @llvm.floor.v2f32(<2 x float> %a) ; the expectations above want a single vector frintm on v0.2s
+ ret <2 x float> %c
+}
+
+define <3 x float> @floor_v3f32(<3 x float> %a) { ; Test: llvm.floor on a 3-lane float vector.
+; CHECK-LABEL: floor_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintm v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x float> @llvm.floor.v3f32(<3 x float> %a) ; the expectations above round the value as a full 4-lane vector: one frintm on v0.4s
+ ret <3 x float> %c
+}
+
+define <4 x float> @floor_v4f32(<4 x float> %a) { ; Test: llvm.floor on a 4-lane float vector.
+; CHECK-LABEL: floor_v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintm v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) ; the expectations above want a single vector frintm on v0.4s
+ ret <4 x float> %c
+}
+
+define <8 x float> @floor_v8f32(<8 x float> %a) { ; Test: llvm.floor on an 8-lane float vector.
+; CHECK-LABEL: floor_v8f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintm v0.4s, v0.4s
+; CHECK-NEXT: frintm v1.4s, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <8 x float> @llvm.floor.v8f32(<8 x float> %a) ; the expectations above want two vector frintm instructions, one on v0.4s and one on v1.4s
+ ret <8 x float> %c
+}
+
+define <7 x half> @floor_v7f16(<7 x half> %a) { ; Test: llvm.floor on a 7-lane half vector (not a power-of-two lane count).
+; CHECK-SD-NOFP16-LABEL: floor_v7f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frintm s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintm s5, s1
+; CHECK-SD-NOFP16-NEXT: frintm s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frintm s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintm s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frintm s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintm s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frintm s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: floor_v7f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintm v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: floor_v7f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintm s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frintm s5, s1
+; CHECK-GI-NOFP16-NEXT: frintm s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frintm s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frintm s3, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s3
+; CHECK-GI-NOFP16-NEXT: frintm s3, s5
+; CHECK-GI-NOFP16-NEXT: frintm s0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: floor_v7f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintm v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <7 x half> @llvm.floor.v7f16(<7 x half> %a) ; FP16-labelled expectations round all 8 register lanes with one frintm on v0.8h; the SD NOFP16 sequence rounds lanes 0-7 individually, while the GI NOFP16 sequence reads only lanes 0-6 and writes the lane-6 result into both lane 6 and lane 7
+ ret <7 x half> %c
+}
+
+define <4 x half> @floor_v4f16(<4 x half> %a) { ; Test: llvm.floor applied to a 4-lane half vector.
+; CHECK-SD-NOFP16-LABEL: floor_v4f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frintm s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintm s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT: frintm s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: frintm s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: floor_v4f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintm v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: floor_v4f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintm s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT: frintm s1, s1
+; CHECK-GI-NOFP16-NEXT: frintm s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s3
+; CHECK-GI-NOFP16-NEXT: frintm s3, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: floor_v4f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintm v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <4 x half> @llvm.floor.v4f16(<4 x half> %a) ; NOFP16-labelled expectations round each lane via fcvt h->s, frintm, fcvt s->h; FP16-labelled ones use a single frintm on v0.4h
+ ret <4 x half> %c
+}
+
+define <8 x half> @floor_v8f16(<8 x half> %a) { ; Test: llvm.floor applied to an 8-lane half vector.
+; CHECK-SD-NOFP16-LABEL: floor_v8f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frintm s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintm s5, s1
+; CHECK-SD-NOFP16-NEXT: frintm s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frintm s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintm s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frintm s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintm s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frintm s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: floor_v8f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintm v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: floor_v8f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintm s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frintm s5, s1
+; CHECK-GI-NOFP16-NEXT: frintm s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frintm s4, s4
+; CHECK-GI-NOFP16-NEXT: frintm s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frintm s2, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frintm s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: frintm s0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: floor_v8f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintm v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <8 x half> @llvm.floor.v8f16(<8 x half> %a) ; NOFP16-labelled expectations round all eight lanes one by one (fcvt h->s, frintm, fcvt s->h) and rebuild the vector in v1; FP16-labelled ones use a single frintm on v0.8h
+ ret <8 x half> %c
+}
+
+define <16 x half> @floor_v16f16(<16 x half> %a) { ; Test: llvm.floor applied to a 16-lane half vector (the expectations read it from v0 and v1).
+; CHECK-SD-NOFP16-LABEL: floor_v16f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintm s5, s5
+; CHECK-SD-NOFP16-NEXT: frintm s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT: frintm s16, s2
+; CHECK-SD-NOFP16-NEXT: frintm s17, s3
+; CHECK-SD-NOFP16-NEXT: frintm s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT: frintm s7, s7
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT: mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT: frintm s17, s17
+; CHECK-SD-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT: frintm s16, s16
+; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintm s5, s5
+; CHECK-SD-NOFP16-NEXT: frintm s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT: mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT: frintm s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: frintm s6, s6
+; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintm s5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frintm s0, s0
+; CHECK-SD-NOFP16-NEXT: frintm s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: frintm s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: floor_v16f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintm v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: frintm v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: floor_v16f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frintm s4, s4
+; CHECK-GI-NOFP16-NEXT: frintm s5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frintm s16, s2
+; CHECK-GI-NOFP16-NEXT: frintm s17, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: frintm s6, s6
+; CHECK-GI-NOFP16-NEXT: frintm s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
+; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s6
+; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s7
+; CHECK-GI-NOFP16-NEXT: frintm s16, s16
+; CHECK-GI-NOFP16-NEXT: frintm s17, s17
+; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s17
+; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frintm s5, s5
+; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: frintm s16, s18
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT: frintm s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT: frintm s17, s17
+; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT: frintm s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frintm s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT: frintm s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: frintm s1, s1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: floor_v16f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintm v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: frintm v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <16 x half> @llvm.floor.v16f16(<16 x half> %a) ; NOFP16-labelled expectations round the lanes of v0 and v1 one by one (fcvt h->s, frintm, fcvt s->h), building results in v2/v3; FP16-labelled ones use one frintm per 8-lane half
+ ret <16 x half> %c
+}
+
+define double @nearbyint_f64(double %a) { ; scalar f64 llvm.nearbyint; expected codegen is one frinti on d0 followed by ret
+; CHECK-LABEL: nearbyint_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinti d0, d0
+; CHECK-NEXT: ret
+entry:
+ %c = call double @llvm.nearbyint.f64(double %a)
+ ret double %c
+}
+
+define float @nearbyint_f32(float %a) { ; scalar f32 llvm.nearbyint; expected codegen is one frinti on s0 followed by ret
+; CHECK-LABEL: nearbyint_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinti s0, s0
+; CHECK-NEXT: ret
+entry:
+ %c = call float @llvm.nearbyint.f32(float %a)
+ ret float %c
+}
+
+define half @nearbyint_f16(half %a) { ; scalar half llvm.nearbyint; the NOFP16 expectations widen to f32 (fcvt, frinti, fcvt back), the FP16 expectations use frinti on h0 directly
+; CHECK-SD-NOFP16-LABEL: nearbyint_f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frinti s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: nearbyint_f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frinti h0, h0
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: nearbyint_f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frinti s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: nearbyint_f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frinti h0, h0
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call half @llvm.nearbyint.f16(half %a)
+ ret half %c
+}
+
+define <2 x double> @nearbyint_v2f64(<2 x double> %a) { ; <2 x double> llvm.nearbyint; expected codegen is one frinti on v0.2d
+; CHECK-LABEL: nearbyint_v2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinti v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
+ ret <2 x double> %c
+}
+
+define <3 x double> @nearbyint_v3f64(<3 x double> %a) { ; <3 x double> llvm.nearbyint; expectations insert d1 into lane 1 of v0, apply frinti to v0 and v2 as 2d vectors, then use ext to move the upper lane of v0 into v1
+; CHECK-LABEL: nearbyint_v3f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: frinti v2.2d, v2.2d
+; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT: frinti v0.2d, v0.2d
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x double> @llvm.nearbyint.v3f64(<3 x double> %a)
+ ret <3 x double> %c
+}
+
+define <4 x double> @nearbyint_v4f64(<4 x double> %a) { ; <4 x double> llvm.nearbyint; expectations apply frinti separately to v0.2d and v1.2d
+; CHECK-LABEL: nearbyint_v4f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinti v0.2d, v0.2d
+; CHECK-NEXT: frinti v1.2d, v1.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %a)
+ ret <4 x double> %c
+}
+
+define <2 x float> @nearbyint_v2f32(<2 x float> %a) { ; <2 x float> llvm.nearbyint; expected codegen is one frinti on v0.2s
+; CHECK-LABEL: nearbyint_v2f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinti v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a)
+ ret <2 x float> %c
+}
+
+define <3 x float> @nearbyint_v3f32(<3 x float> %a) { ; <3 x float> llvm.nearbyint; expectations round the full 4s register with one frinti
+; CHECK-LABEL: nearbyint_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinti v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x float> @llvm.nearbyint.v3f32(<3 x float> %a)
+ ret <3 x float> %c
+}
+
+define <4 x float> @nearbyint_v4f32(<4 x float> %a) { ; <4 x float> llvm.nearbyint; expected codegen is one frinti on v0.4s
+; CHECK-LABEL: nearbyint_v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinti v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a)
+ ret <4 x float> %c
+}
+
+define <8 x float> @nearbyint_v8f32(<8 x float> %a) { ; <8 x float> llvm.nearbyint; expectations apply frinti separately to v0.4s and v1.4s
+; CHECK-LABEL: nearbyint_v8f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinti v0.4s, v0.4s
+; CHECK-NEXT: frinti v1.4s, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %a)
+ ret <8 x float> %c
+}
+
+define <7 x half> @nearbyint_v7f16(<7 x half> %a) { ; <7 x half> llvm.nearbyint; FP16 expectations round the whole 8h register, NOFP16 expectations go lane by lane through f32. The GI NOFP16 sequence never reads source lane 7 and writes the lane-6 result to both h[6] and h[7], whereas the SD sequence processes all eight lanes
+; CHECK-SD-NOFP16-LABEL: nearbyint_v7f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frinti s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frinti s5, s1
+; CHECK-SD-NOFP16-NEXT: frinti s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frinti s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frinti s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frinti s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frinti s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frinti s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: nearbyint_v7f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frinti v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: nearbyint_v7f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frinti s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frinti s5, s1
+; CHECK-GI-NOFP16-NEXT: frinti s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frinti s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frinti s3, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s3
+; CHECK-GI-NOFP16-NEXT: frinti s3, s5
+; CHECK-GI-NOFP16-NEXT: frinti s0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: nearbyint_v7f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frinti v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <7 x half> @llvm.nearbyint.v7f16(<7 x half> %a)
+ ret <7 x half> %c
+}
+
+define <4 x half> @nearbyint_v4f16(<4 x half> %a) { ; <4 x half> llvm.nearbyint; NOFP16 expectations handle the four lanes one at a time via f32 (fcvt, frinti, fcvt) and rebuild v0 with lane inserts, FP16 expectations use one frinti on v0.4h
+; CHECK-SD-NOFP16-LABEL: nearbyint_v4f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frinti s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frinti s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT: frinti s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: frinti s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: nearbyint_v4f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frinti v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: nearbyint_v4f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frinti s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT: frinti s1, s1
+; CHECK-GI-NOFP16-NEXT: frinti s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s3
+; CHECK-GI-NOFP16-NEXT: frinti s3, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: nearbyint_v4f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frinti v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %a)
+ ret <4 x half> %c
+}
+
+define <8 x half> @nearbyint_v8f16(<8 x half> %a) { ; <8 x half> llvm.nearbyint; NOFP16 expectations round each of the eight lanes through f32 (fcvt, frinti, fcvt) and assemble the result in v1, FP16 expectations use one frinti on v0.8h
+; CHECK-SD-NOFP16-LABEL: nearbyint_v8f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frinti s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frinti s5, s1
+; CHECK-SD-NOFP16-NEXT: frinti s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frinti s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frinti s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frinti s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frinti s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frinti s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: nearbyint_v8f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frinti v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: nearbyint_v8f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frinti s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frinti s5, s1
+; CHECK-GI-NOFP16-NEXT: frinti s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frinti s4, s4
+; CHECK-GI-NOFP16-NEXT: frinti s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frinti s2, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frinti s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: frinti s0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: nearbyint_v8f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frinti v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
+ ret <8 x half> %c
+}
+
+define <16 x half> @nearbyint_v16f16(<16 x half> %a) { ; <16 x half> llvm.nearbyint; NOFP16 expectations round all sixteen lanes of v0 and v1 through f32 (fcvt, frinti, fcvt), build the results in v2 and v3 and copy them back, FP16 expectations use one frinti each on v0.8h and v1.8h
+; CHECK-SD-NOFP16-LABEL: nearbyint_v16f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frinti s5, s5
+; CHECK-SD-NOFP16-NEXT: frinti s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT: frinti s16, s2
+; CHECK-SD-NOFP16-NEXT: frinti s17, s3
+; CHECK-SD-NOFP16-NEXT: frinti s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT: frinti s7, s7
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT: mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT: frinti s17, s17
+; CHECK-SD-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT: frinti s16, s16
+; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frinti s5, s5
+; CHECK-SD-NOFP16-NEXT: frinti s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT: mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT: frinti s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: frinti s6, s6
+; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frinti s5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frinti s0, s0
+; CHECK-SD-NOFP16-NEXT: frinti s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: frinti s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: nearbyint_v16f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frinti v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: frinti v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: nearbyint_v16f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frinti s4, s4
+; CHECK-GI-NOFP16-NEXT: frinti s5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frinti s16, s2
+; CHECK-GI-NOFP16-NEXT: frinti s17, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: frinti s6, s6
+; CHECK-GI-NOFP16-NEXT: frinti s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
+; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s6
+; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s7
+; CHECK-GI-NOFP16-NEXT: frinti s16, s16
+; CHECK-GI-NOFP16-NEXT: frinti s17, s17
+; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s17
+; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frinti s5, s5
+; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: frinti s16, s18
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT: frinti s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT: frinti s17, s17
+; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT: frinti s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frinti s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT: frinti s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: frinti s1, s1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: nearbyint_v16f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frinti v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: frinti v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %a)
+ ret <16 x half> %c
+}
+
+define double @roundeven_f64(double %a) { ; scalar f64 llvm.roundeven; expected codegen is one frintn on d0 followed by ret
+; CHECK-LABEL: roundeven_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintn d0, d0
+; CHECK-NEXT: ret
+entry:
+ %c = call double @llvm.roundeven.f64(double %a)
+ ret double %c
+}
+
+define float @roundeven_f32(float %a) { ; scalar f32 llvm.roundeven; expected codegen is one frintn on s0 followed by ret
+; CHECK-LABEL: roundeven_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintn s0, s0
+; CHECK-NEXT: ret
+entry:
+ %c = call float @llvm.roundeven.f32(float %a)
+ ret float %c
+}
+
+define half @roundeven_f16(half %a) { ; scalar half llvm.roundeven; the NOFP16 expectations widen to f32 (fcvt, frintn, fcvt back), the FP16 expectations use frintn on h0 directly
+; CHECK-SD-NOFP16-LABEL: roundeven_f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintn s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: roundeven_f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintn h0, h0
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: roundeven_f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frintn s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: roundeven_f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintn h0, h0
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call half @llvm.roundeven.f16(half %a)
+ ret half %c
+}
+
+define <2 x double> @roundeven_v2f64(<2 x double> %a) { ; <2 x double> llvm.roundeven; expected codegen is one frintn on v0.2d
+; CHECK-LABEL: roundeven_v2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintn v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %a)
+ ret <2 x double> %c
+}
+
+define <3 x double> @roundeven_v3f64(<3 x double> %a) { ; <3 x double> llvm.roundeven; expectations insert d1 into lane 1 of v0, apply frintn to v0 and v2 as 2d vectors, then use ext to move the upper lane of v0 into v1
+; CHECK-LABEL: roundeven_v3f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: frintn v2.2d, v2.2d
+; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT: frintn v0.2d, v0.2d
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x double> @llvm.roundeven.v3f64(<3 x double> %a)
+ ret <3 x double> %c
+}
+
+define <4 x double> @roundeven_v4f64(<4 x double> %a) { ; <4 x double> llvm.roundeven; expectations apply frintn separately to v0.2d and v1.2d
+; CHECK-LABEL: roundeven_v4f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintn v0.2d, v0.2d
+; CHECK-NEXT: frintn v1.2d, v1.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %a)
+ ret <4 x double> %c
+}
+
+define <2 x float> @roundeven_v2f32(<2 x float> %a) { ; <2 x float> llvm.roundeven; expected codegen is one frintn on v0.2s
+; CHECK-LABEL: roundeven_v2f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintn v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %a)
+ ret <2 x float> %c
+}
+
+define <3 x float> @roundeven_v3f32(<3 x float> %a) { ; <3 x float> llvm.roundeven; expectations round the full 4s register with one frintn
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintn v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %a)
+ ret <3 x float> %c
+}
+
+define <4 x float> @roundeven_v4f32(<4 x float> %a) { ; <4 x float> llvm.roundeven; expected codegen is one frintn on v0.4s
+; CHECK-LABEL: roundeven_v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintn v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a)
+ ret <4 x float> %c
+}
+
+define <8 x float> @roundeven_v8f32(<8 x float> %a) { ; <8 x float> llvm.roundeven; expectations apply frintn separately to v0.4s and v1.4s
+; CHECK-LABEL: roundeven_v8f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintn v0.4s, v0.4s
+; CHECK-NEXT: frintn v1.4s, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %a)
+ ret <8 x float> %c
+}
+
+define <7 x half> @roundeven_v7f16(<7 x half> %a) { ; <7 x half> llvm.roundeven; FP16 expectations round the whole 8h register, NOFP16 expectations round all eight lanes one by one through f32. Here the SD and GI NOFP16 sequences are instruction-for-instruction the same
+; CHECK-SD-NOFP16-LABEL: roundeven_v7f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frintn s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintn s5, s1
+; CHECK-SD-NOFP16-NEXT: frintn s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frintn s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintn s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frintn s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintn s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frintn s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: roundeven_v7f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintn v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: roundeven_v7f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintn s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: frintn s5, s1
+; CHECK-GI-NOFP16-NEXT: frintn s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frintn s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frintn s4, s5
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: frintn s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frintn s2, s2
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frintn s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: roundeven_v7f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintn v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <7 x half> @llvm.roundeven.v7f16(<7 x half> %a)
+ ret <7 x half> %c
+}
+
+define <4 x half> @roundeven_v4f16(<4 x half> %a) { ; <4 x half> llvm.roundeven; NOFP16 expectations handle the four lanes one at a time via f32 (fcvt, frintn, fcvt) and rebuild v0 with lane inserts, FP16 expectations use one frintn on v0.4h
+; CHECK-SD-NOFP16-LABEL: roundeven_v4f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frintn s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintn s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT: frintn s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: frintn s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: roundeven_v4f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintn v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: roundeven_v4f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: frintn s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frintn s1, s1
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s2
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h4
+; CHECK-GI-NOFP16-NEXT: frintn s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: frintn s2, s2
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s2
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: roundeven_v4f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintn v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %a)
+ ret <4 x half> %c
+}
+
+; <8 x half> llvm.roundeven. The FP16 prefixes expect one vector
+; `frintn v0.8h, v0.8h`. The NOFP16 prefixes expect each of the eight lanes
+; to be extracted, widened with fcvt (h -> s), rounded with scalar frintn,
+; narrowed with fcvt (s -> h) and inserted into v1, which is then copied to v0.
+; NOTE(review): SD/GI presumably mean SelectionDAG/GlobalISel and FP16/NOFP16
+; presumably track +fullfp16; the RUN lines are outside this view - confirm.
+define <8 x half> @roundeven_v8f16(<8 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: roundeven_v8f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frintn s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintn s5, s1
+; CHECK-SD-NOFP16-NEXT: frintn s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frintn s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintn s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frintn s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintn s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frintn s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: roundeven_v8f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintn v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: roundeven_v8f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintn s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: frintn s5, s1
+; CHECK-GI-NOFP16-NEXT: frintn s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frintn s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frintn s4, s5
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: frintn s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frintn s2, s2
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frintn s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: roundeven_v8f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintn v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %a)
+ ret <8 x half> %c
+}
+
+; <16 x half> llvm.roundeven; the assertions treat the argument as two
+; registers, v0 and v1. The FP16 prefixes expect one `frintn ...8h` per
+; register. The NOFP16 prefixes expect the per-lane fcvt / frintn / fcvt
+; sequence for both registers interleaved, with results assembled in v2 and v3
+; and copied back to v0 and v1 before `ret`.
+; NOTE(review): SD/GI presumably mean SelectionDAG/GlobalISel and FP16/NOFP16
+; presumably track +fullfp16; the RUN lines are outside this view - confirm.
+define <16 x half> @roundeven_v16f16(<16 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: roundeven_v16f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintn s5, s5
+; CHECK-SD-NOFP16-NEXT: frintn s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT: frintn s16, s2
+; CHECK-SD-NOFP16-NEXT: frintn s17, s3
+; CHECK-SD-NOFP16-NEXT: frintn s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT: frintn s7, s7
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT: mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT: frintn s17, s17
+; CHECK-SD-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT: frintn s16, s16
+; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintn s5, s5
+; CHECK-SD-NOFP16-NEXT: frintn s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT: mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT: frintn s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: frintn s6, s6
+; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintn s5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frintn s0, s0
+; CHECK-SD-NOFP16-NEXT: frintn s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: frintn s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: roundeven_v16f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintn v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: frintn v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: roundeven_v16f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h0
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h1
+; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: frintn s5, s5
+; CHECK-GI-NOFP16-NEXT: frintn s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frintn s16, s2
+; CHECK-GI-NOFP16-NEXT: frintn s17, s3
+; CHECK-GI-NOFP16-NEXT: frintn s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s6
+; CHECK-GI-NOFP16-NEXT: frintn s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s17
+; CHECK-GI-NOFP16-NEXT: mov h16, v1.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h18
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h6, v1.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s16, h16
+; CHECK-GI-NOFP16-NEXT: frintn s17, s17
+; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frintn s16, s16
+; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v7.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h7, s17
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: frintn s5, s5
+; CHECK-GI-NOFP16-NEXT: frintn s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v7.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[5]
+; CHECK-GI-NOFP16-NEXT: frintn s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h7
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: frintn s6, s6
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: frintn s5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: frintn s0, s0
+; CHECK-GI-NOFP16-NEXT: frintn s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: frintn s1, s1
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: roundeven_v16f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintn v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: frintn v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %a)
+ ret <16 x half> %c
+}
+
+; Scalar double llvm.rint: the assertions expect the call to become a single
+; `frintx d0, d0` followed by `ret`, under the common prefix shared by all runs.
+define double @rint_f64(double %a) {
+; CHECK-LABEL: rint_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintx d0, d0
+; CHECK-NEXT: ret
+entry:
+ %c = call double @llvm.rint.f64(double %a)
+ ret double %c
+}
+
+; Scalar float llvm.rint: the assertions expect the call to become a single
+; `frintx s0, s0` followed by `ret`, under the common prefix shared by all runs.
+define float @rint_f32(float %a) {
+; CHECK-LABEL: rint_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintx s0, s0
+; CHECK-NEXT: ret
+entry:
+ %c = call float @llvm.rint.f32(float %a)
+ ret float %c
+}
+
+; Scalar half llvm.rint. The FP16 prefixes expect a direct `frintx h0, h0`.
+; The NOFP16 prefixes expect the value to be widened with `fcvt s0, h0`,
+; rounded with `frintx s0, s0`, and narrowed back with `fcvt h0, s0`.
+; NOTE(review): SD/GI presumably mean SelectionDAG/GlobalISel and FP16/NOFP16
+; presumably track +fullfp16; the RUN lines are outside this view - confirm.
+define half @rint_f16(half %a) {
+; CHECK-SD-NOFP16-LABEL: rint_f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintx s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: rint_f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintx h0, h0
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: rint_f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frintx s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: rint_f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintx h0, h0
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call half @llvm.rint.f16(half %a)
+ ret half %c
+}
+
+; <2 x double> llvm.rint: the assertions expect one vector
+; `frintx v0.2d, v0.2d` followed by `ret`.
+define <2 x double> @rint_v2f64(<2 x double> %a) {
+; CHECK-LABEL: rint_v2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintx v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x double> @llvm.rint.v2f64(<2 x double> %a)
+ ret <2 x double> %c
+}
+
+; <3 x double> llvm.rint. The assertions show the three elements in d0, d1 and
+; d2: d1 is inserted into the upper lane of v0 (`mov v0.d[1], v1.d[0]`), v0 and
+; v2 are each rounded with a two-lane `frintx`, and `ext ... #8` then moves the
+; upper lane of v0 back into v1 so the results are again in d0, d1 and d2.
+define <3 x double> @rint_v3f64(<3 x double> %a) {
+; CHECK-LABEL: rint_v3f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: frintx v2.2d, v2.2d
+; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT: frintx v0.2d, v0.2d
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x double> @llvm.rint.v3f64(<3 x double> %a)
+ ret <3 x double> %c
+}
+
+; <4 x double> llvm.rint: the assertions expect the operation to be done as
+; two two-lane `frintx` instructions, one on v0 and one on v1.
+define <4 x double> @rint_v4f64(<4 x double> %a) {
+; CHECK-LABEL: rint_v4f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintx v0.2d, v0.2d
+; CHECK-NEXT: frintx v1.2d, v1.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x double> @llvm.rint.v4f64(<4 x double> %a)
+ ret <4 x double> %c
+}
+
+; <2 x float> llvm.rint: the assertions expect one vector
+; `frintx v0.2s, v0.2s` followed by `ret`.
+define <2 x float> @rint_v2f32(<2 x float> %a) {
+; CHECK-LABEL: rint_v2f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintx v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x float> @llvm.rint.v2f32(<2 x float> %a)
+ ret <2 x float> %c
+}
+
+; <3 x float> llvm.rint: the assertions expect the three-element vector to be
+; rounded with a single four-lane `frintx v0.4s, v0.4s`.
+define <3 x float> @rint_v3f32(<3 x float> %a) {
+; CHECK-LABEL: rint_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintx v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x float> @llvm.rint.v3f32(<3 x float> %a)
+ ret <3 x float> %c
+}
+
+; <4 x float> llvm.rint: the assertions expect one vector
+; `frintx v0.4s, v0.4s` followed by `ret`.
+define <4 x float> @rint_v4f32(<4 x float> %a) {
+; CHECK-LABEL: rint_v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintx v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x float> @llvm.rint.v4f32(<4 x float> %a)
+ ret <4 x float> %c
+}
+
+; <8 x float> llvm.rint: the assertions expect the operation to be done as
+; two four-lane `frintx` instructions, one on v0 and one on v1.
+define <8 x float> @rint_v8f32(<8 x float> %a) {
+; CHECK-LABEL: rint_v8f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintx v0.4s, v0.4s
+; CHECK-NEXT: frintx v1.4s, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <8 x float> @llvm.rint.v8f32(<8 x float> %a)
+ ret <8 x float> %c
+}
+
+; <7 x half> llvm.rint (non-power-of-two element count). The FP16 prefixes
+; expect a single eight-lane `frintx v0.8h, v0.8h`. The NOFP16 prefixes expect
+; a per-lane fcvt / frintx / fcvt sequence that assembles the result in v1.
+; The two NOFP16 expectations differ: the SD one extracts and rounds all eight
+; lanes (h[0]..h[7]), while the GI one extracts lanes 0-6 only and writes the
+; lane-6 result into both h[6] and h[7] of v1.
+; NOTE(review): SD/GI presumably mean SelectionDAG/GlobalISel and FP16/NOFP16
+; presumably track +fullfp16; the RUN lines are outside this view - confirm.
+define <7 x half> @rint_v7f16(<7 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: rint_v7f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frintx s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintx s5, s1
+; CHECK-SD-NOFP16-NEXT: frintx s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frintx s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintx s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frintx s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintx s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frintx s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: rint_v7f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintx v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: rint_v7f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintx s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frintx s5, s1
+; CHECK-GI-NOFP16-NEXT: frintx s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frintx s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frintx s3, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s3
+; CHECK-GI-NOFP16-NEXT: frintx s3, s5
+; CHECK-GI-NOFP16-NEXT: frintx s0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: rint_v7f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintx v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <7 x half> @llvm.rint.v7f16(<7 x half> %a)
+ ret <7 x half> %c
+}
+
+; <4 x half> llvm.rint. The FP16 prefixes expect one vector
+; `frintx v0.4h, v0.4h`. The NOFP16 prefixes expect each of the four lanes to
+; be extracted, widened with fcvt (h -> s), rounded with scalar frintx,
+; narrowed with fcvt (s -> h) and inserted back into v0. The SD and GI
+; expectations use different instruction orders and register assignments.
+; NOTE(review): SD/GI presumably mean SelectionDAG/GlobalISel and FP16/NOFP16
+; presumably track +fullfp16; the RUN lines are outside this view - confirm.
+define <4 x half> @rint_v4f16(<4 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: rint_v4f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frintx s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintx s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT: frintx s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: frintx s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: rint_v4f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintx v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: rint_v4f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintx s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT: frintx s1, s1
+; CHECK-GI-NOFP16-NEXT: frintx s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s3
+; CHECK-GI-NOFP16-NEXT: frintx s3, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: rint_v4f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintx v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <4 x half> @llvm.rint.v4f16(<4 x half> %a)
+ ret <4 x half> %c
+}
+
+; <8 x half> llvm.rint. The FP16 prefixes expect one vector
+; `frintx v0.8h, v0.8h`. The NOFP16 prefixes expect each of the eight lanes to
+; be extracted, widened with fcvt (h -> s), rounded with scalar frintx,
+; narrowed with fcvt (s -> h) and inserted into v1, which is then copied to v0.
+; The SD and GI expectations schedule those steps in different orders.
+; NOTE(review): SD/GI presumably mean SelectionDAG/GlobalISel and FP16/NOFP16
+; presumably track +fullfp16; the RUN lines are outside this view - confirm.
+define <8 x half> @rint_v8f16(<8 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: rint_v8f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frintx s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintx s5, s1
+; CHECK-SD-NOFP16-NEXT: frintx s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frintx s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintx s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frintx s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintx s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frintx s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: rint_v8f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintx v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: rint_v8f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintx s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frintx s5, s1
+; CHECK-GI-NOFP16-NEXT: frintx s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frintx s4, s4
+; CHECK-GI-NOFP16-NEXT: frintx s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frintx s2, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frintx s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: frintx s0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: rint_v8f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintx v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
+ ret <8 x half> %c
+}
+
+; <16 x half> llvm.rint; the assertions treat the argument as two registers,
+; v0 and v1. The FP16 prefixes expect one `frintx ...8h` per register. The
+; NOFP16 prefixes expect the per-lane fcvt / frintx / fcvt sequence for both
+; registers interleaved, with results assembled in v2 and v3 and copied back
+; to v0 and v1 before `ret`. The SD and GI expectations schedule those steps
+; in different orders.
+; NOTE(review): SD/GI presumably mean SelectionDAG/GlobalISel and FP16/NOFP16
+; presumably track +fullfp16; the RUN lines are outside this view - confirm.
+define <16 x half> @rint_v16f16(<16 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: rint_v16f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintx s5, s5
+; CHECK-SD-NOFP16-NEXT: frintx s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT: frintx s16, s2
+; CHECK-SD-NOFP16-NEXT: frintx s17, s3
+; CHECK-SD-NOFP16-NEXT: frintx s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT: frintx s7, s7
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT: mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT: frintx s17, s17
+; CHECK-SD-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT: frintx s16, s16
+; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintx s5, s5
+; CHECK-SD-NOFP16-NEXT: frintx s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT: mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT: frintx s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: frintx s6, s6
+; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintx s5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frintx s0, s0
+; CHECK-SD-NOFP16-NEXT: frintx s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: frintx s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: rint_v16f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintx v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: frintx v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: rint_v16f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frintx s4, s4
+; CHECK-GI-NOFP16-NEXT: frintx s5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frintx s16, s2
+; CHECK-GI-NOFP16-NEXT: frintx s17, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: frintx s6, s6
+; CHECK-GI-NOFP16-NEXT: frintx s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
+; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s6
+; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s7
+; CHECK-GI-NOFP16-NEXT: frintx s16, s16
+; CHECK-GI-NOFP16-NEXT: frintx s17, s17
+; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s17
+; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frintx s5, s5
+; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: frintx s16, s18
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT: frintx s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT: frintx s17, s17
+; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT: frintx s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frintx s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT: frintx s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: frintx s1, s1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: rint_v16f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintx v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: frintx v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <16 x half> @llvm.rint.v16f16(<16 x half> %a)
+ ret <16 x half> %c
+}
+
+; Scalar double llvm.round: the assertions expect the call to become a single
+; `frinta d0, d0` followed by `ret`, under the common prefix shared by all runs.
+define double @round_f64(double %a) {
+; CHECK-LABEL: round_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinta d0, d0
+; CHECK-NEXT: ret
+entry:
+ %c = call double @llvm.round.f64(double %a)
+ ret double %c
+}
+
+; Scalar float llvm.round: the assertions expect the call to become a single
+; `frinta s0, s0` followed by `ret`, under the common prefix shared by all runs.
+define float @round_f32(float %a) {
+; CHECK-LABEL: round_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinta s0, s0
+; CHECK-NEXT: ret
+entry:
+ %c = call float @llvm.round.f32(float %a)
+ ret float %c
+}
+
+; Scalar half llvm.round. The FP16 prefixes expect a direct `frinta h0, h0`.
+; The NOFP16 prefixes expect the value to be widened with `fcvt s0, h0`,
+; rounded with `frinta s0, s0`, and narrowed back with `fcvt h0, s0`.
+; NOTE(review): SD/GI presumably mean SelectionDAG/GlobalISel and FP16/NOFP16
+; presumably track +fullfp16; the RUN lines are outside this view - confirm.
+define half @round_f16(half %a) {
+; CHECK-SD-NOFP16-LABEL: round_f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frinta s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: round_f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frinta h0, h0
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: round_f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frinta s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: round_f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frinta h0, h0
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call half @llvm.round.f16(half %a)
+ ret half %c
+}
+
+; <2 x double> llvm.round: the expected code is one vector frinta on v0.2d.
+define <2 x double> @round_v2f64(<2 x double> %a) {
+; CHECK-LABEL: round_v2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinta v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x double> @llvm.round.v2f64(<2 x double> %a)
+ ret <2 x double> %c
+}
+
+; <3 x double> llvm.round. The expected code merges d1 into the upper lane of
+; v0 (mov v0.d[1], v1.d[0]), applies frinta to v0.2d and v2.2d, then uses ext
+; to move the upper half of v0 back into v1 for the return.
+define <3 x double> @round_v3f64(<3 x double> %a) {
+; CHECK-LABEL: round_v3f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: frinta v2.2d, v2.2d
+; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT: frinta v0.2d, v0.2d
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x double> @llvm.round.v3f64(<3 x double> %a)
+ ret <3 x double> %c
+}
+
+; <4 x double> llvm.round: the expected code is two vector frinta
+; instructions, one on v0.2d and one on v1.2d.
+define <4 x double> @round_v4f64(<4 x double> %a) {
+; CHECK-LABEL: round_v4f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinta v0.2d, v0.2d
+; CHECK-NEXT: frinta v1.2d, v1.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x double> @llvm.round.v4f64(<4 x double> %a)
+ ret <4 x double> %c
+}
+
+; <2 x float> llvm.round: the expected code is one vector frinta on v0.2s.
+define <2 x float> @round_v2f32(<2 x float> %a) {
+; CHECK-LABEL: round_v2f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinta v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x float> @llvm.round.v2f32(<2 x float> %a)
+ ret <2 x float> %c
+}
+
+; <3 x float> llvm.round: the expected code is one frinta using the four-lane
+; v0.4s arrangement, the same instruction expected for round_v4f32.
+define <3 x float> @round_v3f32(<3 x float> %a) {
+; CHECK-LABEL: round_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinta v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x float> @llvm.round.v3f32(<3 x float> %a)
+ ret <3 x float> %c
+}
+
+; <4 x float> llvm.round: the expected code is one vector frinta on v0.4s.
+define <4 x float> @round_v4f32(<4 x float> %a) {
+; CHECK-LABEL: round_v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinta v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x float> @llvm.round.v4f32(<4 x float> %a)
+ ret <4 x float> %c
+}
+
+; <8 x float> llvm.round: the expected code is two vector frinta
+; instructions, one on v0.4s and one on v1.4s.
+define <8 x float> @round_v8f32(<8 x float> %a) {
+; CHECK-LABEL: round_v8f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinta v0.4s, v0.4s
+; CHECK-NEXT: frinta v1.4s, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <8 x float> @llvm.round.v8f32(<8 x float> %a)
+ ret <8 x float> %c
+}
+
+; <7 x half> llvm.round. The NOFP16 variants expect a lane-by-lane expansion:
+; each half lane is extracted, widened with fcvt, rounded with frinta, narrowed
+; with fcvt and inserted into v1, which is finally copied to v0. The two NOFP16
+; expectations differ: the SD one also processes lane 7, while the GI one only
+; extracts lanes 0-6 and inserts the rounded lane 6 into both h[6] and h[7].
+; The FP16 variants expect a single frinta on v0.8h.
+define <7 x half> @round_v7f16(<7 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: round_v7f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frinta s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frinta s5, s1
+; CHECK-SD-NOFP16-NEXT: frinta s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frinta s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frinta s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frinta s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frinta s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frinta s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: round_v7f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frinta v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: round_v7f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frinta s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frinta s5, s1
+; CHECK-GI-NOFP16-NEXT: frinta s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frinta s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frinta s3, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s3
+; CHECK-GI-NOFP16-NEXT: frinta s3, s5
+; CHECK-GI-NOFP16-NEXT: frinta s0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: round_v7f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frinta v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <7 x half> @llvm.round.v7f16(<7 x half> %a)
+ ret <7 x half> %c
+}
+
+; <4 x half> llvm.round. The NOFP16 variants expect each of the four half
+; lanes to be extracted, widened with fcvt, rounded with frinta, narrowed with
+; fcvt and re-inserted into v0; the SD and GI sequences differ only in
+; ordering and register choice. The FP16 variants expect one frinta on v0.4h.
+define <4 x half> @round_v4f16(<4 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: round_v4f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frinta s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frinta s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT: frinta s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: frinta s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: round_v4f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frinta v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: round_v4f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frinta s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT: frinta s1, s1
+; CHECK-GI-NOFP16-NEXT: frinta s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s3
+; CHECK-GI-NOFP16-NEXT: frinta s3, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: round_v4f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frinta v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <4 x half> @llvm.round.v4f16(<4 x half> %a)
+ ret <4 x half> %c
+}
+
+; <8 x half> llvm.round. The NOFP16 variants expect all eight half lanes to be
+; extracted, widened with fcvt, rounded with frinta, narrowed with fcvt and
+; inserted into v1, which is then copied to v0. The FP16 variants expect a
+; single frinta on v0.8h.
+define <8 x half> @round_v8f16(<8 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: round_v8f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frinta s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frinta s5, s1
+; CHECK-SD-NOFP16-NEXT: frinta s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frinta s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frinta s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frinta s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frinta s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frinta s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: round_v8f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frinta v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: round_v8f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frinta s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frinta s5, s1
+; CHECK-GI-NOFP16-NEXT: frinta s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frinta s4, s4
+; CHECK-GI-NOFP16-NEXT: frinta s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frinta s2, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frinta s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: frinta s0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: round_v8f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frinta v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <8 x half> @llvm.round.v8f16(<8 x half> %a)
+ ret <8 x half> %c
+}
+
+; <16 x half> llvm.round, passed in v0 and v1. The NOFP16 variants expect both
+; registers to be expanded lane by lane (extract, fcvt to single, frinta, fcvt
+; back), with the results assembled in v2 and v3 and then copied to v0 and v1.
+; The FP16 variants expect two frinta instructions, on v0.8h and v1.8h.
+define <16 x half> @round_v16f16(<16 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: round_v16f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frinta s5, s5
+; CHECK-SD-NOFP16-NEXT: frinta s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT: frinta s16, s2
+; CHECK-SD-NOFP16-NEXT: frinta s17, s3
+; CHECK-SD-NOFP16-NEXT: frinta s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT: frinta s7, s7
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT: mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT: frinta s17, s17
+; CHECK-SD-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT: frinta s16, s16
+; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frinta s5, s5
+; CHECK-SD-NOFP16-NEXT: frinta s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT: mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT: frinta s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: frinta s6, s6
+; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frinta s5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frinta s0, s0
+; CHECK-SD-NOFP16-NEXT: frinta s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: frinta s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: round_v16f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frinta v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: frinta v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: round_v16f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frinta s4, s4
+; CHECK-GI-NOFP16-NEXT: frinta s5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frinta s16, s2
+; CHECK-GI-NOFP16-NEXT: frinta s17, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: frinta s6, s6
+; CHECK-GI-NOFP16-NEXT: frinta s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
+; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s6
+; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s7
+; CHECK-GI-NOFP16-NEXT: frinta s16, s16
+; CHECK-GI-NOFP16-NEXT: frinta s17, s17
+; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s17
+; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frinta s5, s5
+; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: frinta s16, s18
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT: frinta s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT: frinta s17, s17
+; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT: frinta s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frinta s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT: frinta s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: frinta s1, s1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: round_v16f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frinta v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: frinta v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <16 x half> @llvm.round.v16f16(<16 x half> %a)
+ ret <16 x half> %c
+}
+
+; Scalar double llvm.trunc: the expected code is a single "frintz d0, d0"
+; followed by the return.
+define double @trunc_f64(double %a) {
+; CHECK-LABEL: trunc_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz d0, d0
+; CHECK-NEXT: ret
+entry:
+ %c = call double @llvm.trunc.f64(double %a)
+ ret double %c
+}
+
+; Scalar float llvm.trunc: the expected code is a single "frintz s0, s0"
+; followed by the return.
+define float @trunc_f32(float %a) {
+; CHECK-LABEL: trunc_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz s0, s0
+; CHECK-NEXT: ret
+entry:
+ %c = call float @llvm.trunc.f32(float %a)
+ ret float %c
+}
+
+; Scalar half llvm.trunc. The NOFP16 variants expect the value to be widened
+; with fcvt into s0, truncated with frintz and narrowed back with fcvt; the
+; FP16 variants expect frintz directly on h0. The SD and GI expectations are
+; the same here (presumably SelectionDAG vs. GlobalISel -- confirm against the
+; RUN lines, which are outside this excerpt).
+define half @trunc_f16(half %a) {
+; CHECK-SD-NOFP16-LABEL: trunc_f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintz s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: trunc_f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintz h0, h0
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: trunc_f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frintz s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: trunc_f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintz h0, h0
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call half @llvm.trunc.f16(half %a)
+ ret half %c
+}
+
+; <2 x double> llvm.trunc: the expected code is one vector frintz on v0.2d.
+define <2 x double> @trunc_v2f64(<2 x double> %a) {
+; CHECK-LABEL: trunc_v2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a)
+ ret <2 x double> %c
+}
+
+; <3 x double> llvm.trunc. The expected code merges d1 into the upper lane of
+; v0 (mov v0.d[1], v1.d[0]), applies frintz to v0.2d and v2.2d, then uses ext
+; to move the upper half of v0 back into v1 for the return.
+define <3 x double> @trunc_v3f64(<3 x double> %a) {
+; CHECK-LABEL: trunc_v3f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: frintz v2.2d, v2.2d
+; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT: frintz v0.2d, v0.2d
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x double> @llvm.trunc.v3f64(<3 x double> %a)
+ ret <3 x double> %c
+}
+
+; <4 x double> llvm.trunc: the expected code is two vector frintz
+; instructions, one on v0.2d and one on v1.2d.
+define <4 x double> @trunc_v4f64(<4 x double> %a) {
+; CHECK-LABEL: trunc_v4f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz v0.2d, v0.2d
+; CHECK-NEXT: frintz v1.2d, v1.2d
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x double> @llvm.trunc.v4f64(<4 x double> %a)
+ ret <4 x double> %c
+}
+
+; <2 x float> llvm.trunc: the expected code is one vector frintz on v0.2s.
+define <2 x float> @trunc_v2f32(<2 x float> %a) {
+; CHECK-LABEL: trunc_v2f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x float> @llvm.trunc.v2f32(<2 x float> %a)
+ ret <2 x float> %c
+}
+
+; <3 x float> llvm.trunc: the expected code is one frintz using the four-lane
+; v0.4s arrangement, the same instruction expected for trunc_v4f32.
+define <3 x float> @trunc_v3f32(<3 x float> %a) {
+; CHECK-LABEL: trunc_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <3 x float> @llvm.trunc.v3f32(<3 x float> %a)
+ ret <3 x float> %c
+}
+
+; <4 x float> llvm.trunc: the expected code is one vector frintz on v0.4s.
+define <4 x float> @trunc_v4f32(<4 x float> %a) {
+; CHECK-LABEL: trunc_v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
+ ret <4 x float> %c
+}
+
+; <8 x float> llvm.trunc: the expected code is two vector frintz
+; instructions, one on v0.4s and one on v1.4s.
+define <8 x float> @trunc_v8f32(<8 x float> %a) {
+; CHECK-LABEL: trunc_v8f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz v0.4s, v0.4s
+; CHECK-NEXT: frintz v1.4s, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %c = call <8 x float> @llvm.trunc.v8f32(<8 x float> %a)
+ ret <8 x float> %c
+}
+
+; <7 x half> llvm.trunc. The NOFP16 variants expect a lane-by-lane expansion:
+; each half lane is extracted, widened with fcvt, truncated with frintz,
+; narrowed with fcvt and inserted into v1, which is finally copied to v0. The
+; two NOFP16 expectations differ: the SD one also processes lane 7, while the
+; GI one only extracts lanes 0-6 and inserts the truncated lane 6 into both
+; h[6] and h[7]. The FP16 variants expect a single frintz on v0.8h.
+define <7 x half> @trunc_v7f16(<7 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: trunc_v7f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frintz s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintz s5, s1
+; CHECK-SD-NOFP16-NEXT: frintz s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frintz s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintz s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frintz s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintz s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frintz s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: trunc_v7f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintz v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: trunc_v7f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintz s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frintz s5, s1
+; CHECK-GI-NOFP16-NEXT: frintz s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frintz s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frintz s3, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s3
+; CHECK-GI-NOFP16-NEXT: frintz s3, s5
+; CHECK-GI-NOFP16-NEXT: frintz s0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: trunc_v7f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintz v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <7 x half> @llvm.trunc.v7f16(<7 x half> %a)
+ ret <7 x half> %c
+}
+
+; <4 x half> llvm.trunc. The NOFP16 variants expect each of the four half
+; lanes to be extracted, widened with fcvt, truncated with frintz, narrowed
+; with fcvt and re-inserted into v0; the SD and GI sequences differ only in
+; ordering and register choice. The FP16 variants expect one frintz on v0.4h.
+define <4 x half> @trunc_v4f16(<4 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: trunc_v4f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frintz s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintz s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT: frintz s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: frintz s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: trunc_v4f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintz v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: trunc_v4f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintz s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT: frintz s1, s1
+; CHECK-GI-NOFP16-NEXT: frintz s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s3
+; CHECK-GI-NOFP16-NEXT: frintz s3, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: trunc_v4f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintz v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <4 x half> @llvm.trunc.v4f16(<4 x half> %a)
+ ret <4 x half> %c
+}
+
+; <8 x half> llvm.trunc. The NOFP16 variants expect all eight half lanes to be
+; extracted, widened with fcvt, truncated with frintz, narrowed with fcvt and
+; inserted into v1, which is then copied to v0. The FP16 variants expect a
+; single frintz on v0.8h.
+define <8 x half> @trunc_v8f16(<8 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: trunc_v8f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: frintz s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintz s5, s1
+; CHECK-SD-NOFP16-NEXT: frintz s2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT: frintz s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: frintz s4, s5
+; CHECK-SD-NOFP16-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: frintz s3, s3
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: frintz s2, s2
+; CHECK-SD-NOFP16-NEXT: mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT: frintz s0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT: ret
+;
+; CHECK-SD-FP16-LABEL: trunc_v8f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintz v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: trunc_v8f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: frintz s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: frintz s5, s1
+; CHECK-GI-NOFP16-NEXT: frintz s2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT: frintz s4, s4
+; CHECK-GI-NOFP16-NEXT: frintz s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frintz s2, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frintz s3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT: frintz s0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: trunc_v8f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintz v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
+ ret <8 x half> %c
+}
+
+define <16 x half> @trunc_v16f16(<16 x half> %a) { ; llvm.trunc on <16 x half>; per the checks below the value occupies two 8-lane registers (v0, v1)
+; CHECK-SD-NOFP16-LABEL: trunc_v16f16:
+; CHECK-SD-NOFP16: // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintz s5, s5
+; CHECK-SD-NOFP16-NEXT: frintz s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT: frintz s16, s2
+; CHECK-SD-NOFP16-NEXT: frintz s17, s3
+; CHECK-SD-NOFP16-NEXT: frintz s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT: frintz s7, s7
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT: fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT: mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT: frintz s17, s17
+; CHECK-SD-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT: frintz s16, s16
+; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintz s5, s5
+; CHECK-SD-NOFP16-NEXT: frintz s6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT: mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT: frintz s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT: frintz s6, s6
+; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT: frintz s5, s5
+; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT: frintz s0, s0
+; CHECK-SD-NOFP16-NEXT: frintz s4, s4
+; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT: frintz s1, s1
+; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT: mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT: ret
+; Above: each of the 16 lanes is widened (fcvt h->s), rounded with scalar frintz, narrowed (fcvt s->h) and re-inserted. Below: the -FP16 variant expects just one vector frintz per register.
+; CHECK-SD-FP16-LABEL: trunc_v16f16:
+; CHECK-SD-FP16: // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT: frintz v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT: frintz v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT: ret
+; Second -NOFP16 group (GI; presumably GlobalISel, the RUN lines are not visible in this excerpt): same per-lane expansion, different instruction order and register assignment.
+; CHECK-GI-NOFP16-LABEL: trunc_v16f16:
+; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT: frintz s4, s4
+; CHECK-GI-NOFP16-NEXT: frintz s5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frintz s16, s2
+; CHECK-GI-NOFP16-NEXT: frintz s17, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT: frintz s6, s6
+; CHECK-GI-NOFP16-NEXT: frintz s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
+; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s6
+; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s7
+; CHECK-GI-NOFP16-NEXT: frintz s16, s16
+; CHECK-GI-NOFP16-NEXT: frintz s17, s17
+; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
+; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s17
+; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT: frintz s5, s5
+; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: frintz s16, s18
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT: frintz s7, s7
+; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT: frintz s17, s17
+; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT: frintz s6, s6
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: frintz s4, s4
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT: frintz s0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT: frintz s1, s1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: ret
+; Second -FP16 group: the same two vector frintz instructions as the first -FP16 group.
+; CHECK-GI-FP16-LABEL: trunc_v16f16:
+; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: frintz v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: frintz v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT: ret
+entry:
+ %c = call <16 x half> @llvm.trunc.v16f16(<16 x half> %a)
+ ret <16 x half> %c
+}
+; Intrinsic declarations: ceil, floor, nearbyint, rint, round, roundeven and trunc, one per scalar/vector half, float and double type listed (7 intrinsics x 14 types).
+declare <16 x half> @llvm.ceil.v16f16(<16 x half>)
+declare <16 x half> @llvm.floor.v16f16(<16 x half>)
+declare <16 x half> @llvm.nearbyint.v16f16(<16 x half>)
+declare <16 x half> @llvm.rint.v16f16(<16 x half>)
+declare <16 x half> @llvm.round.v16f16(<16 x half>)
+declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
+declare <16 x half> @llvm.trunc.v16f16(<16 x half>)
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
+declare <2 x double> @llvm.rint.v2f64(<2 x double>)
+declare <2 x double> @llvm.round.v2f64(<2 x double>)
+declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
+declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
+declare <2 x float> @llvm.floor.v2f32(<2 x float>)
+declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
+declare <2 x float> @llvm.rint.v2f32(<2 x float>)
+declare <2 x float> @llvm.round.v2f32(<2 x float>)
+declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
+declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
+declare <3 x double> @llvm.ceil.v3f64(<3 x double>)
+declare <3 x double> @llvm.floor.v3f64(<3 x double>)
+declare <3 x double> @llvm.nearbyint.v3f64(<3 x double>)
+declare <3 x double> @llvm.rint.v3f64(<3 x double>)
+declare <3 x double> @llvm.round.v3f64(<3 x double>)
+declare <3 x double> @llvm.roundeven.v3f64(<3 x double>)
+declare <3 x double> @llvm.trunc.v3f64(<3 x double>)
+declare <3 x float> @llvm.ceil.v3f32(<3 x float>)
+declare <3 x float> @llvm.floor.v3f32(<3 x float>)
+declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
+declare <3 x float> @llvm.rint.v3f32(<3 x float>)
+declare <3 x float> @llvm.round.v3f32(<3 x float>)
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
+declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
+declare <4 x double> @llvm.floor.v4f64(<4 x double>)
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
+declare <4 x double> @llvm.rint.v4f64(<4 x double>)
+declare <4 x double> @llvm.round.v4f64(<4 x double>)
+declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
+declare <4 x float> @llvm.floor.v4f32(<4 x float>)
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
+declare <4 x float> @llvm.rint.v4f32(<4 x float>)
+declare <4 x float> @llvm.round.v4f32(<4 x float>)
+declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
+declare <4 x half> @llvm.ceil.v4f16(<4 x half>)
+declare <4 x half> @llvm.floor.v4f16(<4 x half>)
+declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>)
+declare <4 x half> @llvm.rint.v4f16(<4 x half>)
+declare <4 x half> @llvm.round.v4f16(<4 x half>)
+declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
+declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
+declare <7 x half> @llvm.ceil.v7f16(<7 x half>)
+declare <7 x half> @llvm.floor.v7f16(<7 x half>)
+declare <7 x half> @llvm.nearbyint.v7f16(<7 x half>)
+declare <7 x half> @llvm.rint.v7f16(<7 x half>)
+declare <7 x half> @llvm.round.v7f16(<7 x half>)
+declare <7 x half> @llvm.roundeven.v7f16(<7 x half>)
+declare <7 x half> @llvm.trunc.v7f16(<7 x half>)
+declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
+declare <8 x float> @llvm.floor.v8f32(<8 x float>)
+declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
+declare <8 x float> @llvm.rint.v8f32(<8 x float>)
+declare <8 x float> @llvm.round.v8f32(<8 x float>)
+declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
+declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
+declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
+declare <8 x half> @llvm.floor.v8f16(<8 x half>)
+declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
+declare <8 x half> @llvm.rint.v8f16(<8 x half>)
+declare <8 x half> @llvm.round.v8f16(<8 x half>)
+declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
+declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.floor.f64(double)
+declare double @llvm.nearbyint.f64(double)
+declare double @llvm.rint.f64(double)
+declare double @llvm.round.f64(double)
+declare double @llvm.roundeven.f64(double)
+declare double @llvm.trunc.f64(double)
+declare float @llvm.ceil.f32(float)
+declare float @llvm.floor.f32(float)
+declare float @llvm.nearbyint.f32(float)
+declare float @llvm.rint.f32(float)
+declare float @llvm.round.f32(float)
+declare float @llvm.roundeven.f32(float)
+declare float @llvm.trunc.f32(float)
+declare half @llvm.ceil.f16(half)
+declare half @llvm.floor.f16(half)
+declare half @llvm.nearbyint.f16(half)
+declare half @llvm.rint.f16(half)
+declare half @llvm.round.f16(half)
+declare half @llvm.roundeven.f16(half)
+declare half @llvm.trunc.f16(half)
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
More information about the llvm-commits mailing list