[llvm] 8fc6b1a - [AArch64] Add some vcvt tests. NFC.

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 17 02:41:58 PDT 2023


Author: David Green
Date: 2023-08-17T10:41:52+01:00
New Revision: 8fc6b1a18f4d9cc4d481c38bbc503a27acc7e461

URL: https://github.com/llvm/llvm-project/commit/8fc6b1a18f4d9cc4d481c38bbc503a27acc7e461
DIFF: https://github.com/llvm/llvm-project/commit/8fc6b1a18f4d9cc4d481c38bbc503a27acc7e461.diff

LOG: [AArch64] Add some vcvt tests. NFC.

See D157679. This also removes some duplicated tests from
arm64-vfloatintrinsics.ll, as those tests now exist elsewhere.

Added: 
    llvm/test/CodeGen/AArch64/fcvt.ll

Modified: 
    llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
index eecbc4eb3f2c4b..887f94d41c4a4c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
@@ -14,22 +14,6 @@
 
 %v4f16 = type <4 x half>
 
-; FALLBACK-NOT: remark{{.*}}test_v4f16.sqrt
-define %v4f16 @test_v4f16.sqrt(%v4f16 %a) {
-  ; CHECK-LABEL:          test_v4f16.sqrt:
-  ; CHECK-NOFP16-COUNT-4: fsqrt s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           fsqrt.4h
-  ; CHECK-FP16-NEXT:      ret
-  ; GISEL-LABEL:          test_v4f16.sqrt:
-  ; GISEL-NOFP16:         fcvt
-  ; GISEL-NOFP16:         fsqrt.4s
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           fsqrt.4h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v4f16 @llvm.sqrt.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
 define %v4f16 @test_v4f16.powi(%v4f16 %a, i32 %b) {
   ; This operation is expanded, whether with or without +fullfp16.
   ; CHECK-LABEL:   test_v4f16.powi:
@@ -136,132 +120,6 @@ define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) {
   ret %v4f16 %1
 }
 
-; FALLBACK-NOT: remark{{.*}}test_v4f16.fabs
-define %v4f16 @test_v4f16.fabs(%v4f16 %a) {
-  ; CHECK-LABEL:          test_v4f16.fabs:
-  ; CHECK-NOFP16-COUNT-4: fabs s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           fabs.4h
-  ; CHECK-FP16-NEXT:      ret
-
-  ; GISEL-LABEL:          test_v4f16.fabs:
-  ; GISEL-NOFP16:         fabs.4s
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           fabs.4h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v4f16 @llvm.fabs.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.floor
-define %v4f16 @test_v4f16.floor(%v4f16 %a) {
-  ; CHECK-LABEL:          test_v4f16.floor:
-  ; CHECK-NOFP16-COUNT-4: frintm s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frintm.4h
-  ; CHECK-FP16-NEXT:      ret
-
-  ; GISEL-LABEL:          test_v4f16.floor:
-  ; GISEL-NOFP16-COUNT-4: frintm s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frintm.4h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v4f16 @llvm.floor.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-define %v4f16 @test_v4f16.ceil(%v4f16 %a) {
-  ; CHECK-LABEL:          test_v4f16.ceil:
-  ; CHECK-NOFP16-COUNT-4: frintp s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frintp.4h
-  ; CHECK-FP16-NEXT:      ret
-  ; FALLBACK-NOT: remark{{.*}}test_v4f16.ceil:
-  ; GISEL-LABEL:          test_v4f16.ceil:
-  ; GISEL-NOFP16-COUNT-4: frintp s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frintp.4h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.trunc
-define %v4f16 @test_v4f16.trunc(%v4f16 %a) {
-  ; CHECK-LABEL:          test_v4f16.trunc:
-  ; CHECK-NOFP16-COUNT-4: frintz s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frintz.4h
-  ; CHECK-FP16-NEXT:      ret
-  ; GISEL-LABEL:          test_v4f16.trunc:
-  ; GISEL-NOFP16-COUNT-4: frintz s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frintz.4h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v4f16 @llvm.trunc.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.rint
-define %v4f16 @test_v4f16.rint(%v4f16 %a) {
-  ; CHECK-LABEL:          test_v4f16.rint:
-  ; CHECK-NOFP16-COUNT-4: frintx s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frintx.4h
-  ; CHECK-FP16-NEXT:      ret
-  ; GISEL-LABEL:          test_v4f16.rint:
-  ; GISEL-NOFP16-COUNT-4: frintx s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frintx.4h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v4f16 @llvm.rint.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.nearbyint
-define %v4f16 @test_v4f16.nearbyint(%v4f16 %a) {
-  ; CHECK-LABEL:          test_v4f16.nearbyint:
-  ; CHECK-NOFP16-COUNT-4: frinti s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frinti.4h
-  ; CHECK-FP16-NEXT:      ret
-  ; GISEL-LABEL:          test_v4f16.nearbyint:
-  ; GISEL-NOFP16-COUNT-4: frinti s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frinti.4h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v4f16 @llvm.nearbyint.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-define %v4f16 @test_v4f16.round(%v4f16 %a) {
-  ; CHECK-LABEL:          test_v4f16.round:
-  ; CHECK-NOFP16-COUNT-4: frinta s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frinta.4h
-  ; CHECK-FP16-NEXT:      ret
-  ; GISEL-LABEL:          test_v4f16.round:
-  ; GISEL-NOFP16-COUNT-4: frinta s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frinta.4h
-  ; GISEL-FP16-NEXT:      ret
-  %1 =  call %v4f16 @llvm.round.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-define %v4f16 @test_v4f16.roundeven(%v4f16 %a) {
-  ; CHECK-LABEL:          test_v4f16.roundeven:
-  ; CHECK-NOFP16-COUNT-4: frintn s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frintn.4h
-  ; CHECK-FP16-NEXT:      ret
-  ; GISEL-LABEL:          test_v4f16.roundeven:
-  ; GISEL-NOFP16-COUNT-4: frintn s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frintn.4h
-  ; GISEL-FP16-NEXT:      ret
-  %1 =  call %v4f16 @llvm.roundeven.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-
-declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0
 declare %v4f16 @llvm.powi.v4f16.i32(%v4f16, i32) #0
 declare %v4f16 @llvm.sin.v4f16(%v4f16) #0
 declare %v4f16 @llvm.cos.v4f16(%v4f16) #0
@@ -272,35 +130,11 @@ declare %v4f16 @llvm.log.v4f16(%v4f16) #0
 declare %v4f16 @llvm.log10.v4f16(%v4f16) #0
 declare %v4f16 @llvm.log2.v4f16(%v4f16) #0
 declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0
-declare %v4f16 @llvm.fabs.v4f16(%v4f16) #0
-declare %v4f16 @llvm.floor.v4f16(%v4f16) #0
-declare %v4f16 @llvm.ceil.v4f16(%v4f16) #0
-declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0
-declare %v4f16 @llvm.rint.v4f16(%v4f16) #0
-declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0
-declare %v4f16 @llvm.round.v4f16(%v4f16) #0
-declare %v4f16 @llvm.roundeven.v4f16(%v4f16) #0
 
 ;;;
 
 %v8f16 = type <8 x half>
 
-; FALLBACK-NOT: remark{{.*}}test_v8f16.sqrt
-define %v8f16 @test_v8f16.sqrt(%v8f16 %a) {
-  ; CHECK-LABEL:          test_v8f16.sqrt:
-  ; CHECK-NOFP16-COUNT-8: fsqrt s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           fsqrt.8h
-  ; CHECK-FP16-NEXT:      ret
-  ; GISEL-LABEL:          test_v8f16.sqrt:
-  ; GISEL-NOFP16:         fcvt
-  ; GISEL-NOFP16-COUNT-2: fsqrt.4s
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           fsqrt.8h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v8f16 @llvm.sqrt.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
 define %v8f16 @test_v8f16.powi(%v8f16 %a, i32 %b) {
   ; This operation is expanded, whether with or without +fullfp16.
   ; CHECK-LABEL:   test_v8f16.powi:
@@ -409,132 +243,6 @@ define %v8f16 @test_v8f16.fma(%v8f16 %a, %v8f16 %b, %v8f16 %c) {
   ret %v8f16 %1
 }
 
-; FALLBACK-NOT: remark{{.*}}test_v8f16.fabs
-define %v8f16 @test_v8f16.fabs(%v8f16 %a) {
-  ; CHECK-LABEL:          test_v8f16.fabs:
-  ; CHECK-NOFP16-COUNT-8: fabs s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           fabs.8h
-  ; CHECK-FP16-NEXT:      ret
-
-  ; GISEL-LABEL:          test_v8f16.fabs:
-  ; GISEL-NOFP16-COUNT-2: fabs.4s
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           fabs.8h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v8f16 @llvm.fabs.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.floor
-define %v8f16 @test_v8f16.floor(%v8f16 %a) {
-  ; CHECK-LABEL:     		  test_v8f16.floor:
-  ; CHECK-NOFP16-COUNT-8: frintm s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frintm.8h
-  ; CHECK-FP16-NEXT:      ret
-
-  ; GISEL-LABEL:     		  test_v8f16.floor:
-  ; GISEL-NOFP16-COUNT-8: frintm s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frintm.8h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v8f16 @llvm.floor.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-define %v8f16 @test_v8f16.ceil(%v8f16 %a) {
-  ; CHECK-LABEL:          test_v8f16.ceil:
-  ; CHECK-NOFP16-COUNT-8: frintp s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frintp.8h
-  ; CHECK-FP16-NEXT:      ret
-  ; FALLBACK-NOT:         remark{{.*}}test_v8f16.ceil:
-  ; GISEL-LABEL:          test_v8f16.ceil:
-  ; GISEL-NOFP16-COUNT-8: frintp s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frintp.8h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v8f16 @llvm.ceil.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.trunc
-define %v8f16 @test_v8f16.trunc(%v8f16 %a) {
-  ; CHECK-LABEL:          test_v8f16.trunc:
-  ; CHECK-NOFP16-COUNT-8: frintz s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frintz.8h
-  ; CHECK-FP16-NEXT:      ret
-  ; GISEL-LABEL:          test_v8f16.trunc:
-  ; GISEL-NOFP16-COUNT-8: frintz s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frintz.8h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v8f16 @llvm.trunc.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.rint
-define %v8f16 @test_v8f16.rint(%v8f16 %a) {
-  ; CHECK-LABEL:          test_v8f16.rint:
-  ; CHECK-NOFP16-COUNT-8: frintx s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frintx.8h
-  ; CHECK-FP16-NEXT:      ret
-  ; GISEL-LABEL:          test_v8f16.rint:
-  ; GISEL-NOFP16-COUNT-8: frintx s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frintx.8h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v8f16 @llvm.rint.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.nearbyint
-define %v8f16 @test_v8f16.nearbyint(%v8f16 %a) {
-  ; CHECK-LABEL:          test_v8f16.nearbyint:
-  ; CHECK-NOFP16-COUNT-8: frinti s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frinti.8h
-  ; CHECK-FP16-NEXT:      ret
-  ; GISEL-LABEL:          test_v8f16.nearbyint:
-  ; GISEL-NOFP16-COUNT-8: frinti s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frinti.8h
-  ; GISEL-FP16-NEXT:      ret
-  %1 = call %v8f16 @llvm.nearbyint.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-define %v8f16 @test_v8f16.round(%v8f16 %a) {
-  ; CHECK-LABEL:          test_v8f16.round:
-  ; CHECK-NOFP16-COUNT-8: frinta s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frinta.8h
-  ; CHECK-FP16-NEXT:      ret
-  ; GISEL-LABEL:          test_v8f16.round:
-  ; GISEL-NOFP16-COUNT-8: frinta s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frinta.8h
-  ; GISEL-FP16-NEXT:      ret
-  %1 =  call %v8f16 @llvm.round.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-define %v8f16 @test_v8f16.roundeven(%v8f16 %a) {
-  ; CHECK-LABEL:          test_v8f16.roundeven:
-  ; CHECK-NOFP16-COUNT-8: frintn s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           frintn.8h
-  ; CHECK-FP16-NEXT:      ret
-  ; GISEL-LABEL:          test_v8f16.roundeven:
-  ; GISEL-NOFP16-COUNT-8: frintn s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           frintn.8h
-  ; GISEL-FP16-NEXT:      ret
-  %1 =  call %v8f16 @llvm.roundeven.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-
-declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0
 declare %v8f16 @llvm.powi.v8f16.i32(%v8f16, i32) #0
 declare %v8f16 @llvm.sin.v8f16(%v8f16) #0
 declare %v8f16 @llvm.cos.v8f16(%v8f16) #0
@@ -545,28 +253,11 @@ declare %v8f16 @llvm.log.v8f16(%v8f16) #0
 declare %v8f16 @llvm.log10.v8f16(%v8f16) #0
 declare %v8f16 @llvm.log2.v8f16(%v8f16) #0
 declare %v8f16 @llvm.fma.v8f16(%v8f16, %v8f16, %v8f16) #0
-declare %v8f16 @llvm.fabs.v8f16(%v8f16) #0
-declare %v8f16 @llvm.floor.v8f16(%v8f16) #0
-declare %v8f16 @llvm.ceil.v8f16(%v8f16) #0
-declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0
-declare %v8f16 @llvm.rint.v8f16(%v8f16) #0
-declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0
-declare %v8f16 @llvm.round.v8f16(%v8f16) #0
-declare %v8f16 @llvm.roundeven.v8f16(%v8f16) #0
 
 ;;; Float vectors
 
 %v2f32 = type <2 x float>
 
-; FALLBACK-NOT: remark{{.*}}test_v2f32.sqrt
-; CHECK-LABEL: test_v2f32.sqrt:
-; GISEL-LABEL: test_v2f32.sqrt:
-define %v2f32 @test_v2f32.sqrt(%v2f32 %a) {
-  ; CHECK: fsqrt.2s
-  ; GISEL: fsqrt.2s
-  %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
 ; CHECK: test_v2f32.powi:
 define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) {
   ; CHECK: pow
@@ -657,64 +348,6 @@ define %v2f32 @test_v2f32.fma(%v2f32 %a, %v2f32 %b, %v2f32 %c) {
   ret %v2f32 %1
 }
 
-; FALLBACK-NOT: remark{{.*}}test_v2f32.fabs
-; CHECK-LABEL: test_v2f32.fabs:
-; GISEL-LABEL: test_v2f32.fabs:
-define %v2f32 @test_v2f32.fabs(%v2f32 %a) {
-  ; CHECK: fabs.2s
-  ; GISEL: fabs.2s
-  %1 = call %v2f32 @llvm.fabs.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f32.floor
-; CHECK-LABEL: test_v2f32.floor:
-; GISEL-LABEL: test_v2f32.floor:
-define %v2f32 @test_v2f32.floor(%v2f32 %a) {
-  ; CHECK: frintm.2s
-  ; GISEL: frintm.2s
-  %1 = call %v2f32 @llvm.floor.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-; CHECK-LABEL: test_v2f32.ceil:
-; FALLBACK-NOT: remark{{.*}}test_v2f32.ceil
-; GISEL-LABEL: test_v2f32.ceil:
-define %v2f32 @test_v2f32.ceil(%v2f32 %a) {
-  ; CHECK: frintp.2s
-  ; GISEL: frintp.2s
-  %1 = call %v2f32 @llvm.ceil.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-; CHECK-LABEL: test_v2f32.trunc:
-; FALLBACK-NOT: remark{{.*}}test_v2f32.trunc
-; GISEL-LABEL: test_v2f32.trunc:
-define %v2f32 @test_v2f32.trunc(%v2f32 %a) {
-  ; CHECK: frintz.2s
-  ; GISEL: frintz.2s
-  %1 = call %v2f32 @llvm.trunc.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-; CHECK-LABEL: test_v2f32.rint:
-; FALLBACK-NOT: remark{{.*}}test_v2f32.rint
-; GISEL-LABEL: test_v2f32.rint:
-define %v2f32 @test_v2f32.rint(%v2f32 %a) {
-  ; CHECK: frintx.2s
-  ; GISEL: frintx.2s
-  %1 = call %v2f32 @llvm.rint.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f32.nearbyint
-; CHECK-LABEL: test_v2f32.nearbyint:
-; GISEL-LABEL: test_v2f32.nearbyint:
-define %v2f32 @test_v2f32.nearbyint(%v2f32 %a) {
-  ; CHECK: frinti.2s
-  ; GISEL: frinti.2s
-  %1 = call %v2f32 @llvm.nearbyint.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-
-declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0
 declare %v2f32 @llvm.powi.v2f32.i32(%v2f32, i32) #0
 declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
 declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
@@ -725,26 +358,11 @@ declare %v2f32 @llvm.log.v2f32(%v2f32) #0
 declare %v2f32 @llvm.log10.v2f32(%v2f32) #0
 declare %v2f32 @llvm.log2.v2f32(%v2f32) #0
 declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0
-declare %v2f32 @llvm.fabs.v2f32(%v2f32) #0
-declare %v2f32 @llvm.floor.v2f32(%v2f32) #0
-declare %v2f32 @llvm.ceil.v2f32(%v2f32) #0
-declare %v2f32 @llvm.trunc.v2f32(%v2f32) #0
-declare %v2f32 @llvm.rint.v2f32(%v2f32) #0
-declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0
 
 ;;;
 
 %v4f32 = type <4 x float>
 
-; FALLBACK-NOT: remark{{.*}}test_v4f32.sqrt
-; CHECK: test_v4f32.sqrt:
-; GISEL: test_v4f32.sqrt:
-define %v4f32 @test_v4f32.sqrt(%v4f32 %a) {
-  ; CHECK: fsqrt.4s
-  ; GISEL: fsqrt.4s
-  %1 = call %v4f32 @llvm.sqrt.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
 ; CHECK: test_v4f32.powi:
 define %v4f32 @test_v4f32.powi(%v4f32 %a, i32 %b) {
   ; CHECK: pow
@@ -834,64 +452,6 @@ define %v4f32 @test_v4f32.fma(%v4f32 %a, %v4f32 %b, %v4f32 %c) {
   ret %v4f32 %1
 }
 
-; FALLBACK-NOT: remark{{.*}}test_v4f32.fabs
-; CHECK: test_v4f32.fabs:
-; GISEL: test_v4f32.fabs:
-define %v4f32 @test_v4f32.fabs(%v4f32 %a) {
-  ; CHECK: fabs
-  ; GISEL: fabs
-  %1 = call %v4f32 @llvm.fabs.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f32.floor
-; CHECK: test_v4f32.floor:
-; GISEL: test_v4f32.floor:
-define %v4f32 @test_v4f32.floor(%v4f32 %a) {
-  ; CHECK: frintm.4s
-  ; GISEL: frintm.4s
-  %1 = call %v4f32 @llvm.floor.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-; CHECK: test_v4f32.ceil:
-; FALLBACK-NOT: remark{{.*}}test_v4f32.ceil
-; GISEL-LABEL: test_v4f32.ceil:
-define %v4f32 @test_v4f32.ceil(%v4f32 %a) {
-  ; CHECK: frintp.4s
-  ; GISEL: frintp.4s
-  %1 = call %v4f32 @llvm.ceil.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-; CHECK: test_v4f32.trunc:
-; FALLBACK-NOT: remark{{.*}}test_v4f32.trunc
-; GISEL: test_v4f32.trunc:
-define %v4f32 @test_v4f32.trunc(%v4f32 %a) {
-  ; CHECK: frintz.4s
-  ; GISEL: frintz.4s
-  %1 = call %v4f32 @llvm.trunc.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-; CHECK: test_v4f32.rint:
-; FALLBACK-NOT: remark{{.*}}test_v4f32.rint
-; GISEL: test_v4f32.rint:
-define %v4f32 @test_v4f32.rint(%v4f32 %a) {
-  ; CHECK: frintx.4s
-  ; GISEL: frintx.4s
-  %1 = call %v4f32 @llvm.rint.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f32.nearbyint
-; CHECK: test_v4f32.nearbyint:
-; GISEL: test_v4f32.nearbyint:
-define %v4f32 @test_v4f32.nearbyint(%v4f32 %a) {
-  ; CHECK: frinti.4s
-  ; GISEL: frinti.4s
-  %1 = call %v4f32 @llvm.nearbyint.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-
-declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0
 declare %v4f32 @llvm.powi.v4f32.i32(%v4f32, i32) #0
 declare %v4f32 @llvm.sin.v4f32(%v4f32) #0
 declare %v4f32 @llvm.cos.v4f32(%v4f32) #0
@@ -902,25 +462,11 @@ declare %v4f32 @llvm.log.v4f32(%v4f32) #0
 declare %v4f32 @llvm.log10.v4f32(%v4f32) #0
 declare %v4f32 @llvm.log2.v4f32(%v4f32) #0
 declare %v4f32 @llvm.fma.v4f32(%v4f32, %v4f32, %v4f32) #0
-declare %v4f32 @llvm.fabs.v4f32(%v4f32) #0
-declare %v4f32 @llvm.floor.v4f32(%v4f32) #0
-declare %v4f32 @llvm.ceil.v4f32(%v4f32) #0
-declare %v4f32 @llvm.trunc.v4f32(%v4f32) #0
-declare %v4f32 @llvm.rint.v4f32(%v4f32) #0
-declare %v4f32 @llvm.nearbyint.v4f32(%v4f32) #0
 
 ;;; Double vector
 
 %v2f64 = type <2 x double>
-; FALLBACK-NOT: remark{{.*}}test_v2f64.sqrt
-; CHECK: test_v2f64.sqrt:
-; GISEL: test_v2f64.sqrt:
-define %v2f64 @test_v2f64.sqrt(%v2f64 %a) {
-  ; CHECK: fsqrt.2d
-  ; GISEL: fsqrt.2d
-  %1 = call %v2f64 @llvm.sqrt.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
+
 ; CHECK: test_v2f64.powi:
 define %v2f64 @test_v2f64.powi(%v2f64 %a, i32 %b) {
   ; CHECK: pow
@@ -1011,64 +557,6 @@ define %v2f64 @test_v2f64.fma(%v2f64 %a, %v2f64 %b, %v2f64 %c) {
   ret %v2f64 %1
 }
 
-; FALLBACK-NOT: remark{{.*}}test_v2f64.fabs
-; CHECK: test_v2f64.fabs:
-; GISEL: test_v2f64.fabs:
-define %v2f64 @test_v2f64.fabs(%v2f64 %a) {
-  ; CHECK: fabs
-  ; GISEL: fabs
-  %1 = call %v2f64 @llvm.fabs.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f64.floor
-; CHECK: test_v2f64.floor:
-; GISEL: test_v2f64.floor:
-define %v2f64 @test_v2f64.floor(%v2f64 %a) {
-  ; CHECK: frintm.2d
-  ; GISEL: frintm.2d
-  %1 = call %v2f64 @llvm.floor.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-; CHECK: test_v2f64.ceil:
-; FALLBACK-NOT: remark{{.*}}test_v2f64.ceil
-; GISEL-LABEL: test_v2f64.ceil:
-define %v2f64 @test_v2f64.ceil(%v2f64 %a) {
-  ; CHECK: frintp.2d
-  ; GISEL: frintp.2d
-  %1 = call %v2f64 @llvm.ceil.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-; CHECK: test_v2f64.trunc:
-; FALLBACK-NOT: remark{{.*}}test_v2f64.trunc
-; GISEL: test_v2f64.trunc:
-define %v2f64 @test_v2f64.trunc(%v2f64 %a) {
-  ; CHECK: frintz.2d
-  ; GISEL: frintz.2d
-  %1 = call %v2f64 @llvm.trunc.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-; CHECK: test_v2f64.rint:
-; FALLBACK-NOT: remark{{.*}}test_v2f64.rint
-; GISEL: test_v2f64.rint:
-define %v2f64 @test_v2f64.rint(%v2f64 %a) {
-  ; CHECK: frintx.2d
-  ; GISEL: frintx.2d
-  %1 = call %v2f64 @llvm.rint.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f64.nearbyint
-; CHECK: test_v2f64.nearbyint:
-; GISEL: test_v2f64.nearbyint:
-define %v2f64 @test_v2f64.nearbyint(%v2f64 %a) {
-  ; CHECK: frinti.2d
-  ; GISEL: frinti.2d
-  %1 = call %v2f64 @llvm.nearbyint.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-
-declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0
 declare %v2f64 @llvm.powi.v2f64.i32(%v2f64, i32) #0
 declare %v2f64 @llvm.sin.v2f64(%v2f64) #0
 declare %v2f64 @llvm.cos.v2f64(%v2f64) #0
@@ -1079,11 +567,5 @@ declare %v2f64 @llvm.log.v2f64(%v2f64) #0
 declare %v2f64 @llvm.log10.v2f64(%v2f64) #0
 declare %v2f64 @llvm.log2.v2f64(%v2f64) #0
 declare %v2f64 @llvm.fma.v2f64(%v2f64, %v2f64, %v2f64) #0
-declare %v2f64 @llvm.fabs.v2f64(%v2f64) #0
-declare %v2f64 @llvm.floor.v2f64(%v2f64) #0
-declare %v2f64 @llvm.ceil.v2f64(%v2f64) #0
-declare %v2f64 @llvm.trunc.v2f64(%v2f64) #0
-declare %v2f64 @llvm.rint.v2f64(%v2f64) #0
-declare %v2f64 @llvm.nearbyint.v2f64(%v2f64) #0
 
 attributes #0 = { nounwind readonly }

diff  --git a/llvm/test/CodeGen/AArch64/fcvt.ll b/llvm/test/CodeGen/AArch64/fcvt.ll
new file mode 100644
index 00000000000000..aaa9d657c7913e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fcvt.ll
@@ -0,0 +1,4167 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
+
+; CHECK-GI:       warning: Instruction selection used fallback path for ceil_v3f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for ceil_v4f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for ceil_v3f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for ceil_v8f32
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for ceil_v7f16
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for ceil_v16f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for floor_v3f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for floor_v4f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for floor_v3f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for floor_v8f32
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for floor_v7f16
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for floor_v16f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for nearbyint_v3f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for nearbyint_v4f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for nearbyint_v3f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for nearbyint_v8f32
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for nearbyint_v7f16
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for nearbyint_v16f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v3f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v4f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v2f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v3f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v8f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v7f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v4f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v8f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v16f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for rint_v3f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for rint_v4f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for rint_v3f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for rint_v8f32
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for rint_v7f16
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for rint_v16f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for round_v3f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for round_v4f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for round_v3f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for round_v8f32
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for round_v7f16
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for round_v16f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for trunc_v3f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for trunc_v4f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for trunc_v3f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for trunc_v8f32
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for trunc_v7f16
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for trunc_v16f16
+
+define double @ceil_f64(double %a) {
+; CHECK-LABEL: ceil_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintp d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %c = call double @llvm.ceil.f64(double %a)
+  ret double %c
+}
+
+define float @ceil_f32(float %a) {
+; CHECK-LABEL: ceil_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintp s0, s0
+; CHECK-NEXT:    ret
+entry:
+  %c = call float @llvm.ceil.f32(float %a)
+  ret float %c
+}
+
+define half @ceil_f16(half %a) {
+; CHECK-SD-NOFP16-LABEL: ceil_f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintp s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: ceil_f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintp h0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: ceil_f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frintp s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: ceil_f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintp h0, h0
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call half @llvm.ceil.f16(half %a)
+  ret half %c
+}
+
+define <2 x double> @ceil_v2f64(<2 x double> %a) {
+; CHECK-LABEL: ceil_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintp v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
+  ret <2 x double> %c
+}
+
+define <3 x double> @ceil_v3f64(<3 x double> %a) {
+; CHECK-LABEL: ceil_v3f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    frintp v2.2d, v2.2d
+; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT:    frintp v0.2d, v0.2d
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x double> @llvm.ceil.v3f64(<3 x double> %a)
+  ret <3 x double> %c
+}
+
+define <4 x double> @ceil_v4f64(<4 x double> %a) {
+; CHECK-LABEL: ceil_v4f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintp v0.2d, v0.2d
+; CHECK-NEXT:    frintp v1.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x double> @llvm.ceil.v4f64(<4 x double> %a)
+  ret <4 x double> %c
+}
+
+define <2 x float> @ceil_v2f32(<2 x float> %a) {
+; CHECK-LABEL: ceil_v2f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintp v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x float> @llvm.ceil.v2f32(<2 x float> %a)
+  ret <2 x float> %c
+}
+
+define <3 x float> @ceil_v3f32(<3 x float> %a) {
+; CHECK-LABEL: ceil_v3f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintp v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x float> @llvm.ceil.v3f32(<3 x float> %a)
+  ret <3 x float> %c
+}
+
+define <4 x float> @ceil_v4f32(<4 x float> %a) {
+; CHECK-LABEL: ceil_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintp v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
+  ret <4 x float> %c
+}
+
+define <8 x float> @ceil_v8f32(<8 x float> %a) {
+; CHECK-LABEL: ceil_v8f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintp v0.4s, v0.4s
+; CHECK-NEXT:    frintp v1.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <8 x float> @llvm.ceil.v8f32(<8 x float> %a)
+  ret <8 x float> %c
+}
+
+; llvm.ceil on the non-power-of-two type <7 x half>. Four expectation groups
+; follow (SD/GI x NOFP16/FP16; presumably SelectionDAG vs GlobalISel and
+; without/with native half arithmetic -- the RUN lines are outside this view).
+;  * FP16 groups: one frintp over the full .8h arrangement of v0.
+;  * NOFP16 groups: each lane is extracted, widened with fcvt to single,
+;    rounded with scalar frintp, narrowed back to half and inserted into v1,
+;    which is finally copied to v0.
+;  * The SD NOFP16 group processes all eight lanes (including v0.h[7]); the
+;    GI NOFP16 group rounds only lanes 0-6 and writes the lane-6 result into
+;    both lane 6 and lane 7 of the result.
+define <7 x half> @ceil_v7f16(<7 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: ceil_v7f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frintp s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintp s5, s1
+; CHECK-SD-NOFP16-NEXT:    frintp s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frintp s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintp s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frintp s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintp s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintp s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: ceil_v7f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintp v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: ceil_v7f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintp s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frintp s5, s1
+; CHECK-GI-NOFP16-NEXT:    frintp s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frintp s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintp s3, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s3
+; CHECK-GI-NOFP16-NEXT:    frintp s3, s5
+; CHECK-GI-NOFP16-NEXT:    frintp s0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: ceil_v7f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintp v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <7 x half> @llvm.ceil.v7f16(<7 x half> %a)
+  ret <7 x half> %c
+}
+
+; llvm.ceil on <4 x half>, a 64-bit vector held in d0.
+;  * FP16 groups: one frintp over the .4h arrangement.
+;  * NOFP16 groups: the kill annotations show d0 being treated as q0 so lanes
+;    can be extracted; each of the four lanes is widened with fcvt, rounded
+;    with scalar frintp, narrowed back to half and inserted into v0 in place.
+;    The SD and GI groups differ only in scheduling and register choice.
+define <4 x half> @ceil_v4f16(<4 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: ceil_v4f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frintp s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintp s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT:    frintp s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    frintp s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: ceil_v4f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintp v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: ceil_v4f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintp s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    frintp s1, s1
+; CHECK-GI-NOFP16-NEXT:    frintp s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s3
+; CHECK-GI-NOFP16-NEXT:    frintp s3, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: ceil_v4f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintp v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <4 x half> @llvm.ceil.v4f16(<4 x half> %a)
+  ret <4 x half> %c
+}
+
+; llvm.ceil on <8 x half>.
+;  * FP16 groups: one frintp over the .8h arrangement of v0.
+;  * NOFP16 groups: all eight lanes are handled one at a time (extract, fcvt
+;    to single, scalar frintp, fcvt back to half, insert into v1), then v1 is
+;    copied to v0. The SD and GI groups differ only in scheduling and
+;    register choice.
+define <8 x half> @ceil_v8f16(<8 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: ceil_v8f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frintp s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintp s5, s1
+; CHECK-SD-NOFP16-NEXT:    frintp s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frintp s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintp s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frintp s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintp s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintp s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: ceil_v8f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintp v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: ceil_v8f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintp s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frintp s5, s1
+; CHECK-GI-NOFP16-NEXT:    frintp s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frintp s4, s4
+; CHECK-GI-NOFP16-NEXT:    frintp s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frintp s2, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintp s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    frintp s0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: ceil_v8f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintp v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
+  ret <8 x half> %c
+}
+
+; llvm.ceil on <16 x half>, which occupies two registers (v0 and v1).
+;  * FP16 groups: one .8h frintp per register.
+;  * NOFP16 groups: the lanes of v0 and v1 are processed interleaved, each
+;    one extracted, widened with fcvt, rounded with scalar frintp and narrowed
+;    back to half. Results are assembled in v2 (from v0) and v3 (from v1) and
+;    copied back to v0/v1 before ret. The SD and GI groups differ only in
+;    scheduling and scratch-register choice.
+define <16 x half> @ceil_v16f16(<16 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: ceil_v16f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintp s5, s5
+; CHECK-SD-NOFP16-NEXT:    frintp s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    frintp s16, s2
+; CHECK-SD-NOFP16-NEXT:    frintp s17, s3
+; CHECK-SD-NOFP16-NEXT:    frintp s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT:    frintp s7, s7
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT:    mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    frintp s17, s17
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT:    frintp s16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintp s5, s5
+; CHECK-SD-NOFP16-NEXT:    frintp s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT:    frintp s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    frintp s6, s6
+; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintp s5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frintp s0, s0
+; CHECK-SD-NOFP16-NEXT:    frintp s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintp s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: ceil_v16f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintp v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    frintp v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: ceil_v16f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h19, v1.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frintp s4, s4
+; CHECK-GI-NOFP16-NEXT:    frintp s5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frintp s16, s2
+; CHECK-GI-NOFP16-NEXT:    frintp s17, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    frintp s6, s6
+; CHECK-GI-NOFP16-NEXT:    frintp s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt s16, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h19
+; CHECK-GI-NOFP16-NEXT:    mov h18, v1.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[1], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s6
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s7
+; CHECK-GI-NOFP16-NEXT:    frintp s16, s16
+; CHECK-GI-NOFP16-NEXT:    frintp s17, s17
+; CHECK-GI-NOFP16-NEXT:    mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s18, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s17
+; CHECK-GI-NOFP16-NEXT:    mov h17, v1.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frintp s5, s5
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintp s16, s18
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT:    frintp s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT:    frintp s17, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT:    frintp s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frintp s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT:    frintp s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    frintp s1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[5], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: ceil_v16f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintp v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    frintp v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <16 x half> @llvm.ceil.v16f16(<16 x half> %a)
+  ret <16 x half> %c
+}
+
+; Scalar llvm.floor on double. All run configurations share one expectation,
+; a single scalar frintm on d0 followed by ret.
+define double @floor_f64(double %a) {
+; CHECK-LABEL: floor_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintm d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %c = call double @llvm.floor.f64(double %a)
+  ret double %c
+}
+
+; Scalar llvm.floor on float. All run configurations share one expectation,
+; a single scalar frintm on s0 followed by ret.
+define float @floor_f32(float %a) {
+; CHECK-LABEL: floor_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintm s0, s0
+; CHECK-NEXT:    ret
+entry:
+  %c = call float @llvm.floor.f32(float %a)
+  ret float %c
+}
+
+; Scalar llvm.floor on half. Four expectation groups follow (SD/GI x
+; NOFP16/FP16; presumably SelectionDAG vs GlobalISel and without/with native
+; half arithmetic -- the RUN lines are outside this view).
+;  * FP16 groups: a single frintm directly on h0.
+;  * NOFP16 groups: the value is widened with fcvt to single, rounded with
+;    frintm on s0, then narrowed back to half.
+define half @floor_f16(half %a) {
+; CHECK-SD-NOFP16-LABEL: floor_f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintm s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: floor_f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintm h0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: floor_f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frintm s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: floor_f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintm h0, h0
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call half @llvm.floor.f16(half %a)
+  ret half %c
+}
+
+; llvm.floor on <2 x double>. All run configurations share one expectation,
+; a single vector frintm over the .2d arrangement of v0 followed by ret.
+define <2 x double> @floor_v2f64(<2 x double> %a) {
+; CHECK-LABEL: floor_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintm v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
+  ret <2 x double> %c
+}
+
+; llvm.floor on the non-power-of-two type <3 x double>. Per the expected code,
+; the three elements are in d0, d1 and d2 (see the kill annotations). d1 is
+; inserted into the upper lane of v0 so elements 0 and 1 are rounded by one
+; .2d frintm; ext then moves the upper half of v0 back out into v1. Element 2
+; is rounded by a separate .2d frintm on v2.
+define <3 x double> @floor_v3f64(<3 x double> %a) {
+; CHECK-LABEL: floor_v3f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    frintm v2.2d, v2.2d
+; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT:    frintm v0.2d, v0.2d
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x double> @llvm.floor.v3f64(<3 x double> %a)
+  ret <3 x double> %c
+}
+
+; llvm.floor on <4 x double>. The expected code rounds v0 and v1 independently
+; with one .2d frintm each, so the 4-lane vector is handled as two 2-lane halves.
+define <4 x double> @floor_v4f64(<4 x double> %a) {
+; CHECK-LABEL: floor_v4f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintm v0.2d, v0.2d
+; CHECK-NEXT:    frintm v1.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x double> @llvm.floor.v4f64(<4 x double> %a)
+  ret <4 x double> %c
+}
+
+; llvm.floor on <2 x float>. All run configurations share one expectation,
+; a single vector frintm over the .2s arrangement of v0 followed by ret.
+define <2 x float> @floor_v2f32(<2 x float> %a) {
+; CHECK-LABEL: floor_v2f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintm v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x float> @llvm.floor.v2f32(<2 x float> %a)
+  ret <2 x float> %c
+}
+
+; llvm.floor on the non-power-of-two type <3 x float>. The expected code is
+; the same as for four lanes, one frintm over the full .4s arrangement of v0.
+define <3 x float> @floor_v3f32(<3 x float> %a) {
+; CHECK-LABEL: floor_v3f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintm v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x float> @llvm.floor.v3f32(<3 x float> %a)
+  ret <3 x float> %c
+}
+
+; llvm.floor on <4 x float>. All run configurations share one expectation,
+; a single vector frintm over the .4s arrangement of v0 followed by ret.
+define <4 x float> @floor_v4f32(<4 x float> %a) {
+; CHECK-LABEL: floor_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintm v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
+  ret <4 x float> %c
+}
+
+; llvm.floor on <8 x float>. The expected code rounds v0 and v1 independently
+; with one .4s frintm each, so the 8-lane vector is handled as two 4-lane halves.
+define <8 x float> @floor_v8f32(<8 x float> %a) {
+; CHECK-LABEL: floor_v8f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintm v0.4s, v0.4s
+; CHECK-NEXT:    frintm v1.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <8 x float> @llvm.floor.v8f32(<8 x float> %a)
+  ret <8 x float> %c
+}
+
+; llvm.floor on the non-power-of-two type <7 x half>. Four expectation groups
+; follow (SD/GI x NOFP16/FP16; presumably SelectionDAG vs GlobalISel and
+; without/with native half arithmetic -- the RUN lines are outside this view).
+;  * FP16 groups: one frintm over the full .8h arrangement of v0.
+;  * NOFP16 groups: each lane is extracted, widened with fcvt to single,
+;    rounded with scalar frintm, narrowed back to half and inserted into v1,
+;    which is finally copied to v0.
+;  * The SD NOFP16 group processes all eight lanes (including v0.h[7]); the
+;    GI NOFP16 group rounds only lanes 0-6 and writes the lane-6 result into
+;    both lane 6 and lane 7 of the result.
+define <7 x half> @floor_v7f16(<7 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: floor_v7f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frintm s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintm s5, s1
+; CHECK-SD-NOFP16-NEXT:    frintm s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frintm s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintm s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frintm s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintm s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintm s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: floor_v7f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintm v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: floor_v7f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintm s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frintm s5, s1
+; CHECK-GI-NOFP16-NEXT:    frintm s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frintm s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintm s3, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s3
+; CHECK-GI-NOFP16-NEXT:    frintm s3, s5
+; CHECK-GI-NOFP16-NEXT:    frintm s0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: floor_v7f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintm v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <7 x half> @llvm.floor.v7f16(<7 x half> %a)
+  ret <7 x half> %c
+}
+
+; llvm.floor on <4 x half>, a 64-bit vector held in d0.
+;  * FP16 groups: one frintm over the .4h arrangement.
+;  * NOFP16 groups: the kill annotations show d0 being treated as q0 so lanes
+;    can be extracted; each of the four lanes is widened with fcvt, rounded
+;    with scalar frintm, narrowed back to half and inserted into v0 in place.
+;    The SD and GI groups differ only in scheduling and register choice.
+define <4 x half> @floor_v4f16(<4 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: floor_v4f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frintm s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintm s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT:    frintm s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    frintm s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: floor_v4f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintm v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: floor_v4f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintm s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    frintm s1, s1
+; CHECK-GI-NOFP16-NEXT:    frintm s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s3
+; CHECK-GI-NOFP16-NEXT:    frintm s3, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: floor_v4f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintm v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <4 x half> @llvm.floor.v4f16(<4 x half> %a)
+  ret <4 x half> %c
+}
+
+; llvm.floor on <8 x half>.
+;  * FP16 groups: one frintm over the .8h arrangement of v0.
+;  * NOFP16 groups: all eight lanes are handled one at a time (extract, fcvt
+;    to single, scalar frintm, fcvt back to half, insert into v1), then v1 is
+;    copied to v0. The SD and GI groups differ only in scheduling and
+;    register choice.
+define <8 x half> @floor_v8f16(<8 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: floor_v8f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frintm s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintm s5, s1
+; CHECK-SD-NOFP16-NEXT:    frintm s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frintm s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintm s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frintm s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintm s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintm s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: floor_v8f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintm v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: floor_v8f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintm s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frintm s5, s1
+; CHECK-GI-NOFP16-NEXT:    frintm s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frintm s4, s4
+; CHECK-GI-NOFP16-NEXT:    frintm s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frintm s2, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintm s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    frintm s0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: floor_v8f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintm v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
+  ret <8 x half> %c
+}
+
+; llvm.floor on <16 x half>; the argument and result occupy two registers (v0, v1).
+; Expected in the FP16 variants: one frintm per .8h register.
+; Expected in the NOFP16 variants: each of the 16 lanes is moved out, widened
+; to single precision with fcvt, rounded with frintm, narrowed again with
+; fcvt and re-inserted into v2/v3, which are then copied back to v0/v1.
+; NOTE(review): the SD/GI and FP16/NOFP16 prefix parts presumably select the
+; instruction selector and +fullfp16 on/off; the RUN lines are outside this
+; view - confirm.
+define <16 x half> @floor_v16f16(<16 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: floor_v16f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintm s5, s5
+; CHECK-SD-NOFP16-NEXT:    frintm s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    frintm s16, s2
+; CHECK-SD-NOFP16-NEXT:    frintm s17, s3
+; CHECK-SD-NOFP16-NEXT:    frintm s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT:    frintm s7, s7
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT:    mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    frintm s17, s17
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT:    frintm s16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintm s5, s5
+; CHECK-SD-NOFP16-NEXT:    frintm s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT:    frintm s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    frintm s6, s6
+; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintm s5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frintm s0, s0
+; CHECK-SD-NOFP16-NEXT:    frintm s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintm s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: floor_v16f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintm v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    frintm v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: floor_v16f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h19, v1.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frintm s4, s4
+; CHECK-GI-NOFP16-NEXT:    frintm s5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frintm s16, s2
+; CHECK-GI-NOFP16-NEXT:    frintm s17, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    frintm s6, s6
+; CHECK-GI-NOFP16-NEXT:    frintm s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt s16, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h19
+; CHECK-GI-NOFP16-NEXT:    mov h18, v1.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[1], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s6
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s7
+; CHECK-GI-NOFP16-NEXT:    frintm s16, s16
+; CHECK-GI-NOFP16-NEXT:    frintm s17, s17
+; CHECK-GI-NOFP16-NEXT:    mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s18, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s17
+; CHECK-GI-NOFP16-NEXT:    mov h17, v1.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frintm s5, s5
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintm s16, s18
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT:    frintm s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT:    frintm s17, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT:    frintm s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frintm s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT:    frintm s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    frintm s1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[5], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: floor_v16f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintm v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    frintm v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <16 x half> @llvm.floor.v16f16(<16 x half> %a)
+  ret <16 x half> %c
+}
+
+; Scalar double llvm.nearbyint. Only the shared check prefix is used, so the
+; same output - a single "frinti d0, d0" followed by ret - is expected from
+; every tested configuration (the RUN lines are outside this view).
+define double @nearbyint_f64(double %a) {
+; CHECK-LABEL: nearbyint_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinti d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %c = call double @llvm.nearbyint.f64(double %a)
+  ret double %c
+}
+
+; Scalar float llvm.nearbyint. Only the shared check prefix is used, so the
+; same output - a single "frinti s0, s0" followed by ret - is expected from
+; every tested configuration (the RUN lines are outside this view).
+define float @nearbyint_f32(float %a) {
+; CHECK-LABEL: nearbyint_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinti s0, s0
+; CHECK-NEXT:    ret
+entry:
+  %c = call float @llvm.nearbyint.f32(float %a)
+  ret float %c
+}
+
+; Scalar half llvm.nearbyint. The FP16 variants expect a direct
+; "frinti h0, h0"; the NOFP16 variants expect a round trip through single
+; precision (fcvt s0, h0 / frinti s0, s0 / fcvt h0, s0). Both SD and GI
+; variants expect identical code here.
+; NOTE(review): NOFP16 presumably means +fullfp16 is not enabled; the RUN
+; lines are outside this view - confirm.
+define half @nearbyint_f16(half %a) {
+; CHECK-SD-NOFP16-LABEL: nearbyint_f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frinti s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: nearbyint_f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frinti h0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: nearbyint_f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frinti s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: nearbyint_f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frinti h0, h0
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call half @llvm.nearbyint.f16(half %a)
+  ret half %c
+}
+
+; llvm.nearbyint on <2 x double>. Every tested configuration shares one
+; expectation: a single vector "frinti v0.2d, v0.2d".
+define <2 x double> @nearbyint_v2f64(<2 x double> %a) {
+; CHECK-LABEL: nearbyint_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinti v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
+  ret <2 x double> %c
+}
+
+; llvm.nearbyint on the non-power-of-two type <3 x double>; the three
+; elements are in d0, d1 and d2. Expected code: d1 is inserted into the
+; upper lane of v0 and the pair is rounded with one .2d frinti, then ext
+; moves the upper result back into d1; d2 is rounded by a second .2d frinti
+; on v2. The "// kill" lines are register-liveness annotations printed in
+; the assembly output and are matched like any other line.
+define <3 x double> @nearbyint_v3f64(<3 x double> %a) {
+; CHECK-LABEL: nearbyint_v3f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    frinti v2.2d, v2.2d
+; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT:    frinti v0.2d, v0.2d
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x double> @llvm.nearbyint.v3f64(<3 x double> %a)
+  ret <3 x double> %c
+}
+
+; llvm.nearbyint on <4 x double>, which spans two registers. Expected code
+; is one .2d frinti per register (v0, then v1) in every configuration.
+define <4 x double> @nearbyint_v4f64(<4 x double> %a) {
+; CHECK-LABEL: nearbyint_v4f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinti v0.2d, v0.2d
+; CHECK-NEXT:    frinti v1.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %a)
+  ret <4 x double> %c
+}
+
+; llvm.nearbyint on <2 x float>. Every tested configuration shares one
+; expectation: a single "frinti v0.2s, v0.2s".
+define <2 x float> @nearbyint_v2f32(<2 x float> %a) {
+; CHECK-LABEL: nearbyint_v2f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinti v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a)
+  ret <2 x float> %c
+}
+
+; llvm.nearbyint on the non-power-of-two type <3 x float>. The expected
+; code treats it as a full four-lane operation: one "frinti v0.4s, v0.4s",
+; the same instruction the <4 x float> test expects.
+define <3 x float> @nearbyint_v3f32(<3 x float> %a) {
+; CHECK-LABEL: nearbyint_v3f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinti v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x float> @llvm.nearbyint.v3f32(<3 x float> %a)
+  ret <3 x float> %c
+}
+
+; llvm.nearbyint on <4 x float>. Every tested configuration shares one
+; expectation: a single "frinti v0.4s, v0.4s".
+define <4 x float> @nearbyint_v4f32(<4 x float> %a) {
+; CHECK-LABEL: nearbyint_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinti v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a)
+  ret <4 x float> %c
+}
+
+; llvm.nearbyint on <8 x float>, which spans two registers. Expected code
+; is one .4s frinti per register (v0, then v1) in every configuration.
+define <8 x float> @nearbyint_v8f32(<8 x float> %a) {
+; CHECK-LABEL: nearbyint_v8f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinti v0.4s, v0.4s
+; CHECK-NEXT:    frinti v1.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %a)
+  ret <8 x float> %c
+}
+
+; llvm.nearbyint on the non-power-of-two type <7 x half>.
+; FP16 variants: expected to be handled as a full eight-lane operation, one
+; "frinti v0.8h, v0.8h".
+; NOFP16 variants: expected to be scalarised lane by lane (mov out, fcvt to
+; single, frinti, fcvt back to half, mov into v1). The two selectors differ
+; in the unused eighth lane: the SD expectation also extracts and rounds
+; v0.h[7], whereas the GI expectation rounds only lanes 0-6 and writes the
+; lane-6 result into both v1.h[6] and v1.h[7].
+; NOTE(review): SD/GI presumably name the two instruction selectors and
+; FP16/NOFP16 the +fullfp16 setting; the RUN lines are outside this view -
+; confirm.
+define <7 x half> @nearbyint_v7f16(<7 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: nearbyint_v7f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frinti s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frinti s5, s1
+; CHECK-SD-NOFP16-NEXT:    frinti s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frinti s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frinti s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frinti s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frinti s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frinti s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: nearbyint_v7f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frinti v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: nearbyint_v7f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frinti s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frinti s5, s1
+; CHECK-GI-NOFP16-NEXT:    frinti s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frinti s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    frinti s3, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s3
+; CHECK-GI-NOFP16-NEXT:    frinti s3, s5
+; CHECK-GI-NOFP16-NEXT:    frinti s0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: nearbyint_v7f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frinti v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <7 x half> @llvm.nearbyint.v7f16(<7 x half> %a)
+  ret <7 x half> %c
+}
+
+; llvm.nearbyint on <4 x half>, a 64-bit vector passed in d0.
+; FP16 variants: expected to emit one "frinti v0.4h, v0.4h".
+; NOFP16 variants: expected to scalarise the four lanes (mov out, fcvt to
+; single, frinti, fcvt back, mov in); the surrounding "// kill" annotations
+; show d0 being treated as part of q0 for the lane moves and narrowed back
+; to d0 before ret. SD and GI differ only in scheduling and register choice.
+; NOTE(review): FP16/NOFP16 presumably track +fullfp16; the RUN lines are
+; outside this view - confirm.
+define <4 x half> @nearbyint_v4f16(<4 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: nearbyint_v4f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frinti s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frinti s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT:    frinti s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    frinti s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: nearbyint_v4f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frinti v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: nearbyint_v4f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frinti s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    frinti s1, s1
+; CHECK-GI-NOFP16-NEXT:    frinti s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s3
+; CHECK-GI-NOFP16-NEXT:    frinti s3, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: nearbyint_v4f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frinti v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %a)
+  ret <4 x half> %c
+}
+
+; llvm.nearbyint on <8 x half>.
+; FP16 variants: expected to emit one "frinti v0.8h, v0.8h".
+; NOFP16 variants: expected to scalarise all eight lanes - each is moved
+; out, widened to single precision with fcvt, rounded with frinti, narrowed
+; back with fcvt and inserted into v1, which is finally copied to v0. SD
+; and GI differ only in scheduling and register choice.
+; NOTE(review): FP16/NOFP16 presumably track +fullfp16; the RUN lines are
+; outside this view - confirm.
+define <8 x half> @nearbyint_v8f16(<8 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: nearbyint_v8f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frinti s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frinti s5, s1
+; CHECK-SD-NOFP16-NEXT:    frinti s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frinti s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frinti s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frinti s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frinti s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frinti s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: nearbyint_v8f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frinti v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: nearbyint_v8f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frinti s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frinti s5, s1
+; CHECK-GI-NOFP16-NEXT:    frinti s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frinti s4, s4
+; CHECK-GI-NOFP16-NEXT:    frinti s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frinti s2, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    frinti s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    frinti s0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: nearbyint_v8f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frinti v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
+  ret <8 x half> %c
+}
+
+; llvm.nearbyint on <16 x half>; the argument and result occupy two
+; registers (v0, v1).
+; FP16 variants: expected to emit one .8h frinti per register.
+; NOFP16 variants: expected to scalarise all 16 lanes - each is moved out,
+; widened to single precision with fcvt, rounded with frinti, narrowed back
+; with fcvt and inserted into v2/v3, which are finally copied to v0/v1.
+; NOTE(review): the SD/GI and FP16/NOFP16 prefix parts presumably select the
+; instruction selector and +fullfp16 on/off; the RUN lines are outside this
+; view - confirm.
+define <16 x half> @nearbyint_v16f16(<16 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: nearbyint_v16f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frinti s5, s5
+; CHECK-SD-NOFP16-NEXT:    frinti s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    frinti s16, s2
+; CHECK-SD-NOFP16-NEXT:    frinti s17, s3
+; CHECK-SD-NOFP16-NEXT:    frinti s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT:    frinti s7, s7
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT:    mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    frinti s17, s17
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT:    frinti s16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frinti s5, s5
+; CHECK-SD-NOFP16-NEXT:    frinti s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT:    frinti s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    frinti s6, s6
+; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frinti s5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frinti s0, s0
+; CHECK-SD-NOFP16-NEXT:    frinti s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    frinti s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: nearbyint_v16f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frinti v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    frinti v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: nearbyint_v16f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h19, v1.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frinti s4, s4
+; CHECK-GI-NOFP16-NEXT:    frinti s5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frinti s16, s2
+; CHECK-GI-NOFP16-NEXT:    frinti s17, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    frinti s6, s6
+; CHECK-GI-NOFP16-NEXT:    frinti s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt s16, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h19
+; CHECK-GI-NOFP16-NEXT:    mov h18, v1.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[1], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s6
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s7
+; CHECK-GI-NOFP16-NEXT:    frinti s16, s16
+; CHECK-GI-NOFP16-NEXT:    frinti s17, s17
+; CHECK-GI-NOFP16-NEXT:    mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s18, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s17
+; CHECK-GI-NOFP16-NEXT:    mov h17, v1.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frinti s5, s5
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    frinti s16, s18
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT:    frinti s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT:    frinti s17, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT:    frinti s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frinti s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT:    frinti s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    frinti s1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[5], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: nearbyint_v16f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frinti v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    frinti v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %a)
+  ret <16 x half> %c
+}
+
+; Scalar double llvm.roundeven. Only the shared check prefix is used, so the
+; same output - a single "frintn d0, d0" followed by ret - is expected from
+; every tested configuration (the RUN lines are outside this view).
+define double @roundeven_f64(double %a) {
+; CHECK-LABEL: roundeven_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintn d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %c = call double @llvm.roundeven.f64(double %a)
+  ret double %c
+}
+
+; Scalar float llvm.roundeven. Only the shared check prefix is used, so the
+; same output - a single "frintn s0, s0" followed by ret - is expected from
+; every tested configuration (the RUN lines are outside this view).
+define float @roundeven_f32(float %a) {
+; CHECK-LABEL: roundeven_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintn s0, s0
+; CHECK-NEXT:    ret
+entry:
+  %c = call float @llvm.roundeven.f32(float %a)
+  ret float %c
+}
+
+; Scalar half llvm.roundeven. The FP16 variants expect a direct
+; "frintn h0, h0"; the NOFP16 variants expect a round trip through single
+; precision (fcvt s0, h0 / frintn s0, s0 / fcvt h0, s0). Both SD and GI
+; variants expect identical code here.
+; NOTE(review): NOFP16 presumably means +fullfp16 is not enabled; the RUN
+; lines are outside this view - confirm.
+define half @roundeven_f16(half %a) {
+; CHECK-SD-NOFP16-LABEL: roundeven_f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintn s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: roundeven_f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintn h0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: roundeven_f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frintn s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: roundeven_f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintn h0, h0
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call half @llvm.roundeven.f16(half %a)
+  ret half %c
+}
+
+; llvm.roundeven on <2 x double>. Every tested configuration shares one
+; expectation: a single vector "frintn v0.2d, v0.2d".
+define <2 x double> @roundeven_v2f64(<2 x double> %a) {
+; CHECK-LABEL: roundeven_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintn v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %a)
+  ret <2 x double> %c
+}
+
+; llvm.roundeven on the non-power-of-two type <3 x double>; the three
+; elements are in d0, d1 and d2. Expected code: d1 is inserted into the
+; upper lane of v0 and the pair is rounded with one .2d frintn, then ext
+; moves the upper result back into d1; d2 is rounded by a second .2d frintn
+; on v2. The "// kill" lines are register-liveness annotations printed in
+; the assembly output and are matched like any other line.
+define <3 x double> @roundeven_v3f64(<3 x double> %a) {
+; CHECK-LABEL: roundeven_v3f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    frintn v2.2d, v2.2d
+; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT:    frintn v0.2d, v0.2d
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x double> @llvm.roundeven.v3f64(<3 x double> %a)
+  ret <3 x double> %c
+}
+
+; llvm.roundeven on <4 x double>, which spans two registers. Expected code
+; is one .2d frintn per register (v0, then v1) in every configuration.
+define <4 x double> @roundeven_v4f64(<4 x double> %a) {
+; CHECK-LABEL: roundeven_v4f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintn v0.2d, v0.2d
+; CHECK-NEXT:    frintn v1.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %a)
+  ret <4 x double> %c
+}
+
+; llvm.roundeven on <2 x float>. Every tested configuration shares one
+; expectation: a single "frintn v0.2s, v0.2s".
+define <2 x float> @roundeven_v2f32(<2 x float> %a) {
+; CHECK-LABEL: roundeven_v2f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintn v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %a)
+  ret <2 x float> %c
+}
+
+; llvm.roundeven on the non-power-of-two type <3 x float>. The expected
+; code treats it as a full four-lane operation: one "frintn v0.4s, v0.4s",
+; the same instruction the <4 x float> test expects.
+define <3 x float> @roundeven_v3f32(<3 x float> %a) {
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintn v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %a)
+  ret <3 x float> %c
+}
+
+; llvm.roundeven on <4 x float>. Every tested configuration shares one
+; expectation: a single "frintn v0.4s, v0.4s".
+define <4 x float> @roundeven_v4f32(<4 x float> %a) {
+; CHECK-LABEL: roundeven_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintn v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a)
+  ret <4 x float> %c
+}
+
+; llvm.roundeven on <8 x float>. Expected as one frintn on .4s for each of the
+; two registers (v0, v1).
+define <8 x float> @roundeven_v8f32(<8 x float> %a) {
+; CHECK-LABEL: roundeven_v8f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintn v0.4s, v0.4s
+; CHECK-NEXT:    frintn v1.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %a)
+  ret <8 x float> %c
+}
+
+; llvm.roundeven on <7 x half>. Both NOFP16 expectations take each of the eight
+; h lanes of v0 through fcvt to single, scalar frintn, and fcvt back to half,
+; rebuilding the result in v1; the SD and GI NOFP16 sequences are the same.
+; Both FP16 expectations are a single frintn on .8h.
+; NOTE(review): SD/GI presumably stand for SelectionDAG/GlobalISel and
+; NOFP16/FP16 for without/with full fp16 support - confirm against the RUN lines.
+define <7 x half> @roundeven_v7f16(<7 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: roundeven_v7f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frintn s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintn s5, s1
+; CHECK-SD-NOFP16-NEXT:    frintn s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frintn s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintn s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frintn s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintn s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintn s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: roundeven_v7f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintn v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: roundeven_v7f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintn s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    frintn s5, s1
+; CHECK-GI-NOFP16-NEXT:    frintn s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frintn s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frintn s4, s5
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    frintn s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frintn s2, s2
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintn s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: roundeven_v7f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintn v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <7 x half> @llvm.roundeven.v7f16(<7 x half> %a)
+  ret <7 x half> %c
+}
+
+; llvm.roundeven on <4 x half>. Both NOFP16 expectations take the four h lanes
+; of v0 through fcvt to single, scalar frintn, and fcvt back to half, inserting
+; the results into v0; the SD and GI NOFP16 sequences are the same. Both FP16
+; expectations are a single frintn on .4h.
+define <4 x half> @roundeven_v4f16(<4 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: roundeven_v4f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frintn s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintn s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT:    frintn s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    frintn s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: roundeven_v4f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintn v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: roundeven_v4f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
+; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    frintn s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frintn s1, s1
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h4
+; CHECK-GI-NOFP16-NEXT:    frintn s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    frintn s2, s2
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s2
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: roundeven_v4f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintn v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %a)
+  ret <4 x half> %c
+}
+
+; llvm.roundeven on <8 x half>. Both NOFP16 expectations take each of the eight
+; h lanes of v0 through fcvt to single, scalar frintn, and fcvt back to half,
+; rebuilding the result in v1 before copying it to v0. Both FP16 expectations
+; are a single frintn on .8h.
+define <8 x half> @roundeven_v8f16(<8 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: roundeven_v8f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frintn s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintn s5, s1
+; CHECK-SD-NOFP16-NEXT:    frintn s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frintn s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintn s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frintn s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintn s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintn s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: roundeven_v8f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintn v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: roundeven_v8f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintn s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    frintn s5, s1
+; CHECK-GI-NOFP16-NEXT:    frintn s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frintn s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frintn s4, s5
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    frintn s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frintn s2, s2
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintn s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: roundeven_v8f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintn v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %a)
+  ret <8 x half> %c
+}
+
+; llvm.roundeven on <16 x half>, passed in v0 and v1. Both NOFP16 expectations
+; interleave two per-lane chains (fcvt to single, scalar frintn, fcvt back to
+; half), building the results in v2 and v3 and then copying them to v0 and v1.
+; Both FP16 expectations are one frintn on .8h per register.
+define <16 x half> @roundeven_v16f16(<16 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: roundeven_v16f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintn s5, s5
+; CHECK-SD-NOFP16-NEXT:    frintn s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    frintn s16, s2
+; CHECK-SD-NOFP16-NEXT:    frintn s17, s3
+; CHECK-SD-NOFP16-NEXT:    frintn s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT:    frintn s7, s7
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT:    mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    frintn s17, s17
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT:    frintn s16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintn s5, s5
+; CHECK-SD-NOFP16-NEXT:    frintn s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT:    frintn s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    frintn s6, s6
+; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintn s5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frintn s0, s0
+; CHECK-SD-NOFP16-NEXT:    frintn s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintn s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: roundeven_v16f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintn v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    frintn v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: roundeven_v16f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h0
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h1
+; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    frintn s5, s5
+; CHECK-GI-NOFP16-NEXT:    frintn s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frintn s16, s2
+; CHECK-GI-NOFP16-NEXT:    frintn s17, s3
+; CHECK-GI-NOFP16-NEXT:    frintn s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s6
+; CHECK-GI-NOFP16-NEXT:    frintn s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s17
+; CHECK-GI-NOFP16-NEXT:    mov h16, v1.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[1], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v1.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-GI-NOFP16-NEXT:    frintn s17, s17
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frintn s16, s16
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[2], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h7, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    frintn s5, s5
+; CHECK-GI-NOFP16-NEXT:    frintn s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[3], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[5]
+; CHECK-GI-NOFP16-NEXT:    frintn s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[4], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h7
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[5], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    frintn s6, s6
+; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    frintn s5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    frintn s0, s0
+; CHECK-GI-NOFP16-NEXT:    frintn s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[5], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintn s1, s1
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[6], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: roundeven_v16f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintn v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    frintn v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %a)
+  ret <16 x half> %c
+}
+
+; llvm.rint on a scalar double. Expected as a single frintx on d0.
+define double @rint_f64(double %a) {
+; CHECK-LABEL: rint_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintx d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %c = call double @llvm.rint.f64(double %a)
+  ret double %c
+}
+
+; llvm.rint on a scalar float. Expected as a single frintx on s0.
+define float @rint_f32(float %a) {
+; CHECK-LABEL: rint_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintx s0, s0
+; CHECK-NEXT:    ret
+entry:
+  %c = call float @llvm.rint.f32(float %a)
+  ret float %c
+}
+
+; llvm.rint on a scalar half. Both NOFP16 expectations widen to single with
+; fcvt, apply frintx on s0 and narrow back with fcvt; both FP16 expectations are
+; a single frintx on h0.
+; NOTE(review): SD/GI presumably stand for SelectionDAG/GlobalISel and
+; NOFP16/FP16 for without/with full fp16 support - confirm against the RUN lines.
+define half @rint_f16(half %a) {
+; CHECK-SD-NOFP16-LABEL: rint_f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintx s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: rint_f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintx h0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: rint_f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frintx s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: rint_f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintx h0, h0
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call half @llvm.rint.f16(half %a)
+  ret half %c
+}
+
+; llvm.rint on <2 x double>. Expected as a single frintx on .2d.
+define <2 x double> @rint_v2f64(<2 x double> %a) {
+; CHECK-LABEL: rint_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintx v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x double> @llvm.rint.v2f64(<2 x double> %a)
+  ret <2 x double> %c
+}
+
+; llvm.rint on <3 x double>. The expected code packs d0 and d1 into v0 for one
+; frintx on .2d, runs a second frintx on v2, then uses ext to move the high
+; half of v0 back into v1.
+define <3 x double> @rint_v3f64(<3 x double> %a) {
+; CHECK-LABEL: rint_v3f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    frintx v2.2d, v2.2d
+; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT:    frintx v0.2d, v0.2d
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x double> @llvm.rint.v3f64(<3 x double> %a)
+  ret <3 x double> %c
+}
+
+; llvm.rint on <4 x double>. Expected as one frintx on .2d for each of the two
+; registers (v0, v1).
+define <4 x double> @rint_v4f64(<4 x double> %a) {
+; CHECK-LABEL: rint_v4f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintx v0.2d, v0.2d
+; CHECK-NEXT:    frintx v1.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x double> @llvm.rint.v4f64(<4 x double> %a)
+  ret <4 x double> %c
+}
+
+; llvm.rint on <2 x float>. Expected as a single frintx on .2s.
+define <2 x float> @rint_v2f32(<2 x float> %a) {
+; CHECK-LABEL: rint_v2f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintx v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x float> @llvm.rint.v2f32(<2 x float> %a)
+  ret <2 x float> %c
+}
+
+; llvm.rint on <3 x float>. Expected as a single frintx on the full .4s
+; register, with no extra lane handling for the odd element count.
+define <3 x float> @rint_v3f32(<3 x float> %a) {
+; CHECK-LABEL: rint_v3f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintx v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x float> @llvm.rint.v3f32(<3 x float> %a)
+  ret <3 x float> %c
+}
+
+; llvm.rint on <4 x float>. Expected as a single frintx on .4s.
+define <4 x float> @rint_v4f32(<4 x float> %a) {
+; CHECK-LABEL: rint_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintx v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x float> @llvm.rint.v4f32(<4 x float> %a)
+  ret <4 x float> %c
+}
+
+; llvm.rint on <8 x float>. Expected as one frintx on .4s for each of the two
+; registers (v0, v1).
+define <8 x float> @rint_v8f32(<8 x float> %a) {
+; CHECK-LABEL: rint_v8f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintx v0.4s, v0.4s
+; CHECK-NEXT:    frintx v1.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <8 x float> @llvm.rint.v8f32(<8 x float> %a)
+  ret <8 x float> %c
+}
+
+; llvm.rint on <7 x half>. The two NOFP16 expectations differ. The SD sequence
+; takes all eight h lanes of v0 through fcvt, scalar frintx and fcvt. The GI
+; sequence converts only lanes 0-6 and writes the lane-6 result into both h[6]
+; and h[7] of the output. Both FP16 expectations are a single frintx on .8h.
+define <7 x half> @rint_v7f16(<7 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: rint_v7f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frintx s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintx s5, s1
+; CHECK-SD-NOFP16-NEXT:    frintx s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frintx s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintx s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frintx s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintx s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintx s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: rint_v7f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintx v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: rint_v7f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintx s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frintx s5, s1
+; CHECK-GI-NOFP16-NEXT:    frintx s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frintx s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintx s3, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s3
+; CHECK-GI-NOFP16-NEXT:    frintx s3, s5
+; CHECK-GI-NOFP16-NEXT:    frintx s0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: rint_v7f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintx v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <7 x half> @llvm.rint.v7f16(<7 x half> %a)
+  ret <7 x half> %c
+}
+
+; llvm.rint on <4 x half>. Both NOFP16 expectations take the four h lanes of v0
+; through fcvt to single, scalar frintx, and fcvt back to half, but the SD and
+; GI sequences differ in instruction order and register use. Both FP16
+; expectations are a single frintx on .4h.
+define <4 x half> @rint_v4f16(<4 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: rint_v4f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frintx s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintx s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT:    frintx s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    frintx s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: rint_v4f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintx v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: rint_v4f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintx s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    frintx s1, s1
+; CHECK-GI-NOFP16-NEXT:    frintx s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s3
+; CHECK-GI-NOFP16-NEXT:    frintx s3, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: rint_v4f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintx v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <4 x half> @llvm.rint.v4f16(<4 x half> %a)
+  ret <4 x half> %c
+}
+
+; llvm.rint on <8 x half>. Both NOFP16 expectations take each of the eight h
+; lanes of v0 through fcvt to single, scalar frintx, and fcvt back to half,
+; rebuilding the result in v1; the SD and GI sequences differ in instruction
+; order and register use. Both FP16 expectations are a single frintx on .8h.
+define <8 x half> @rint_v8f16(<8 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: rint_v8f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frintx s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintx s5, s1
+; CHECK-SD-NOFP16-NEXT:    frintx s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frintx s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintx s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frintx s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintx s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintx s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: rint_v8f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintx v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: rint_v8f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintx s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frintx s5, s1
+; CHECK-GI-NOFP16-NEXT:    frintx s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frintx s4, s4
+; CHECK-GI-NOFP16-NEXT:    frintx s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frintx s2, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintx s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    frintx s0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: rint_v8f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintx v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
+  ret <8 x half> %c
+}
+
+define <16 x half> @rint_v16f16(<16 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: rint_v16f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintx s5, s5
+; CHECK-SD-NOFP16-NEXT:    frintx s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    frintx s16, s2
+; CHECK-SD-NOFP16-NEXT:    frintx s17, s3
+; CHECK-SD-NOFP16-NEXT:    frintx s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT:    frintx s7, s7
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT:    mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    frintx s17, s17
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT:    frintx s16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintx s5, s5
+; CHECK-SD-NOFP16-NEXT:    frintx s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT:    frintx s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    frintx s6, s6
+; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintx s5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frintx s0, s0
+; CHECK-SD-NOFP16-NEXT:    frintx s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintx s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: rint_v16f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintx v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    frintx v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: rint_v16f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h19, v1.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frintx s4, s4
+; CHECK-GI-NOFP16-NEXT:    frintx s5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frintx s16, s2
+; CHECK-GI-NOFP16-NEXT:    frintx s17, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    frintx s6, s6
+; CHECK-GI-NOFP16-NEXT:    frintx s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt s16, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h19
+; CHECK-GI-NOFP16-NEXT:    mov h18, v1.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[1], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s6
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s7
+; CHECK-GI-NOFP16-NEXT:    frintx s16, s16
+; CHECK-GI-NOFP16-NEXT:    frintx s17, s17
+; CHECK-GI-NOFP16-NEXT:    mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s18, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s17
+; CHECK-GI-NOFP16-NEXT:    mov h17, v1.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frintx s5, s5
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintx s16, s18
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT:    frintx s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT:    frintx s17, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT:    frintx s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frintx s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT:    frintx s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    frintx s1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[5], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: rint_v16f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintx v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    frintx v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <16 x half> @llvm.rint.v16f16(<16 x half> %a)
+  ret <16 x half> %c
+}
+
+define double @round_f64(double %a) {
+; CHECK-LABEL: round_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinta d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %c = call double @llvm.round.f64(double %a)
+  ret double %c
+}
+
+define float @round_f32(float %a) {
+; CHECK-LABEL: round_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinta s0, s0
+; CHECK-NEXT:    ret
+entry:
+  %c = call float @llvm.round.f32(float %a)
+  ret float %c
+}
+
+define half @round_f16(half %a) {
+; CHECK-SD-NOFP16-LABEL: round_f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frinta s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: round_f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frinta h0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: round_f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frinta s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: round_f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frinta h0, h0
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call half @llvm.round.f16(half %a)
+  ret half %c
+}
+
+define <2 x double> @round_v2f64(<2 x double> %a) {
+; CHECK-LABEL: round_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinta v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x double> @llvm.round.v2f64(<2 x double> %a)
+  ret <2 x double> %c
+}
+
+define <3 x double> @round_v3f64(<3 x double> %a) {
+; CHECK-LABEL: round_v3f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    frinta v2.2d, v2.2d
+; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT:    frinta v0.2d, v0.2d
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x double> @llvm.round.v3f64(<3 x double> %a)
+  ret <3 x double> %c
+}
+
+define <4 x double> @round_v4f64(<4 x double> %a) {
+; CHECK-LABEL: round_v4f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinta v0.2d, v0.2d
+; CHECK-NEXT:    frinta v1.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x double> @llvm.round.v4f64(<4 x double> %a)
+  ret <4 x double> %c
+}
+
+define <2 x float> @round_v2f32(<2 x float> %a) {
+; CHECK-LABEL: round_v2f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinta v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x float> @llvm.round.v2f32(<2 x float> %a)
+  ret <2 x float> %c
+}
+
+define <3 x float> @round_v3f32(<3 x float> %a) {
+; CHECK-LABEL: round_v3f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinta v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x float> @llvm.round.v3f32(<3 x float> %a)
+  ret <3 x float> %c
+}
+
+define <4 x float> @round_v4f32(<4 x float> %a) {
+; CHECK-LABEL: round_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinta v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x float> @llvm.round.v4f32(<4 x float> %a)
+  ret <4 x float> %c
+}
+
+define <8 x float> @round_v8f32(<8 x float> %a) {
+; CHECK-LABEL: round_v8f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frinta v0.4s, v0.4s
+; CHECK-NEXT:    frinta v1.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <8 x float> @llvm.round.v8f32(<8 x float> %a)
+  ret <8 x float> %c
+}
+
+define <7 x half> @round_v7f16(<7 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: round_v7f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frinta s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frinta s5, s1
+; CHECK-SD-NOFP16-NEXT:    frinta s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frinta s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frinta s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frinta s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frinta s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frinta s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: round_v7f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frinta v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: round_v7f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frinta s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frinta s5, s1
+; CHECK-GI-NOFP16-NEXT:    frinta s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frinta s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    frinta s3, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s3
+; CHECK-GI-NOFP16-NEXT:    frinta s3, s5
+; CHECK-GI-NOFP16-NEXT:    frinta s0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: round_v7f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frinta v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <7 x half> @llvm.round.v7f16(<7 x half> %a)
+  ret <7 x half> %c
+}
+
+define <4 x half> @round_v4f16(<4 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: round_v4f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frinta s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frinta s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT:    frinta s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    frinta s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: round_v4f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frinta v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: round_v4f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frinta s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    frinta s1, s1
+; CHECK-GI-NOFP16-NEXT:    frinta s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s3
+; CHECK-GI-NOFP16-NEXT:    frinta s3, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: round_v4f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frinta v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <4 x half> @llvm.round.v4f16(<4 x half> %a)
+  ret <4 x half> %c
+}
+
+define <8 x half> @round_v8f16(<8 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: round_v8f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frinta s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frinta s5, s1
+; CHECK-SD-NOFP16-NEXT:    frinta s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frinta s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frinta s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frinta s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frinta s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frinta s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: round_v8f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frinta v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: round_v8f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frinta s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frinta s5, s1
+; CHECK-GI-NOFP16-NEXT:    frinta s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frinta s4, s4
+; CHECK-GI-NOFP16-NEXT:    frinta s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frinta s2, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    frinta s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    frinta s0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: round_v8f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frinta v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <8 x half> @llvm.round.v8f16(<8 x half> %a)
+  ret <8 x half> %c
+}
+
+define <16 x half> @round_v16f16(<16 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: round_v16f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frinta s5, s5
+; CHECK-SD-NOFP16-NEXT:    frinta s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    frinta s16, s2
+; CHECK-SD-NOFP16-NEXT:    frinta s17, s3
+; CHECK-SD-NOFP16-NEXT:    frinta s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT:    frinta s7, s7
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT:    mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    frinta s17, s17
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT:    frinta s16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frinta s5, s5
+; CHECK-SD-NOFP16-NEXT:    frinta s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT:    frinta s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    frinta s6, s6
+; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frinta s5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frinta s0, s0
+; CHECK-SD-NOFP16-NEXT:    frinta s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    frinta s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: round_v16f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frinta v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    frinta v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: round_v16f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h19, v1.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frinta s4, s4
+; CHECK-GI-NOFP16-NEXT:    frinta s5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frinta s16, s2
+; CHECK-GI-NOFP16-NEXT:    frinta s17, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    frinta s6, s6
+; CHECK-GI-NOFP16-NEXT:    frinta s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt s16, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h19
+; CHECK-GI-NOFP16-NEXT:    mov h18, v1.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[1], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s6
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s7
+; CHECK-GI-NOFP16-NEXT:    frinta s16, s16
+; CHECK-GI-NOFP16-NEXT:    frinta s17, s17
+; CHECK-GI-NOFP16-NEXT:    mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s18, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s17
+; CHECK-GI-NOFP16-NEXT:    mov h17, v1.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frinta s5, s5
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    frinta s16, s18
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT:    frinta s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT:    frinta s17, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT:    frinta s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frinta s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT:    frinta s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    frinta s1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[5], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: round_v16f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frinta v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    frinta v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <16 x half> @llvm.round.v16f16(<16 x half> %a)
+  ret <16 x half> %c
+}
+
+define double @trunc_f64(double %a) {
+; CHECK-LABEL: trunc_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %c = call double @llvm.trunc.f64(double %a)
+  ret double %c
+}
+
+define float @trunc_f32(float %a) {
+; CHECK-LABEL: trunc_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz s0, s0
+; CHECK-NEXT:    ret
+entry:
+  %c = call float @llvm.trunc.f32(float %a)
+  ret float %c
+}
+
+define half @trunc_f16(half %a) {
+; CHECK-SD-NOFP16-LABEL: trunc_f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintz s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: trunc_f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintz h0, h0
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: trunc_f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frintz s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: trunc_f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintz h0, h0
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call half @llvm.trunc.f16(half %a)
+  ret half %c
+}
+
+define <2 x double> @trunc_v2f64(<2 x double> %a) {
+; CHECK-LABEL: trunc_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a)
+  ret <2 x double> %c
+}
+
+; llvm.trunc on the odd-sized <3 x double>. The checked sequence takes the
+; three elements in d0, d1 and d2, inserts d1 into the upper lane of v0 so
+; that elements 0 and 1 share one "frintz .2d", rounds d2 with a second
+; "frintz .2d" on v2, and finally uses ext to move the upper lane of v0 back
+; out into d1 for the return.
+define <3 x double> @trunc_v3f64(<3 x double> %a) {
+; CHECK-LABEL: trunc_v3f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    frintz v2.2d, v2.2d
+; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT:    frintz v0.2d, v0.2d
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x double> @llvm.trunc.v3f64(<3 x double> %a)
+  ret <3 x double> %c
+}
+
+; llvm.trunc on <4 x double>: the value spans v0 and v1, and the common CHECK
+; prefix expects each register to be rounded with its own "frintz .2d".
+define <4 x double> @trunc_v4f64(<4 x double> %a) {
+; CHECK-LABEL: trunc_v4f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz v0.2d, v0.2d
+; CHECK-NEXT:    frintz v1.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x double> @llvm.trunc.v4f64(<4 x double> %a)
+  ret <4 x double> %c
+}
+
+; llvm.trunc on <2 x float>: the common CHECK prefix expects one 64-bit vector
+; "frintz v0.2s, v0.2s".
+define <2 x float> @trunc_v2f32(<2 x float> %a) {
+; CHECK-LABEL: trunc_v2f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x float> @llvm.trunc.v2f32(<2 x float> %a)
+  ret <2 x float> %c
+}
+
+; llvm.trunc on the odd-sized <3 x float>: the expected code is a single
+; four-lane "frintz v0.4s, v0.4s", i.e. the same instruction that is checked
+; for trunc_v4f32.
+define <3 x float> @trunc_v3f32(<3 x float> %a) {
+; CHECK-LABEL: trunc_v3f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <3 x float> @llvm.trunc.v3f32(<3 x float> %a)
+  ret <3 x float> %c
+}
+
+; llvm.trunc on <4 x float>: the common CHECK prefix expects one vector
+; "frintz v0.4s, v0.4s".
+define <4 x float> @trunc_v4f32(<4 x float> %a) {
+; CHECK-LABEL: trunc_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
+  ret <4 x float> %c
+}
+
+; llvm.trunc on <8 x float>: the value spans v0 and v1, and the common CHECK
+; prefix expects each register to be rounded with its own "frintz .4s".
+define <8 x float> @trunc_v8f32(<8 x float> %a) {
+; CHECK-LABEL: trunc_v8f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz v0.4s, v0.4s
+; CHECK-NEXT:    frintz v1.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = call <8 x float> @llvm.trunc.v8f32(<8 x float> %a)
+  ret <8 x float> %c
+}
+
+; llvm.trunc on the odd-sized <7 x half>. The FP16 variants expect a single
+; eight-lane "frintz v0.8h, v0.8h". The NOFP16 variants expect per-lane code:
+; each half is extracted, widened with fcvt, rounded with a scalar frintz,
+; narrowed back, and inserted into v1, which is then copied to v0.
+; The two NOFP16 sequences differ: the SD one converts all eight lanes
+; (h[0]..h[7]), while the GI one converts lanes h[0]..h[6] only and writes the
+; lane-6 result into both h[6] and h[7] of the result.
+define <7 x half> @trunc_v7f16(<7 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: trunc_v7f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frintz s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintz s5, s1
+; CHECK-SD-NOFP16-NEXT:    frintz s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frintz s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintz s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frintz s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintz s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintz s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: trunc_v7f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintz v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: trunc_v7f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintz s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frintz s5, s1
+; CHECK-GI-NOFP16-NEXT:    frintz s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frintz s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintz s3, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s3
+; CHECK-GI-NOFP16-NEXT:    frintz s3, s5
+; CHECK-GI-NOFP16-NEXT:    frintz s0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: trunc_v7f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintz v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <7 x half> @llvm.trunc.v7f16(<7 x half> %a)
+  ret <7 x half> %c
+}
+
+; llvm.trunc on <4 x half>. The FP16 variants expect a single 64-bit vector
+; "frintz v0.4h, v0.4h". The NOFP16 variants expect the four lanes to be
+; handled one at a time: extract, fcvt to single, scalar frintz, fcvt back to
+; half, and insert into the result. The SD and GI sequences perform the same
+; steps but differ in instruction order and register assignment.
+define <4 x half> @trunc_v4f16(<4 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: trunc_v4f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frintz s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintz s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h4
+; CHECK-SD-NOFP16-NEXT:    frintz s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    frintz s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s2
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: trunc_v4f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintz v0.4h, v0.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: trunc_v4f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintz s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    frintz s1, s1
+; CHECK-GI-NOFP16-NEXT:    frintz s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s3
+; CHECK-GI-NOFP16-NEXT:    frintz s3, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: trunc_v4f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintz v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <4 x half> @llvm.trunc.v4f16(<4 x half> %a)
+  ret <4 x half> %c
+}
+
+; llvm.trunc on <8 x half>. The FP16 variants expect a single
+; "frintz v0.8h, v0.8h". The NOFP16 variants expect all eight lanes to be
+; processed individually (extract, fcvt to single, scalar frintz, fcvt back to
+; half, insert into v1) before v1 is copied to v0. The SD and GI sequences
+; perform the same steps but differ in instruction order and register
+; assignment.
+define <8 x half> @trunc_v8f16(<8 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: trunc_v8f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    frintz s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintz s5, s1
+; CHECK-SD-NOFP16-NEXT:    frintz s2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    frintz s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    frintz s4, s5
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    frintz s3, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    frintz s2, s2
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintz s0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: trunc_v8f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintz v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: trunc_v8f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    frintz s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    frintz s5, s1
+; CHECK-GI-NOFP16-NEXT:    frintz s2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-GI-NOFP16-NEXT:    frintz s4, s4
+; CHECK-GI-NOFP16-NEXT:    frintz s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frintz s2, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintz s3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
+; CHECK-GI-NOFP16-NEXT:    frintz s0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[6], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: trunc_v8f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintz v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
+  ret <8 x half> %c
+}
+
+; llvm.trunc on <16 x half>, which spans v0 and v1. The FP16 variants expect
+; one "frintz .8h" per register. The NOFP16 variants expect every lane of both
+; registers to be processed individually (extract, fcvt to single, scalar
+; frintz, fcvt back to half); the results are assembled in v2 (from v0) and
+; v3 (from v1) and copied back to v0/v1 before ret. The SD and GI sequences
+; differ in instruction order and register assignment.
+define <16 x half> @trunc_v16f16(<16 x half> %a) {
+; CHECK-SD-NOFP16-LABEL: trunc_v16f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h1
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintz s5, s5
+; CHECK-SD-NOFP16-NEXT:    frintz s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    frintz s16, s2
+; CHECK-SD-NOFP16-NEXT:    frintz s17, s3
+; CHECK-SD-NOFP16-NEXT:    frintz s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s6
+; CHECK-SD-NOFP16-NEXT:    frintz s7, s7
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s16
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s17
+; CHECK-SD-NOFP16-NEXT:    mov h16, v1.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s17, h18
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[1], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    frintz s17, s17
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT:    frintz s16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[2], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h7, s17
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintz s5, s5
+; CHECK-SD-NOFP16-NEXT:    frintz s6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[3], v7.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT:    frintz s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[4], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h7
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    frintz s6, s6
+; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    frintz s5, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    frintz s0, s0
+; CHECK-SD-NOFP16-NEXT:    frintz s4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[5], v6.h[0]
+; CHECK-SD-NOFP16-NEXT:    frintz s1, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[6], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: trunc_v16f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    frintz v0.8h, v0.8h
+; CHECK-SD-FP16-NEXT:    frintz v1.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: trunc_v16f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h19, v1.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    frintz s4, s4
+; CHECK-GI-NOFP16-NEXT:    frintz s5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frintz s16, s2
+; CHECK-GI-NOFP16-NEXT:    frintz s17, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt h2, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt h3, s5
+; CHECK-GI-NOFP16-NEXT:    frintz s6, s6
+; CHECK-GI-NOFP16-NEXT:    frintz s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt s16, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h19
+; CHECK-GI-NOFP16-NEXT:    mov h18, v1.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[1], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s6
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s7
+; CHECK-GI-NOFP16-NEXT:    frintz s16, s16
+; CHECK-GI-NOFP16-NEXT:    frintz s17, s17
+; CHECK-GI-NOFP16-NEXT:    mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt s18, h18
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s17
+; CHECK-GI-NOFP16-NEXT:    mov h17, v1.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    frintz s5, s5
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[6]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[3], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    frintz s16, s18
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT:    frintz s7, s7
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s5
+; CHECK-GI-NOFP16-NEXT:    fcvt h16, s16
+; CHECK-GI-NOFP16-NEXT:    frintz s17, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt h7, s7
+; CHECK-GI-NOFP16-NEXT:    frintz s6, s6
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    frintz s4, s4
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[4], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h5, s17
+; CHECK-GI-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT:    frintz s0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-GI-NOFP16-NEXT:    frintz s1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[5], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[6], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT:    mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: trunc_v16f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    frintz v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT:    frintz v1.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = call <16 x half> @llvm.trunc.v16f16(<16 x half> %a)
+  ret <16 x half> %c
+}
+
+; Declarations of the rounding intrinsics called by the tests in this file:
+; ceil, floor, nearbyint, rint, round, roundeven and trunc, each declared for
+; the scalar types half/float/double and for the vector types
+; v4f16/v7f16/v8f16/v16f16, v2f32/v3f32/v4f32/v8f32 and v2f64/v3f64/v4f64.
+; The list is kept in lexicographic order of the declaration text.
+declare <16 x half> @llvm.ceil.v16f16(<16 x half>)
+declare <16 x half> @llvm.floor.v16f16(<16 x half>)
+declare <16 x half> @llvm.nearbyint.v16f16(<16 x half>)
+declare <16 x half> @llvm.rint.v16f16(<16 x half>)
+declare <16 x half> @llvm.round.v16f16(<16 x half>)
+declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
+declare <16 x half> @llvm.trunc.v16f16(<16 x half>)
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
+declare <2 x double> @llvm.rint.v2f64(<2 x double>)
+declare <2 x double> @llvm.round.v2f64(<2 x double>)
+declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
+declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
+declare <2 x float> @llvm.floor.v2f32(<2 x float>)
+declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
+declare <2 x float> @llvm.rint.v2f32(<2 x float>)
+declare <2 x float> @llvm.round.v2f32(<2 x float>)
+declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
+declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
+declare <3 x double> @llvm.ceil.v3f64(<3 x double>)
+declare <3 x double> @llvm.floor.v3f64(<3 x double>)
+declare <3 x double> @llvm.nearbyint.v3f64(<3 x double>)
+declare <3 x double> @llvm.rint.v3f64(<3 x double>)
+declare <3 x double> @llvm.round.v3f64(<3 x double>)
+declare <3 x double> @llvm.roundeven.v3f64(<3 x double>)
+declare <3 x double> @llvm.trunc.v3f64(<3 x double>)
+declare <3 x float> @llvm.ceil.v3f32(<3 x float>)
+declare <3 x float> @llvm.floor.v3f32(<3 x float>)
+declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
+declare <3 x float> @llvm.rint.v3f32(<3 x float>)
+declare <3 x float> @llvm.round.v3f32(<3 x float>)
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
+declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
+declare <4 x double> @llvm.floor.v4f64(<4 x double>)
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
+declare <4 x double> @llvm.rint.v4f64(<4 x double>)
+declare <4 x double> @llvm.round.v4f64(<4 x double>)
+declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
+declare <4 x float> @llvm.floor.v4f32(<4 x float>)
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
+declare <4 x float> @llvm.rint.v4f32(<4 x float>)
+declare <4 x float> @llvm.round.v4f32(<4 x float>)
+declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
+declare <4 x half> @llvm.ceil.v4f16(<4 x half>)
+declare <4 x half> @llvm.floor.v4f16(<4 x half>)
+declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>)
+declare <4 x half> @llvm.rint.v4f16(<4 x half>)
+declare <4 x half> @llvm.round.v4f16(<4 x half>)
+declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
+declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
+declare <7 x half> @llvm.ceil.v7f16(<7 x half>)
+declare <7 x half> @llvm.floor.v7f16(<7 x half>)
+declare <7 x half> @llvm.nearbyint.v7f16(<7 x half>)
+declare <7 x half> @llvm.rint.v7f16(<7 x half>)
+declare <7 x half> @llvm.round.v7f16(<7 x half>)
+declare <7 x half> @llvm.roundeven.v7f16(<7 x half>)
+declare <7 x half> @llvm.trunc.v7f16(<7 x half>)
+declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
+declare <8 x float> @llvm.floor.v8f32(<8 x float>)
+declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
+declare <8 x float> @llvm.rint.v8f32(<8 x float>)
+declare <8 x float> @llvm.round.v8f32(<8 x float>)
+declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
+declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
+declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
+declare <8 x half> @llvm.floor.v8f16(<8 x half>)
+declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
+declare <8 x half> @llvm.rint.v8f16(<8 x half>)
+declare <8 x half> @llvm.round.v8f16(<8 x half>)
+declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
+declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.floor.f64(double)
+declare double @llvm.nearbyint.f64(double)
+declare double @llvm.rint.f64(double)
+declare double @llvm.round.f64(double)
+declare double @llvm.roundeven.f64(double)
+declare double @llvm.trunc.f64(double)
+declare float @llvm.ceil.f32(float)
+declare float @llvm.floor.f32(float)
+declare float @llvm.nearbyint.f32(float)
+declare float @llvm.rint.f32(float)
+declare float @llvm.round.f32(float)
+declare float @llvm.roundeven.f32(float)
+declare float @llvm.trunc.f32(float)
+declare half @llvm.ceil.f16(half)
+declare half @llvm.floor.f16(half)
+declare half @llvm.nearbyint.f16(half)
+declare half @llvm.rint.f16(half)
+declare half @llvm.round.f16(half)
+declare half @llvm.roundeven.f16(half)
+declare half @llvm.trunc.f16(half)
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}


        


More information about the llvm-commits mailing list