[llvm] b737f1a - [AArch64] Regenerate some test checks. NFC

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 13 03:37:38 PDT 2021



> On Sep 13, 2021, at 11:29, David Green via llvm-commits <llvm-commits at lists.llvm.org> wrote:
> 
> 
> Author: David Green
> Date: 2021-09-13T11:29:44+01:00
> New Revision: b737f1acf42807b6c791fc87763967d60bfc7ee3
> 
> URL: https://github.com/llvm/llvm-project/commit/b737f1acf42807b6c791fc87763967d60bfc7ee3
> DIFF: https://github.com/llvm/llvm-project/commit/b737f1acf42807b6c791fc87763967d60bfc7ee3.diff
> 
> LOG: [AArch64] Regenerate some test checks. NFC
> 
> This regenerates some of the tests that had very-close-to-updated check
> line already, in order to make them more maintainable.
> 

It looks like the tests intentionally checked only the targeted instructions; now they are much more fragile and will need updating more often. There are many cases where we now check the full assembly, when the only interesting bits are a few generated instructions. Granted, updating them can now be done by running a script, but that can still lead to a lot of changes needing review, plus the potential to cause downstream conflicts.

The changed tests also now rely on precise register assignments, which can cause pain for downstream users.

Could you share the patch where this simplifies updating the tests drastically?

Cheers,
Florian

> Added: 
> 
> 
> Modified: 
>    llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
>    llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
>    llvm/test/CodeGen/AArch64/arm64-nvcast.ll
>    llvm/test/CodeGen/AArch64/arm64-register-pairing.ll
>    llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
>    llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
>    llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
> 
> Removed: 
> 
> 
> 
> ################################################################################
> diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
> index 6404ab728011..65cab2cfb068 100644
> --- a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
> +++ b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
> @@ -1,35 +1,44 @@
> +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
> 
> declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
> declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
> 
> define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
> -; CHECK: test_uabd_v8i8:
> +; CHECK-LABEL: test_uabd_v8i8:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    uabd v0.8b, v0.8b, v1.8b
> +; CHECK-NEXT:    ret
>   %abd = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
> -; CHECK: uabd v0.8b, v0.8b, v1.8b
>   ret <8 x i8> %abd
> }
> 
> define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
> -; CHECK: test_uaba_v8i8:
> +; CHECK-LABEL: test_uaba_v8i8:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    uaba v0.8b, v0.8b, v1.8b
> +; CHECK-NEXT:    ret
>   %abd = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
>   %aba = add <8 x i8> %lhs, %abd
> -; CHECK: uaba v0.8b, v0.8b, v1.8b
>   ret <8 x i8> %aba
> }
> 
> define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
> -; CHECK: test_sabd_v8i8:
> +; CHECK-LABEL: test_sabd_v8i8:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sabd v0.8b, v0.8b, v1.8b
> +; CHECK-NEXT:    ret
>   %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
> -; CHECK: sabd v0.8b, v0.8b, v1.8b
>   ret <8 x i8> %abd
> }
> 
> define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
> -; CHECK: test_saba_v8i8:
> +; CHECK-LABEL: test_saba_v8i8:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    saba v0.8b, v0.8b, v1.8b
> +; CHECK-NEXT:    ret
>   %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
>   %aba = add <8 x i8> %lhs, %abd
> -; CHECK: saba v0.8b, v0.8b, v1.8b
>   ret <8 x i8> %aba
> }
> 
> @@ -37,32 +46,40 @@ declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>)
> declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>)
> 
> define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
> -; CHECK: test_uabd_v16i8:
> +; CHECK-LABEL: test_uabd_v16i8:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    uabd v0.16b, v0.16b, v1.16b
> +; CHECK-NEXT:    ret
>   %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
> -; CHECK: uabd v0.16b, v0.16b, v1.16b
>   ret <16 x i8> %abd
> }
> 
> define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
> -; CHECK: test_uaba_v16i8:
> +; CHECK-LABEL: test_uaba_v16i8:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    uaba v0.16b, v0.16b, v1.16b
> +; CHECK-NEXT:    ret
>   %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
>   %aba = add <16 x i8> %lhs, %abd
> -; CHECK: uaba v0.16b, v0.16b, v1.16b
>   ret <16 x i8> %aba
> }
> 
> define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
> -; CHECK: test_sabd_v16i8:
> +; CHECK-LABEL: test_sabd_v16i8:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sabd v0.16b, v0.16b, v1.16b
> +; CHECK-NEXT:    ret
>   %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
> -; CHECK: sabd v0.16b, v0.16b, v1.16b
>   ret <16 x i8> %abd
> }
> 
> define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
> -; CHECK: test_saba_v16i8:
> +; CHECK-LABEL: test_saba_v16i8:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    saba v0.16b, v0.16b, v1.16b
> +; CHECK-NEXT:    ret
>   %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
>   %aba = add <16 x i8> %lhs, %abd
> -; CHECK: saba v0.16b, v0.16b, v1.16b
>   ret <16 x i8> %aba
> }
> 
> @@ -70,32 +87,40 @@ declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
> declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
> 
> define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
> -; CHECK: test_uabd_v4i16:
> +; CHECK-LABEL: test_uabd_v4i16:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    uabd v0.4h, v0.4h, v1.4h
> +; CHECK-NEXT:    ret
>   %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
> -; CHECK: uabd v0.4h, v0.4h, v1.4h
>   ret <4 x i16> %abd
> }
> 
> define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
> -; CHECK: test_uaba_v4i16:
> +; CHECK-LABEL: test_uaba_v4i16:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    uaba v0.4h, v0.4h, v1.4h
> +; CHECK-NEXT:    ret
>   %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
>   %aba = add <4 x i16> %lhs, %abd
> -; CHECK: uaba v0.4h, v0.4h, v1.4h
>   ret <4 x i16> %aba
> }
> 
> define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
> -; CHECK: test_sabd_v4i16:
> +; CHECK-LABEL: test_sabd_v4i16:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sabd v0.4h, v0.4h, v1.4h
> +; CHECK-NEXT:    ret
>   %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
> -; CHECK: sabd v0.4h, v0.4h, v1.4h
>   ret <4 x i16> %abd
> }
> 
> define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
> -; CHECK: test_saba_v4i16:
> +; CHECK-LABEL: test_saba_v4i16:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    saba v0.4h, v0.4h, v1.4h
> +; CHECK-NEXT:    ret
>   %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
>   %aba = add <4 x i16> %lhs, %abd
> -; CHECK: saba v0.4h, v0.4h, v1.4h
>   ret <4 x i16> %aba
> }
> 
> @@ -103,32 +128,40 @@ declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
> declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
> 
> define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
> -; CHECK: test_uabd_v8i16:
> +; CHECK-LABEL: test_uabd_v8i16:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
> +; CHECK-NEXT:    ret
>   %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
> -; CHECK: uabd v0.8h, v0.8h, v1.8h
>   ret <8 x i16> %abd
> }
> 
> define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
> -; CHECK: test_uaba_v8i16:
> +; CHECK-LABEL: test_uaba_v8i16:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    uaba v0.8h, v0.8h, v1.8h
> +; CHECK-NEXT:    ret
>   %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
>   %aba = add <8 x i16> %lhs, %abd
> -; CHECK: uaba v0.8h, v0.8h, v1.8h
>   ret <8 x i16> %aba
> }
> 
> define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
> -; CHECK: test_sabd_v8i16:
> +; CHECK-LABEL: test_sabd_v8i16:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
> +; CHECK-NEXT:    ret
>   %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
> -; CHECK: sabd v0.8h, v0.8h, v1.8h
>   ret <8 x i16> %abd
> }
> 
> define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
> -; CHECK: test_saba_v8i16:
> +; CHECK-LABEL: test_saba_v8i16:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    saba v0.8h, v0.8h, v1.8h
> +; CHECK-NEXT:    ret
>   %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
>   %aba = add <8 x i16> %lhs, %abd
> -; CHECK: saba v0.8h, v0.8h, v1.8h
>   ret <8 x i16> %aba
> }
> 
> @@ -136,31 +169,41 @@ declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
> declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
> 
> define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
> -; CHECK: test_uabd_v2i32:
> +; CHECK-LABEL: test_uabd_v2i32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    uabd v0.2s, v0.2s, v1.2s
> +; CHECK-NEXT:    ret
>   %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
> -; CHECK: uabd v0.2s, v0.2s, v1.2s
>   ret <2 x i32> %abd
> }
> 
> define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
> -; CHECK: test_uaba_v2i32:
> +; CHECK-LABEL: test_uaba_v2i32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    uaba v0.2s, v0.2s, v1.2s
> +; CHECK-NEXT:    ret
>   %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
>   %aba = add <2 x i32> %lhs, %abd
> -; CHECK: uaba v0.2s, v0.2s, v1.2s
>   ret <2 x i32> %aba
> }
> 
> define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
> -; CHECK: test_sabd_v2i32:
> +; CHECK-LABEL: test_sabd_v2i32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sabd v0.2s, v0.2s, v1.2s
> +; CHECK-NEXT:    ret
>   %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
> -; CHECK: sabd v0.2s, v0.2s, v1.2s
>   ret <2 x i32> %abd
> }
> 
> define <2 x i32> @test_sabd_v2i32_const() {
> -; CHECK: test_sabd_v2i32_const:
> -; CHECK: movi     d1, #0x00ffffffff0000
> -; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
> +; CHECK-LABEL: test_sabd_v2i32_const:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    adrp x8, .LCPI19_0
> +; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI19_0]
> +; CHECK-NEXT:    movi d1, #0x00ffffffff0000
> +; CHECK-NEXT:    sabd v0.2s, v0.2s, v1.2s
> +; CHECK-NEXT:    ret
>   %1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(
>     <2 x i32> <i32 -2147483648, i32 2147450880>,
>     <2 x i32> <i32 -65536, i32 65535>)
> @@ -168,10 +211,12 @@ define <2 x i32> @test_sabd_v2i32_const() {
> }
> 
> define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
> -; CHECK: test_saba_v2i32:
> +; CHECK-LABEL: test_saba_v2i32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    saba v0.2s, v0.2s, v1.2s
> +; CHECK-NEXT:    ret
>   %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
>   %aba = add <2 x i32> %lhs, %abd
> -; CHECK: saba v0.2s, v0.2s, v1.2s
>   ret <2 x i32> %aba
> }
> 
> @@ -179,58 +224,72 @@ declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>)
> declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>)
> 
> define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
> -; CHECK: test_uabd_v4i32:
> +; CHECK-LABEL: test_uabd_v4i32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
> +; CHECK-NEXT:    ret
>   %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
> -; CHECK: uabd v0.4s, v0.4s, v1.4s
>   ret <4 x i32> %abd
> }
> 
> define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
> -; CHECK: test_uaba_v4i32:
> +; CHECK-LABEL: test_uaba_v4i32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    uaba v0.4s, v0.4s, v1.4s
> +; CHECK-NEXT:    ret
>   %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
>   %aba = add <4 x i32> %lhs, %abd
> -; CHECK: uaba v0.4s, v0.4s, v1.4s
>   ret <4 x i32> %aba
> }
> 
> define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
> -; CHECK: test_sabd_v4i32:
> +; CHECK-LABEL: test_sabd_v4i32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
> +; CHECK-NEXT:    ret
>   %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
> -; CHECK: sabd v0.4s, v0.4s, v1.4s
>   ret <4 x i32> %abd
> }
> 
> define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
> -; CHECK: test_saba_v4i32:
> +; CHECK-LABEL: test_saba_v4i32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    saba v0.4s, v0.4s, v1.4s
> +; CHECK-NEXT:    ret
>   %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
>   %aba = add <4 x i32> %lhs, %abd
> -; CHECK: saba v0.4s, v0.4s, v1.4s
>   ret <4 x i32> %aba
> }
> 
> declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>)
> 
> define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
> -; CHECK: test_fabd_v2f32:
> +; CHECK-LABEL: test_fabd_v2f32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fabd v0.2s, v0.2s, v1.2s
> +; CHECK-NEXT:    ret
>   %abd = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %lhs, <2 x float> %rhs)
> -; CHECK: fabd v0.2s, v0.2s, v1.2s
>   ret <2 x float> %abd
> }
> 
> declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>)
> 
> define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
> -; CHECK: test_fabd_v4f32:
> +; CHECK-LABEL: test_fabd_v4f32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fabd v0.4s, v0.4s, v1.4s
> +; CHECK-NEXT:    ret
>   %abd = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %lhs, <4 x float> %rhs)
> -; CHECK: fabd v0.4s, v0.4s, v1.4s
>   ret <4 x float> %abd
> }
> 
> declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>)
> 
> define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
> -; CHECK: test_fabd_v2f64:
> +; CHECK-LABEL: test_fabd_v2f64:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fabd v0.2d, v0.2d, v1.2d
> +; CHECK-NEXT:    ret
>   %abd = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
> -; CHECK: fabd v0.2d, v0.2d, v1.2d
>   ret <2 x double> %abd
> }
> 
> diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
> index 720f3eb6a4bf..3da3f515ccce 100644
> --- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
> +++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
> @@ -1,79 +1,113 @@
> +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
> ; arm64 has its own copy of this because of the intrinsics
> 
> define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) {
> ; CHECK-LABEL: mul8xi8:
> -; CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    mul v0.8b, v0.8b, v1.8b
> +; CHECK-NEXT:    ret
> 	%tmp3 = mul <8 x i8> %A, %B;
> 	ret <8 x i8> %tmp3
> }
> 
> define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) {
> ; CHECK-LABEL: mul16xi8:
> -; CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    mul v0.16b, v0.16b, v1.16b
> +; CHECK-NEXT:    ret
> 	%tmp3 = mul <16 x i8> %A, %B;
> 	ret <16 x i8> %tmp3
> }
> 
> define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) {
> ; CHECK-LABEL: mul4xi16:
> -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    mul v0.4h, v0.4h, v1.4h
> +; CHECK-NEXT:    ret
> 	%tmp3 = mul <4 x i16> %A, %B;
> 	ret <4 x i16> %tmp3
> }
> 
> define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) {
> ; CHECK-LABEL: mul8xi16:
> -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
> +; CHECK-NEXT:    ret
> 	%tmp3 = mul <8 x i16> %A, %B;
> 	ret <8 x i16> %tmp3
> }
> 
> define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) {
> ; CHECK-LABEL: mul2xi32:
> -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    mul v0.2s, v0.2s, v1.2s
> +; CHECK-NEXT:    ret
> 	%tmp3 = mul <2 x i32> %A, %B;
> 	ret <2 x i32> %tmp3
> }
> 
> define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
> ; CHECK-LABEL: mul4x32:
> -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
> +; CHECK-NEXT:    ret
> 	%tmp3 = mul <4 x i32> %A, %B;
> 	ret <4 x i32> %tmp3
> }
> 
> define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) {
> ; CHECK-LABEL: mul1xi64:
> -; CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov x8, d1
> +; CHECK-NEXT:    fmov x9, d0
> +; CHECK-NEXT:    mul x8, x9, x8
> +; CHECK-NEXT:    fmov d0, x8
> +; CHECK-NEXT:    ret
>   %tmp3 = mul <1 x i64> %A, %B;
>   ret <1 x i64> %tmp3
> }
> 
> define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) {
> ; CHECK-LABEL: mul2xi64:
> -; CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
> -; CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmov x10, d1
> +; CHECK-NEXT:    fmov x11, d0
> +; CHECK-NEXT:    mov x8, v1.d[1]
> +; CHECK-NEXT:    mov x9, v0.d[1]
> +; CHECK-NEXT:    mul x10, x11, x10
> +; CHECK-NEXT:    mul x8, x9, x8
> +; CHECK-NEXT:    fmov d0, x10
> +; CHECK-NEXT:    mov v0.d[1], x8
> +; CHECK-NEXT:    ret
>   %tmp3 = mul <2 x i64> %A, %B;
>   ret <2 x i64> %tmp3
> }
> 
>  define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) {
> ; CHECK-LABEL: mul2xfloat:
> -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.2s
> +; CHECK-NEXT:    ret
> 	%tmp3 = fmul <2 x float> %A, %B;
> 	ret <2 x float> %tmp3
> }
> 
> define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) {
> ; CHECK-LABEL: mul4xfloat:
> -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
> +; CHECK-NEXT:    ret
> 	%tmp3 = fmul <4 x float> %A, %B;
> 	ret <4 x float> %tmp3
> }
> define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) {
> ; CHECK-LABEL: mul2xdouble:
> -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
> +; CHECK-NEXT:    ret
> 	%tmp3 = fmul <2 x double> %A, %B;
> 	ret <2 x double> %tmp3
> }
> @@ -81,605 +115,1429 @@ define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) {
> 
>  define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) {
> ; CHECK-LABEL: div2xfloat:
> -; CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fdiv v0.2s, v0.2s, v1.2s
> +; CHECK-NEXT:    ret
> 	%tmp3 = fdiv <2 x float> %A, %B;
> 	ret <2 x float> %tmp3
> }
> 
> define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) {
> ; CHECK-LABEL: div4xfloat:
> -; CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fdiv v0.4s, v0.4s, v1.4s
> +; CHECK-NEXT:    ret
> 	%tmp3 = fdiv <4 x float> %A, %B;
> 	ret <4 x float> %tmp3
> }
> define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
> ; CHECK-LABEL: div2xdouble:
> -; CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fdiv v0.2d, v0.2d, v1.2d
> +; CHECK-NEXT:    ret
> 	%tmp3 = fdiv <2 x double> %A, %B;
> 	ret <2 x double> %tmp3
> }
> 
> define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) {
> ; CHECK-LABEL: sdiv1x8:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    smov w8, v1.b[0]
> +; CHECK-NEXT:    smov w9, v0.b[0]
> +; CHECK-NEXT:    sdiv w8, w9, w8
> +; CHECK-NEXT:    fmov s0, w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = sdiv <1 x i8> %A, %B;
> 	ret <1 x i8> %tmp3
> }
> 
> define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) {
> ; CHECK-LABEL: sdiv8x8:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    smov w10, v1.b[0]
> +; CHECK-NEXT:    smov w11, v0.b[0]
> +; CHECK-NEXT:    smov w8, v1.b[1]
> +; CHECK-NEXT:    smov w9, v0.b[1]
> +; CHECK-NEXT:    sdiv w10, w11, w10
> +; CHECK-NEXT:    smov w12, v1.b[2]
> +; CHECK-NEXT:    smov w13, v0.b[2]
> +; CHECK-NEXT:    sdiv w8, w9, w8
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    smov w14, v1.b[3]
> +; CHECK-NEXT:    smov w15, v0.b[3]
> +; CHECK-NEXT:    sdiv w12, w13, w12
> +; CHECK-NEXT:    mov v2.b[1], w8
> +; CHECK-NEXT:    smov w9, v1.b[4]
> +; CHECK-NEXT:    smov w11, v0.b[4]
> +; CHECK-NEXT:    sdiv w14, w15, w14
> +; CHECK-NEXT:    mov v2.b[2], w12
> +; CHECK-NEXT:    smov w13, v1.b[5]
> +; CHECK-NEXT:    smov w15, v0.b[5]
> +; CHECK-NEXT:    sdiv w9, w11, w9
> +; CHECK-NEXT:    mov v2.b[3], w14
> +; CHECK-NEXT:    smov w11, v1.b[6]
> +; CHECK-NEXT:    sdiv w13, w15, w13
> +; CHECK-NEXT:    smov w15, v0.b[6]
> +; CHECK-NEXT:    mov v2.b[4], w9
> +; CHECK-NEXT:    sdiv w11, w15, w11
> +; CHECK-NEXT:    smov w8, v1.b[7]
> +; CHECK-NEXT:    mov v2.b[5], w13
> +; CHECK-NEXT:    smov w9, v0.b[7]
> +; CHECK-NEXT:    mov v2.b[6], w11
> +; CHECK-NEXT:    sdiv w8, w9, w8
> +; CHECK-NEXT:    mov v2.b[7], w8
> +; CHECK-NEXT:    fmov d0, d2
> +; CHECK-NEXT:    ret
> 	%tmp3 = sdiv <8 x i8> %A, %B;
> 	ret <8 x i8> %tmp3
> }
> 
> define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) {
> ; CHECK-LABEL: sdiv16x8:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    smov w10, v1.b[0]
> +; CHECK-NEXT:    smov w11, v0.b[0]
> +; CHECK-NEXT:    smov w8, v1.b[1]
> +; CHECK-NEXT:    smov w9, v0.b[1]
> +; CHECK-NEXT:    sdiv w10, w11, w10
> +; CHECK-NEXT:    smov w12, v1.b[2]
> +; CHECK-NEXT:    smov w13, v0.b[2]
> +; CHECK-NEXT:    sdiv w8, w9, w8
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    smov w14, v1.b[3]
> +; CHECK-NEXT:    smov w15, v0.b[3]
> +; CHECK-NEXT:    sdiv w12, w13, w12
> +; CHECK-NEXT:    mov v2.b[1], w8
> +; CHECK-NEXT:    smov w16, v1.b[4]
> +; CHECK-NEXT:    smov w17, v0.b[4]
> +; CHECK-NEXT:    sdiv w14, w15, w14
> +; CHECK-NEXT:    mov v2.b[2], w12
> +; CHECK-NEXT:    smov w18, v1.b[5]
> +; CHECK-NEXT:    smov w0, v0.b[5]
> +; CHECK-NEXT:    sdiv w16, w17, w16
> +; CHECK-NEXT:    mov v2.b[3], w14
> +; CHECK-NEXT:    smov w1, v1.b[6]
> +; CHECK-NEXT:    smov w2, v0.b[6]
> +; CHECK-NEXT:    sdiv w18, w0, w18
> +; CHECK-NEXT:    mov v2.b[4], w16
> +; CHECK-NEXT:    smov w3, v1.b[7]
> +; CHECK-NEXT:    smov w4, v0.b[7]
> +; CHECK-NEXT:    sdiv w1, w2, w1
> +; CHECK-NEXT:    mov v2.b[5], w18
> +; CHECK-NEXT:    smov w9, v1.b[8]
> +; CHECK-NEXT:    smov w11, v0.b[8]
> +; CHECK-NEXT:    sdiv w3, w4, w3
> +; CHECK-NEXT:    mov v2.b[6], w1
> +; CHECK-NEXT:    smov w13, v1.b[9]
> +; CHECK-NEXT:    smov w15, v0.b[9]
> +; CHECK-NEXT:    sdiv w9, w11, w9
> +; CHECK-NEXT:    mov v2.b[7], w3
> +; CHECK-NEXT:    smov w17, v1.b[10]
> +; CHECK-NEXT:    smov w0, v0.b[10]
> +; CHECK-NEXT:    sdiv w13, w15, w13
> +; CHECK-NEXT:    mov v2.b[8], w9
> +; CHECK-NEXT:    smov w2, v1.b[11]
> +; CHECK-NEXT:    smov w4, v0.b[11]
> +; CHECK-NEXT:    sdiv w17, w0, w17
> +; CHECK-NEXT:    mov v2.b[9], w13
> +; CHECK-NEXT:    smov w11, v1.b[12]
> +; CHECK-NEXT:    smov w15, v0.b[12]
> +; CHECK-NEXT:    sdiv w2, w4, w2
> +; CHECK-NEXT:    mov v2.b[10], w17
> +; CHECK-NEXT:    smov w0, v1.b[13]
> +; CHECK-NEXT:    smov w4, v0.b[13]
> +; CHECK-NEXT:    sdiv w11, w15, w11
> +; CHECK-NEXT:    mov v2.b[11], w2
> +; CHECK-NEXT:    smov w15, v1.b[14]
> +; CHECK-NEXT:    sdiv w0, w4, w0
> +; CHECK-NEXT:    smov w4, v0.b[14]
> +; CHECK-NEXT:    mov v2.b[12], w11
> +; CHECK-NEXT:    sdiv w15, w4, w15
> +; CHECK-NEXT:    smov w8, v1.b[15]
> +; CHECK-NEXT:    mov v2.b[13], w0
> +; CHECK-NEXT:    smov w9, v0.b[15]
> +; CHECK-NEXT:    mov v2.b[14], w15
> +; CHECK-NEXT:    sdiv w8, w9, w8
> +; CHECK-NEXT:    mov v2.b[15], w8
> +; CHECK-NEXT:    mov v0.16b, v2.16b
> +; CHECK-NEXT:    ret
> 	%tmp3 = sdiv <16 x i8> %A, %B;
> 	ret <16 x i8> %tmp3
> }
> 
> define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) {
> ; CHECK-LABEL: sdiv1x16:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    smov w8, v1.h[0]
> +; CHECK-NEXT:    smov w9, v0.h[0]
> +; CHECK-NEXT:    sdiv w8, w9, w8
> +; CHECK-NEXT:    fmov s0, w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = sdiv <1 x i16> %A, %B;
> 	ret <1 x i16> %tmp3
> }
> 
> define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) {
> ; CHECK-LABEL: sdiv4x16:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    smov w10, v1.h[0]
> +; CHECK-NEXT:    smov w11, v0.h[0]
> +; CHECK-NEXT:    smov w8, v1.h[1]
> +; CHECK-NEXT:    smov w9, v0.h[1]
> +; CHECK-NEXT:    sdiv w10, w11, w10
> +; CHECK-NEXT:    sdiv w8, w9, w8
> +; CHECK-NEXT:    smov w9, v1.h[2]
> +; CHECK-NEXT:    smov w11, v0.h[2]
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    sdiv w9, w11, w9
> +; CHECK-NEXT:    smov w10, v1.h[3]
> +; CHECK-NEXT:    mov v2.h[1], w8
> +; CHECK-NEXT:    smov w8, v0.h[3]
> +; CHECK-NEXT:    mov v2.h[2], w9
> +; CHECK-NEXT:    sdiv w8, w8, w10
> +; CHECK-NEXT:    mov v2.h[3], w8
> +; CHECK-NEXT:    fmov d0, d2
> +; CHECK-NEXT:    ret
> 	%tmp3 = sdiv <4 x i16> %A, %B;
> 	ret <4 x i16> %tmp3
> }
> 
> define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) {
> ; CHECK-LABEL: sdiv8x16:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    smov w10, v1.h[0]
> +; CHECK-NEXT:    smov w11, v0.h[0]
> +; CHECK-NEXT:    smov w8, v1.h[1]
> +; CHECK-NEXT:    smov w9, v0.h[1]
> +; CHECK-NEXT:    sdiv w10, w11, w10
> +; CHECK-NEXT:    smov w12, v1.h[2]
> +; CHECK-NEXT:    smov w13, v0.h[2]
> +; CHECK-NEXT:    sdiv w8, w9, w8
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    smov w14, v1.h[3]
> +; CHECK-NEXT:    smov w15, v0.h[3]
> +; CHECK-NEXT:    sdiv w12, w13, w12
> +; CHECK-NEXT:    mov v2.h[1], w8
> +; CHECK-NEXT:    smov w9, v1.h[4]
> +; CHECK-NEXT:    smov w11, v0.h[4]
> +; CHECK-NEXT:    sdiv w14, w15, w14
> +; CHECK-NEXT:    mov v2.h[2], w12
> +; CHECK-NEXT:    smov w13, v1.h[5]
> +; CHECK-NEXT:    smov w15, v0.h[5]
> +; CHECK-NEXT:    sdiv w9, w11, w9
> +; CHECK-NEXT:    mov v2.h[3], w14
> +; CHECK-NEXT:    smov w11, v1.h[6]
> +; CHECK-NEXT:    sdiv w13, w15, w13
> +; CHECK-NEXT:    smov w15, v0.h[6]
> +; CHECK-NEXT:    mov v2.h[4], w9
> +; CHECK-NEXT:    sdiv w11, w15, w11
> +; CHECK-NEXT:    smov w8, v1.h[7]
> +; CHECK-NEXT:    mov v2.h[5], w13
> +; CHECK-NEXT:    smov w9, v0.h[7]
> +; CHECK-NEXT:    mov v2.h[6], w11
> +; CHECK-NEXT:    sdiv w8, w9, w8
> +; CHECK-NEXT:    mov v2.h[7], w8
> +; CHECK-NEXT:    mov v0.16b, v2.16b
> +; CHECK-NEXT:    ret
> 	%tmp3 = sdiv <8 x i16> %A, %B;
> 	ret <8 x i16> %tmp3
> }
> 
> define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) {
> ; CHECK-LABEL: sdiv1x32:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov w8, s1
> +; CHECK-NEXT:    fmov w9, s0
> +; CHECK-NEXT:    sdiv w8, w9, w8
> +; CHECK-NEXT:    fmov s0, w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = sdiv <1 x i32> %A, %B;
> 	ret <1 x i32> %tmp3
> }
> 
> define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) {
> ; CHECK-LABEL: sdiv2x32:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov w10, s1
> +; CHECK-NEXT:    fmov w11, s0
> +; CHECK-NEXT:    mov w8, v1.s[1]
> +; CHECK-NEXT:    mov w9, v0.s[1]
> +; CHECK-NEXT:    sdiv w10, w11, w10
> +; CHECK-NEXT:    sdiv w8, w9, w8
> +; CHECK-NEXT:    fmov s0, w10
> +; CHECK-NEXT:    mov v0.s[1], w8
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
> +; CHECK-NEXT:    ret
> 	%tmp3 = sdiv <2 x i32> %A, %B;
> 	ret <2 x i32> %tmp3
> }
> 
> define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) {
> ; CHECK-LABEL: sdiv4x32:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmov w12, s1
> +; CHECK-NEXT:    fmov w15, s0
> +; CHECK-NEXT:    mov w8, v1.s[1]
> +; CHECK-NEXT:    mov w9, v0.s[1]
> +; CHECK-NEXT:    sdiv w12, w15, w12
> +; CHECK-NEXT:    mov w10, v1.s[2]
> +; CHECK-NEXT:    mov w13, v0.s[2]
> +; CHECK-NEXT:    mov w14, v0.s[3]
> +; CHECK-NEXT:    sdiv w8, w9, w8
> +; CHECK-NEXT:    fmov s0, w12
> +; CHECK-NEXT:    mov w11, v1.s[3]
> +; CHECK-NEXT:    sdiv w9, w13, w10
> +; CHECK-NEXT:    mov v0.s[1], w8
> +; CHECK-NEXT:    mov v0.s[2], w9
> +; CHECK-NEXT:    sdiv w8, w14, w11
> +; CHECK-NEXT:    mov v0.s[3], w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = sdiv <4 x i32> %A, %B;
> 	ret <4 x i32> %tmp3
> }
> 
> define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) {
> ; CHECK-LABEL: sdiv1x64:
> -; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov x8, d1
> +; CHECK-NEXT:    fmov x9, d0
> +; CHECK-NEXT:    sdiv x8, x9, x8
> +; CHECK-NEXT:    fmov d0, x8
> +; CHECK-NEXT:    ret
> 	%tmp3 = sdiv <1 x i64> %A, %B;
> 	ret <1 x i64> %tmp3
> }
> 
> define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) {
> ; CHECK-LABEL: sdiv2x64:
> -; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> -; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmov x10, d1
> +; CHECK-NEXT:    fmov x11, d0
> +; CHECK-NEXT:    mov x8, v1.d[1]
> +; CHECK-NEXT:    mov x9, v0.d[1]
> +; CHECK-NEXT:    sdiv x10, x11, x10
> +; CHECK-NEXT:    sdiv x8, x9, x8
> +; CHECK-NEXT:    fmov d0, x10
> +; CHECK-NEXT:    mov v0.d[1], x8
> +; CHECK-NEXT:    ret
> 	%tmp3 = sdiv <2 x i64> %A, %B;
> 	ret <2 x i64> %tmp3
> }
> 
> define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) {
> ; CHECK-LABEL: udiv1x8:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    umov w8, v1.b[0]
> +; CHECK-NEXT:    umov w9, v0.b[0]
> +; CHECK-NEXT:    udiv w8, w9, w8
> +; CHECK-NEXT:    fmov s0, w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = udiv <1 x i8> %A, %B;
> 	ret <1 x i8> %tmp3
> }
> 
> define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) {
> ; CHECK-LABEL: udiv8x8:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    umov w10, v1.b[0]
> +; CHECK-NEXT:    umov w11, v0.b[0]
> +; CHECK-NEXT:    umov w8, v1.b[1]
> +; CHECK-NEXT:    umov w9, v0.b[1]
> +; CHECK-NEXT:    udiv w10, w11, w10
> +; CHECK-NEXT:    umov w12, v1.b[2]
> +; CHECK-NEXT:    umov w13, v0.b[2]
> +; CHECK-NEXT:    udiv w8, w9, w8
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    umov w14, v1.b[3]
> +; CHECK-NEXT:    umov w15, v0.b[3]
> +; CHECK-NEXT:    udiv w12, w13, w12
> +; CHECK-NEXT:    mov v2.b[1], w8
> +; CHECK-NEXT:    umov w9, v1.b[4]
> +; CHECK-NEXT:    umov w11, v0.b[4]
> +; CHECK-NEXT:    udiv w14, w15, w14
> +; CHECK-NEXT:    mov v2.b[2], w12
> +; CHECK-NEXT:    umov w13, v1.b[5]
> +; CHECK-NEXT:    umov w15, v0.b[5]
> +; CHECK-NEXT:    udiv w9, w11, w9
> +; CHECK-NEXT:    mov v2.b[3], w14
> +; CHECK-NEXT:    umov w11, v1.b[6]
> +; CHECK-NEXT:    udiv w13, w15, w13
> +; CHECK-NEXT:    umov w15, v0.b[6]
> +; CHECK-NEXT:    mov v2.b[4], w9
> +; CHECK-NEXT:    udiv w11, w15, w11
> +; CHECK-NEXT:    umov w8, v1.b[7]
> +; CHECK-NEXT:    mov v2.b[5], w13
> +; CHECK-NEXT:    umov w9, v0.b[7]
> +; CHECK-NEXT:    mov v2.b[6], w11
> +; CHECK-NEXT:    udiv w8, w9, w8
> +; CHECK-NEXT:    mov v2.b[7], w8
> +; CHECK-NEXT:    fmov d0, d2
> +; CHECK-NEXT:    ret
> 	%tmp3 = udiv <8 x i8> %A, %B;
> 	ret <8 x i8> %tmp3
> }
> 
> define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) {
> ; CHECK-LABEL: udiv16x8:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    umov w10, v1.b[0]
> +; CHECK-NEXT:    umov w11, v0.b[0]
> +; CHECK-NEXT:    umov w8, v1.b[1]
> +; CHECK-NEXT:    umov w9, v0.b[1]
> +; CHECK-NEXT:    udiv w10, w11, w10
> +; CHECK-NEXT:    umov w12, v1.b[2]
> +; CHECK-NEXT:    umov w13, v0.b[2]
> +; CHECK-NEXT:    udiv w8, w9, w8
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    umov w14, v1.b[3]
> +; CHECK-NEXT:    umov w15, v0.b[3]
> +; CHECK-NEXT:    udiv w12, w13, w12
> +; CHECK-NEXT:    mov v2.b[1], w8
> +; CHECK-NEXT:    umov w16, v1.b[4]
> +; CHECK-NEXT:    umov w17, v0.b[4]
> +; CHECK-NEXT:    udiv w14, w15, w14
> +; CHECK-NEXT:    mov v2.b[2], w12
> +; CHECK-NEXT:    umov w18, v1.b[5]
> +; CHECK-NEXT:    umov w0, v0.b[5]
> +; CHECK-NEXT:    udiv w16, w17, w16
> +; CHECK-NEXT:    mov v2.b[3], w14
> +; CHECK-NEXT:    umov w1, v1.b[6]
> +; CHECK-NEXT:    umov w2, v0.b[6]
> +; CHECK-NEXT:    udiv w18, w0, w18
> +; CHECK-NEXT:    mov v2.b[4], w16
> +; CHECK-NEXT:    umov w3, v1.b[7]
> +; CHECK-NEXT:    umov w4, v0.b[7]
> +; CHECK-NEXT:    udiv w1, w2, w1
> +; CHECK-NEXT:    mov v2.b[5], w18
> +; CHECK-NEXT:    umov w9, v1.b[8]
> +; CHECK-NEXT:    umov w11, v0.b[8]
> +; CHECK-NEXT:    udiv w3, w4, w3
> +; CHECK-NEXT:    mov v2.b[6], w1
> +; CHECK-NEXT:    umov w13, v1.b[9]
> +; CHECK-NEXT:    umov w15, v0.b[9]
> +; CHECK-NEXT:    udiv w9, w11, w9
> +; CHECK-NEXT:    mov v2.b[7], w3
> +; CHECK-NEXT:    umov w17, v1.b[10]
> +; CHECK-NEXT:    umov w0, v0.b[10]
> +; CHECK-NEXT:    udiv w13, w15, w13
> +; CHECK-NEXT:    mov v2.b[8], w9
> +; CHECK-NEXT:    umov w2, v1.b[11]
> +; CHECK-NEXT:    umov w4, v0.b[11]
> +; CHECK-NEXT:    udiv w17, w0, w17
> +; CHECK-NEXT:    mov v2.b[9], w13
> +; CHECK-NEXT:    umov w11, v1.b[12]
> +; CHECK-NEXT:    umov w15, v0.b[12]
> +; CHECK-NEXT:    udiv w2, w4, w2
> +; CHECK-NEXT:    mov v2.b[10], w17
> +; CHECK-NEXT:    umov w0, v1.b[13]
> +; CHECK-NEXT:    umov w4, v0.b[13]
> +; CHECK-NEXT:    udiv w11, w15, w11
> +; CHECK-NEXT:    mov v2.b[11], w2
> +; CHECK-NEXT:    umov w15, v1.b[14]
> +; CHECK-NEXT:    udiv w0, w4, w0
> +; CHECK-NEXT:    umov w4, v0.b[14]
> +; CHECK-NEXT:    mov v2.b[12], w11
> +; CHECK-NEXT:    udiv w15, w4, w15
> +; CHECK-NEXT:    umov w8, v1.b[15]
> +; CHECK-NEXT:    mov v2.b[13], w0
> +; CHECK-NEXT:    umov w9, v0.b[15]
> +; CHECK-NEXT:    mov v2.b[14], w15
> +; CHECK-NEXT:    udiv w8, w9, w8
> +; CHECK-NEXT:    mov v2.b[15], w8
> +; CHECK-NEXT:    mov v0.16b, v2.16b
> +; CHECK-NEXT:    ret
> 	%tmp3 = udiv <16 x i8> %A, %B;
> 	ret <16 x i8> %tmp3
> }
> 
> define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) {
> ; CHECK-LABEL: udiv1x16:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    umov w8, v1.h[0]
> +; CHECK-NEXT:    umov w9, v0.h[0]
> +; CHECK-NEXT:    udiv w8, w9, w8
> +; CHECK-NEXT:    fmov s0, w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = udiv <1 x i16> %A, %B;
> 	ret <1 x i16> %tmp3
> }
> 
> define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) {
> ; CHECK-LABEL: udiv4x16:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    umov w10, v1.h[0]
> +; CHECK-NEXT:    umov w11, v0.h[0]
> +; CHECK-NEXT:    umov w8, v1.h[1]
> +; CHECK-NEXT:    umov w9, v0.h[1]
> +; CHECK-NEXT:    udiv w10, w11, w10
> +; CHECK-NEXT:    udiv w8, w9, w8
> +; CHECK-NEXT:    umov w9, v1.h[2]
> +; CHECK-NEXT:    umov w11, v0.h[2]
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    udiv w9, w11, w9
> +; CHECK-NEXT:    umov w10, v1.h[3]
> +; CHECK-NEXT:    mov v2.h[1], w8
> +; CHECK-NEXT:    umov w8, v0.h[3]
> +; CHECK-NEXT:    mov v2.h[2], w9
> +; CHECK-NEXT:    udiv w8, w8, w10
> +; CHECK-NEXT:    mov v2.h[3], w8
> +; CHECK-NEXT:    fmov d0, d2
> +; CHECK-NEXT:    ret
> 	%tmp3 = udiv <4 x i16> %A, %B;
> 	ret <4 x i16> %tmp3
> }
> 
> define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) {
> ; CHECK-LABEL: udiv8x16:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    umov w10, v1.h[0]
> +; CHECK-NEXT:    umov w11, v0.h[0]
> +; CHECK-NEXT:    umov w8, v1.h[1]
> +; CHECK-NEXT:    umov w9, v0.h[1]
> +; CHECK-NEXT:    udiv w10, w11, w10
> +; CHECK-NEXT:    umov w12, v1.h[2]
> +; CHECK-NEXT:    umov w13, v0.h[2]
> +; CHECK-NEXT:    udiv w8, w9, w8
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    umov w14, v1.h[3]
> +; CHECK-NEXT:    umov w15, v0.h[3]
> +; CHECK-NEXT:    udiv w12, w13, w12
> +; CHECK-NEXT:    mov v2.h[1], w8
> +; CHECK-NEXT:    umov w9, v1.h[4]
> +; CHECK-NEXT:    umov w11, v0.h[4]
> +; CHECK-NEXT:    udiv w14, w15, w14
> +; CHECK-NEXT:    mov v2.h[2], w12
> +; CHECK-NEXT:    umov w13, v1.h[5]
> +; CHECK-NEXT:    umov w15, v0.h[5]
> +; CHECK-NEXT:    udiv w9, w11, w9
> +; CHECK-NEXT:    mov v2.h[3], w14
> +; CHECK-NEXT:    umov w11, v1.h[6]
> +; CHECK-NEXT:    udiv w13, w15, w13
> +; CHECK-NEXT:    umov w15, v0.h[6]
> +; CHECK-NEXT:    mov v2.h[4], w9
> +; CHECK-NEXT:    udiv w11, w15, w11
> +; CHECK-NEXT:    umov w8, v1.h[7]
> +; CHECK-NEXT:    mov v2.h[5], w13
> +; CHECK-NEXT:    umov w9, v0.h[7]
> +; CHECK-NEXT:    mov v2.h[6], w11
> +; CHECK-NEXT:    udiv w8, w9, w8
> +; CHECK-NEXT:    mov v2.h[7], w8
> +; CHECK-NEXT:    mov v0.16b, v2.16b
> +; CHECK-NEXT:    ret
> 	%tmp3 = udiv <8 x i16> %A, %B;
> 	ret <8 x i16> %tmp3
> }
> 
> define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) {
> ; CHECK-LABEL: udiv1x32:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov w8, s1
> +; CHECK-NEXT:    fmov w9, s0
> +; CHECK-NEXT:    udiv w8, w9, w8
> +; CHECK-NEXT:    fmov s0, w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = udiv <1 x i32> %A, %B;
> 	ret <1 x i32> %tmp3
> }
> 
> define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) {
> ; CHECK-LABEL: udiv2x32:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov w10, s1
> +; CHECK-NEXT:    fmov w11, s0
> +; CHECK-NEXT:    mov w8, v1.s[1]
> +; CHECK-NEXT:    mov w9, v0.s[1]
> +; CHECK-NEXT:    udiv w10, w11, w10
> +; CHECK-NEXT:    udiv w8, w9, w8
> +; CHECK-NEXT:    fmov s0, w10
> +; CHECK-NEXT:    mov v0.s[1], w8
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
> +; CHECK-NEXT:    ret
> 	%tmp3 = udiv <2 x i32> %A, %B;
> 	ret <2 x i32> %tmp3
> }
> 
> define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) {
> ; CHECK-LABEL: udiv4x32:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmov w12, s1
> +; CHECK-NEXT:    fmov w15, s0
> +; CHECK-NEXT:    mov w8, v1.s[1]
> +; CHECK-NEXT:    mov w9, v0.s[1]
> +; CHECK-NEXT:    udiv w12, w15, w12
> +; CHECK-NEXT:    mov w10, v1.s[2]
> +; CHECK-NEXT:    mov w13, v0.s[2]
> +; CHECK-NEXT:    mov w14, v0.s[3]
> +; CHECK-NEXT:    udiv w8, w9, w8
> +; CHECK-NEXT:    fmov s0, w12
> +; CHECK-NEXT:    mov w11, v1.s[3]
> +; CHECK-NEXT:    udiv w9, w13, w10
> +; CHECK-NEXT:    mov v0.s[1], w8
> +; CHECK-NEXT:    mov v0.s[2], w9
> +; CHECK-NEXT:    udiv w8, w14, w11
> +; CHECK-NEXT:    mov v0.s[3], w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = udiv <4 x i32> %A, %B;
> 	ret <4 x i32> %tmp3
> }
> 
> define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) {
> ; CHECK-LABEL: udiv1x64:
> -; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov x8, d1
> +; CHECK-NEXT:    fmov x9, d0
> +; CHECK-NEXT:    udiv x8, x9, x8
> +; CHECK-NEXT:    fmov d0, x8
> +; CHECK-NEXT:    ret
> 	%tmp3 = udiv <1 x i64> %A, %B;
> 	ret <1 x i64> %tmp3
> }
> 
> define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) {
> ; CHECK-LABEL: udiv2x64:
> -; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> -; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmov x10, d1
> +; CHECK-NEXT:    fmov x11, d0
> +; CHECK-NEXT:    mov x8, v1.d[1]
> +; CHECK-NEXT:    mov x9, v0.d[1]
> +; CHECK-NEXT:    udiv x10, x11, x10
> +; CHECK-NEXT:    udiv x8, x9, x8
> +; CHECK-NEXT:    fmov d0, x10
> +; CHECK-NEXT:    mov v0.d[1], x8
> +; CHECK-NEXT:    ret
> 	%tmp3 = udiv <2 x i64> %A, %B;
> 	ret <2 x i64> %tmp3
> }
> 
> define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) {
> ; CHECK-LABEL: srem1x8:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    smov w8, v1.b[0]
> +; CHECK-NEXT:    smov w9, v0.b[0]
> +; CHECK-NEXT:    sdiv w10, w9, w8
> +; CHECK-NEXT:    msub w8, w10, w8, w9
> +; CHECK-NEXT:    fmov s0, w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = srem <1 x i8> %A, %B;
> 	ret <1 x i8> %tmp3
> }
> 
> define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) {
> ; CHECK-LABEL: srem8x8:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    smov w8, v1.b[1]
> +; CHECK-NEXT:    smov w9, v0.b[1]
> +; CHECK-NEXT:    smov w10, v1.b[0]
> +; CHECK-NEXT:    smov w11, v0.b[0]
> +; CHECK-NEXT:    sdiv w16, w9, w8
> +; CHECK-NEXT:    smov w12, v1.b[2]
> +; CHECK-NEXT:    smov w13, v0.b[2]
> +; CHECK-NEXT:    msub w8, w16, w8, w9
> +; CHECK-NEXT:    sdiv w16, w11, w10
> +; CHECK-NEXT:    smov w14, v1.b[3]
> +; CHECK-NEXT:    smov w15, v0.b[3]
> +; CHECK-NEXT:    msub w10, w16, w10, w11
> +; CHECK-NEXT:    sdiv w16, w13, w12
> +; CHECK-NEXT:    smov w9, v1.b[4]
> +; CHECK-NEXT:    smov w11, v0.b[4]
> +; CHECK-NEXT:    msub w12, w16, w12, w13
> +; CHECK-NEXT:    sdiv w16, w15, w14
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    smov w13, v1.b[5]
> +; CHECK-NEXT:    msub w14, w16, w14, w15
> +; CHECK-NEXT:    smov w15, v0.b[5]
> +; CHECK-NEXT:    sdiv w16, w11, w9
> +; CHECK-NEXT:    mov v2.b[1], w8
> +; CHECK-NEXT:    msub w9, w16, w9, w11
> +; CHECK-NEXT:    sdiv w16, w15, w13
> +; CHECK-NEXT:    mov v2.b[2], w12
> +; CHECK-NEXT:    smov w11, v1.b[6]
> +; CHECK-NEXT:    msub w13, w16, w13, w15
> +; CHECK-NEXT:    smov w15, v0.b[6]
> +; CHECK-NEXT:    mov v2.b[3], w14
> +; CHECK-NEXT:    sdiv w16, w15, w11
> +; CHECK-NEXT:    smov w8, v1.b[7]
> +; CHECK-NEXT:    mov v2.b[4], w9
> +; CHECK-NEXT:    smov w9, v0.b[7]
> +; CHECK-NEXT:    msub w11, w16, w11, w15
> +; CHECK-NEXT:    mov v2.b[5], w13
> +; CHECK-NEXT:    sdiv w10, w9, w8
> +; CHECK-NEXT:    mov v2.b[6], w11
> +; CHECK-NEXT:    msub w8, w10, w8, w9
> +; CHECK-NEXT:    mov v2.b[7], w8
> +; CHECK-NEXT:    fmov d0, d2
> +; CHECK-NEXT:    ret
> 	%tmp3 = srem <8 x i8> %A, %B;
> 	ret <8 x i8> %tmp3
> }
> 
> define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) {
> ; CHECK-LABEL: srem16x8:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    smov w8, v1.b[1]
> +; CHECK-NEXT:    smov w9, v0.b[1]
> +; CHECK-NEXT:    smov w10, v1.b[0]
> +; CHECK-NEXT:    smov w11, v0.b[0]
> +; CHECK-NEXT:    sdiv w5, w9, w8
> +; CHECK-NEXT:    smov w12, v1.b[2]
> +; CHECK-NEXT:    smov w13, v0.b[2]
> +; CHECK-NEXT:    msub w8, w5, w8, w9
> +; CHECK-NEXT:    sdiv w5, w11, w10
> +; CHECK-NEXT:    smov w14, v1.b[3]
> +; CHECK-NEXT:    smov w15, v0.b[3]
> +; CHECK-NEXT:    msub w10, w5, w10, w11
> +; CHECK-NEXT:    sdiv w5, w13, w12
> +; CHECK-NEXT:    smov w16, v1.b[4]
> +; CHECK-NEXT:    smov w17, v0.b[4]
> +; CHECK-NEXT:    msub w12, w5, w12, w13
> +; CHECK-NEXT:    sdiv w5, w15, w14
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    smov w18, v1.b[5]
> +; CHECK-NEXT:    smov w0, v0.b[5]
> +; CHECK-NEXT:    msub w14, w5, w14, w15
> +; CHECK-NEXT:    sdiv w5, w17, w16
> +; CHECK-NEXT:    mov v2.b[1], w8
> +; CHECK-NEXT:    smov w1, v1.b[6]
> +; CHECK-NEXT:    smov w2, v0.b[6]
> +; CHECK-NEXT:    msub w16, w5, w16, w17
> +; CHECK-NEXT:    sdiv w5, w0, w18
> +; CHECK-NEXT:    mov v2.b[2], w12
> +; CHECK-NEXT:    smov w3, v1.b[7]
> +; CHECK-NEXT:    smov w4, v0.b[7]
> +; CHECK-NEXT:    msub w18, w5, w18, w0
> +; CHECK-NEXT:    sdiv w5, w2, w1
> +; CHECK-NEXT:    mov v2.b[3], w14
> +; CHECK-NEXT:    smov w9, v1.b[8]
> +; CHECK-NEXT:    smov w11, v0.b[8]
> +; CHECK-NEXT:    msub w1, w5, w1, w2
> +; CHECK-NEXT:    sdiv w5, w4, w3
> +; CHECK-NEXT:    mov v2.b[4], w16
> +; CHECK-NEXT:    smov w13, v1.b[9]
> +; CHECK-NEXT:    smov w15, v0.b[9]
> +; CHECK-NEXT:    msub w3, w5, w3, w4
> +; CHECK-NEXT:    sdiv w5, w11, w9
> +; CHECK-NEXT:    mov v2.b[5], w18
> +; CHECK-NEXT:    smov w17, v1.b[10]
> +; CHECK-NEXT:    smov w0, v0.b[10]
> +; CHECK-NEXT:    msub w9, w5, w9, w11
> +; CHECK-NEXT:    sdiv w5, w15, w13
> +; CHECK-NEXT:    mov v2.b[6], w1
> +; CHECK-NEXT:    smov w2, v1.b[11]
> +; CHECK-NEXT:    smov w4, v0.b[11]
> +; CHECK-NEXT:    msub w13, w5, w13, w15
> +; CHECK-NEXT:    sdiv w5, w0, w17
> +; CHECK-NEXT:    mov v2.b[7], w3
> +; CHECK-NEXT:    smov w11, v1.b[12]
> +; CHECK-NEXT:    smov w15, v0.b[12]
> +; CHECK-NEXT:    msub w17, w5, w17, w0
> +; CHECK-NEXT:    sdiv w5, w4, w2
> +; CHECK-NEXT:    mov v2.b[8], w9
> +; CHECK-NEXT:    smov w0, v1.b[13]
> +; CHECK-NEXT:    msub w2, w5, w2, w4
> +; CHECK-NEXT:    smov w4, v0.b[13]
> +; CHECK-NEXT:    sdiv w5, w15, w11
> +; CHECK-NEXT:    mov v2.b[9], w13
> +; CHECK-NEXT:    msub w11, w5, w11, w15
> +; CHECK-NEXT:    sdiv w5, w4, w0
> +; CHECK-NEXT:    mov v2.b[10], w17
> +; CHECK-NEXT:    smov w15, v1.b[14]
> +; CHECK-NEXT:    msub w0, w5, w0, w4
> +; CHECK-NEXT:    smov w4, v0.b[14]
> +; CHECK-NEXT:    mov v2.b[11], w2
> +; CHECK-NEXT:    sdiv w5, w4, w15
> +; CHECK-NEXT:    smov w8, v1.b[15]
> +; CHECK-NEXT:    mov v2.b[12], w11
> +; CHECK-NEXT:    smov w9, v0.b[15]
> +; CHECK-NEXT:    msub w15, w5, w15, w4
> +; CHECK-NEXT:    mov v2.b[13], w0
> +; CHECK-NEXT:    sdiv w10, w9, w8
> +; CHECK-NEXT:    mov v2.b[14], w15
> +; CHECK-NEXT:    msub w8, w10, w8, w9
> +; CHECK-NEXT:    mov v2.b[15], w8
> +; CHECK-NEXT:    mov v0.16b, v2.16b
> +; CHECK-NEXT:    ret
> 	%tmp3 = srem <16 x i8> %A, %B;
> 	ret <16 x i8> %tmp3
> }
> 
> define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) {
> ; CHECK-LABEL: srem1x16:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    smov w8, v1.h[0]
> +; CHECK-NEXT:    smov w9, v0.h[0]
> +; CHECK-NEXT:    sdiv w10, w9, w8
> +; CHECK-NEXT:    msub w8, w10, w8, w9
> +; CHECK-NEXT:    fmov s0, w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = srem <1 x i16> %A, %B;
> 	ret <1 x i16> %tmp3
> }
> 
> define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) {
> ; CHECK-LABEL: srem4x16:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    smov w8, v1.h[1]
> +; CHECK-NEXT:    smov w9, v0.h[1]
> +; CHECK-NEXT:    smov w10, v1.h[0]
> +; CHECK-NEXT:    smov w11, v0.h[0]
> +; CHECK-NEXT:    sdiv w12, w9, w8
> +; CHECK-NEXT:    msub w8, w12, w8, w9
> +; CHECK-NEXT:    sdiv w12, w11, w10
> +; CHECK-NEXT:    smov w9, v1.h[2]
> +; CHECK-NEXT:    msub w10, w12, w10, w11
> +; CHECK-NEXT:    smov w11, v0.h[2]
> +; CHECK-NEXT:    sdiv w12, w11, w9
> +; CHECK-NEXT:    msub w9, w12, w9, w11
> +; CHECK-NEXT:    smov w11, v1.h[3]
> +; CHECK-NEXT:    fmov s1, w10
> +; CHECK-NEXT:    smov w10, v0.h[3]
> +; CHECK-NEXT:    mov v1.h[1], w8
> +; CHECK-NEXT:    sdiv w8, w10, w11
> +; CHECK-NEXT:    mov v1.h[2], w9
> +; CHECK-NEXT:    msub w8, w8, w11, w10
> +; CHECK-NEXT:    mov v1.h[3], w8
> +; CHECK-NEXT:    fmov d0, d1
> +; CHECK-NEXT:    ret
> 	%tmp3 = srem <4 x i16> %A, %B;
> 	ret <4 x i16> %tmp3
> }
> 
> define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) {
> ; CHECK-LABEL: srem8x16:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    smov w8, v1.h[1]
> +; CHECK-NEXT:    smov w9, v0.h[1]
> +; CHECK-NEXT:    smov w10, v1.h[0]
> +; CHECK-NEXT:    smov w11, v0.h[0]
> +; CHECK-NEXT:    sdiv w16, w9, w8
> +; CHECK-NEXT:    smov w12, v1.h[2]
> +; CHECK-NEXT:    smov w13, v0.h[2]
> +; CHECK-NEXT:    msub w8, w16, w8, w9
> +; CHECK-NEXT:    sdiv w16, w11, w10
> +; CHECK-NEXT:    smov w14, v1.h[3]
> +; CHECK-NEXT:    smov w15, v0.h[3]
> +; CHECK-NEXT:    msub w10, w16, w10, w11
> +; CHECK-NEXT:    sdiv w16, w13, w12
> +; CHECK-NEXT:    smov w9, v1.h[4]
> +; CHECK-NEXT:    smov w11, v0.h[4]
> +; CHECK-NEXT:    msub w12, w16, w12, w13
> +; CHECK-NEXT:    sdiv w16, w15, w14
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    smov w13, v1.h[5]
> +; CHECK-NEXT:    msub w14, w16, w14, w15
> +; CHECK-NEXT:    smov w15, v0.h[5]
> +; CHECK-NEXT:    sdiv w16, w11, w9
> +; CHECK-NEXT:    mov v2.h[1], w8
> +; CHECK-NEXT:    msub w9, w16, w9, w11
> +; CHECK-NEXT:    sdiv w16, w15, w13
> +; CHECK-NEXT:    mov v2.h[2], w12
> +; CHECK-NEXT:    smov w11, v1.h[6]
> +; CHECK-NEXT:    msub w13, w16, w13, w15
> +; CHECK-NEXT:    smov w15, v0.h[6]
> +; CHECK-NEXT:    mov v2.h[3], w14
> +; CHECK-NEXT:    sdiv w16, w15, w11
> +; CHECK-NEXT:    smov w8, v1.h[7]
> +; CHECK-NEXT:    mov v2.h[4], w9
> +; CHECK-NEXT:    smov w9, v0.h[7]
> +; CHECK-NEXT:    msub w11, w16, w11, w15
> +; CHECK-NEXT:    mov v2.h[5], w13
> +; CHECK-NEXT:    sdiv w10, w9, w8
> +; CHECK-NEXT:    mov v2.h[6], w11
> +; CHECK-NEXT:    msub w8, w10, w8, w9
> +; CHECK-NEXT:    mov v2.h[7], w8
> +; CHECK-NEXT:    mov v0.16b, v2.16b
> +; CHECK-NEXT:    ret
> 	%tmp3 = srem <8 x i16> %A, %B;
> 	ret <8 x i16> %tmp3
> }
> 
> define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
> ; CHECK-LABEL: srem1x32:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov w8, s1
> +; CHECK-NEXT:    fmov w9, s0
> +; CHECK-NEXT:    sdiv w10, w9, w8
> +; CHECK-NEXT:    msub w8, w10, w8, w9
> +; CHECK-NEXT:    fmov s0, w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = srem <1 x i32> %A, %B;
> 	ret <1 x i32> %tmp3
> }
> 
> define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
> ; CHECK-LABEL: srem2x32:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov w10, s1
> +; CHECK-NEXT:    fmov w11, s0
> +; CHECK-NEXT:    mov w8, v1.s[1]
> +; CHECK-NEXT:    mov w9, v0.s[1]
> +; CHECK-NEXT:    sdiv w12, w11, w10
> +; CHECK-NEXT:    sdiv w13, w9, w8
> +; CHECK-NEXT:    msub w10, w12, w10, w11
> +; CHECK-NEXT:    msub w8, w13, w8, w9
> +; CHECK-NEXT:    fmov s0, w10
> +; CHECK-NEXT:    mov v0.s[1], w8
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
> +; CHECK-NEXT:    ret
> 	%tmp3 = srem <2 x i32> %A, %B;
> 	ret <2 x i32> %tmp3
> }
> 
> define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) {
> ; CHECK-LABEL: srem4x32:
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmov w12, s1
> +; CHECK-NEXT:    fmov w15, s0
> +; CHECK-NEXT:    mov w8, v1.s[1]
> +; CHECK-NEXT:    mov w9, v0.s[1]
> +; CHECK-NEXT:    sdiv w16, w15, w12
> +; CHECK-NEXT:    mov w10, v1.s[2]
> +; CHECK-NEXT:    mov w13, v0.s[2]
> +; CHECK-NEXT:    msub w12, w16, w12, w15
> +; CHECK-NEXT:    sdiv w15, w9, w8
> +; CHECK-NEXT:    mov w11, v1.s[3]
> +; CHECK-NEXT:    mov w14, v0.s[3]
> +; CHECK-NEXT:    msub w8, w15, w8, w9
> +; CHECK-NEXT:    sdiv w9, w13, w10
> +; CHECK-NEXT:    fmov s0, w12
> +; CHECK-NEXT:    msub w9, w9, w10, w13
> +; CHECK-NEXT:    mov v0.s[1], w8
> +; CHECK-NEXT:    sdiv w8, w14, w11
> +; CHECK-NEXT:    mov v0.s[2], w9
> +; CHECK-NEXT:    msub w8, w8, w11, w14
> +; CHECK-NEXT:    mov v0.s[3], w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = srem <4 x i32> %A, %B;
> 	ret <4 x i32> %tmp3
> }
> 
> define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) {
> ; CHECK-LABEL: srem1x64:
> -; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> -; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov x8, d1
> +; CHECK-NEXT:    fmov x9, d0
> +; CHECK-NEXT:    sdiv x10, x9, x8
> +; CHECK-NEXT:    msub x8, x10, x8, x9
> +; CHECK-NEXT:    fmov d0, x8
> +; CHECK-NEXT:    ret
> 	%tmp3 = srem <1 x i64> %A, %B;
> 	ret <1 x i64> %tmp3
> }
> 
> define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) {
> ; CHECK-LABEL: srem2x64:
> -; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> -; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> -; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> -; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmov x10, d1
> +; CHECK-NEXT:    fmov x11, d0
> +; CHECK-NEXT:    mov x8, v1.d[1]
> +; CHECK-NEXT:    mov x9, v0.d[1]
> +; CHECK-NEXT:    sdiv x12, x11, x10
> +; CHECK-NEXT:    sdiv x13, x9, x8
> +; CHECK-NEXT:    msub x10, x12, x10, x11
> +; CHECK-NEXT:    msub x8, x13, x8, x9
> +; CHECK-NEXT:    fmov d0, x10
> +; CHECK-NEXT:    mov v0.d[1], x8
> +; CHECK-NEXT:    ret
> 	%tmp3 = srem <2 x i64> %A, %B;
> 	ret <2 x i64> %tmp3
> }
> 
> define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) {
> ; CHECK-LABEL: urem1x8:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    umov w8, v1.b[0]
> +; CHECK-NEXT:    umov w9, v0.b[0]
> +; CHECK-NEXT:    udiv w10, w9, w8
> +; CHECK-NEXT:    msub w8, w10, w8, w9
> +; CHECK-NEXT:    fmov s0, w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = urem <1 x i8> %A, %B;
> 	ret <1 x i8> %tmp3
> }
> 
> define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) {
> ; CHECK-LABEL: urem8x8:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    umov w8, v1.b[1]
> +; CHECK-NEXT:    umov w9, v0.b[1]
> +; CHECK-NEXT:    umov w10, v1.b[0]
> +; CHECK-NEXT:    umov w11, v0.b[0]
> +; CHECK-NEXT:    udiv w16, w9, w8
> +; CHECK-NEXT:    umov w12, v1.b[2]
> +; CHECK-NEXT:    umov w13, v0.b[2]
> +; CHECK-NEXT:    msub w8, w16, w8, w9
> +; CHECK-NEXT:    udiv w16, w11, w10
> +; CHECK-NEXT:    umov w14, v1.b[3]
> +; CHECK-NEXT:    umov w15, v0.b[3]
> +; CHECK-NEXT:    msub w10, w16, w10, w11
> +; CHECK-NEXT:    udiv w16, w13, w12
> +; CHECK-NEXT:    umov w9, v1.b[4]
> +; CHECK-NEXT:    umov w11, v0.b[4]
> +; CHECK-NEXT:    msub w12, w16, w12, w13
> +; CHECK-NEXT:    udiv w16, w15, w14
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    umov w13, v1.b[5]
> +; CHECK-NEXT:    msub w14, w16, w14, w15
> +; CHECK-NEXT:    umov w15, v0.b[5]
> +; CHECK-NEXT:    udiv w16, w11, w9
> +; CHECK-NEXT:    mov v2.b[1], w8
> +; CHECK-NEXT:    msub w9, w16, w9, w11
> +; CHECK-NEXT:    udiv w16, w15, w13
> +; CHECK-NEXT:    mov v2.b[2], w12
> +; CHECK-NEXT:    umov w11, v1.b[6]
> +; CHECK-NEXT:    msub w13, w16, w13, w15
> +; CHECK-NEXT:    umov w15, v0.b[6]
> +; CHECK-NEXT:    mov v2.b[3], w14
> +; CHECK-NEXT:    udiv w16, w15, w11
> +; CHECK-NEXT:    umov w8, v1.b[7]
> +; CHECK-NEXT:    mov v2.b[4], w9
> +; CHECK-NEXT:    umov w9, v0.b[7]
> +; CHECK-NEXT:    msub w11, w16, w11, w15
> +; CHECK-NEXT:    mov v2.b[5], w13
> +; CHECK-NEXT:    udiv w10, w9, w8
> +; CHECK-NEXT:    mov v2.b[6], w11
> +; CHECK-NEXT:    msub w8, w10, w8, w9
> +; CHECK-NEXT:    mov v2.b[7], w8
> +; CHECK-NEXT:    fmov d0, d2
> +; CHECK-NEXT:    ret
> 	%tmp3 = urem <8 x i8> %A, %B;
> 	ret <8 x i8> %tmp3
> }
> 
> define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) {
> ; CHECK-LABEL: urem16x8:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    umov w8, v1.b[1]
> +; CHECK-NEXT:    umov w9, v0.b[1]
> +; CHECK-NEXT:    umov w10, v1.b[0]
> +; CHECK-NEXT:    umov w11, v0.b[0]
> +; CHECK-NEXT:    udiv w5, w9, w8
> +; CHECK-NEXT:    umov w12, v1.b[2]
> +; CHECK-NEXT:    umov w13, v0.b[2]
> +; CHECK-NEXT:    msub w8, w5, w8, w9
> +; CHECK-NEXT:    udiv w5, w11, w10
> +; CHECK-NEXT:    umov w14, v1.b[3]
> +; CHECK-NEXT:    umov w15, v0.b[3]
> +; CHECK-NEXT:    msub w10, w5, w10, w11
> +; CHECK-NEXT:    udiv w5, w13, w12
> +; CHECK-NEXT:    umov w16, v1.b[4]
> +; CHECK-NEXT:    umov w17, v0.b[4]
> +; CHECK-NEXT:    msub w12, w5, w12, w13
> +; CHECK-NEXT:    udiv w5, w15, w14
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    umov w18, v1.b[5]
> +; CHECK-NEXT:    umov w0, v0.b[5]
> +; CHECK-NEXT:    msub w14, w5, w14, w15
> +; CHECK-NEXT:    udiv w5, w17, w16
> +; CHECK-NEXT:    mov v2.b[1], w8
> +; CHECK-NEXT:    umov w1, v1.b[6]
> +; CHECK-NEXT:    umov w2, v0.b[6]
> +; CHECK-NEXT:    msub w16, w5, w16, w17
> +; CHECK-NEXT:    udiv w5, w0, w18
> +; CHECK-NEXT:    mov v2.b[2], w12
> +; CHECK-NEXT:    umov w3, v1.b[7]
> +; CHECK-NEXT:    umov w4, v0.b[7]
> +; CHECK-NEXT:    msub w18, w5, w18, w0
> +; CHECK-NEXT:    udiv w5, w2, w1
> +; CHECK-NEXT:    mov v2.b[3], w14
> +; CHECK-NEXT:    umov w9, v1.b[8]
> +; CHECK-NEXT:    umov w11, v0.b[8]
> +; CHECK-NEXT:    msub w1, w5, w1, w2
> +; CHECK-NEXT:    udiv w5, w4, w3
> +; CHECK-NEXT:    mov v2.b[4], w16
> +; CHECK-NEXT:    umov w13, v1.b[9]
> +; CHECK-NEXT:    umov w15, v0.b[9]
> +; CHECK-NEXT:    msub w3, w5, w3, w4
> +; CHECK-NEXT:    udiv w5, w11, w9
> +; CHECK-NEXT:    mov v2.b[5], w18
> +; CHECK-NEXT:    umov w17, v1.b[10]
> +; CHECK-NEXT:    umov w0, v0.b[10]
> +; CHECK-NEXT:    msub w9, w5, w9, w11
> +; CHECK-NEXT:    udiv w5, w15, w13
> +; CHECK-NEXT:    mov v2.b[6], w1
> +; CHECK-NEXT:    umov w2, v1.b[11]
> +; CHECK-NEXT:    umov w4, v0.b[11]
> +; CHECK-NEXT:    msub w13, w5, w13, w15
> +; CHECK-NEXT:    udiv w5, w0, w17
> +; CHECK-NEXT:    mov v2.b[7], w3
> +; CHECK-NEXT:    umov w11, v1.b[12]
> +; CHECK-NEXT:    umov w15, v0.b[12]
> +; CHECK-NEXT:    msub w17, w5, w17, w0
> +; CHECK-NEXT:    udiv w5, w4, w2
> +; CHECK-NEXT:    mov v2.b[8], w9
> +; CHECK-NEXT:    umov w0, v1.b[13]
> +; CHECK-NEXT:    msub w2, w5, w2, w4
> +; CHECK-NEXT:    umov w4, v0.b[13]
> +; CHECK-NEXT:    udiv w5, w15, w11
> +; CHECK-NEXT:    mov v2.b[9], w13
> +; CHECK-NEXT:    msub w11, w5, w11, w15
> +; CHECK-NEXT:    udiv w5, w4, w0
> +; CHECK-NEXT:    mov v2.b[10], w17
> +; CHECK-NEXT:    umov w15, v1.b[14]
> +; CHECK-NEXT:    msub w0, w5, w0, w4
> +; CHECK-NEXT:    umov w4, v0.b[14]
> +; CHECK-NEXT:    mov v2.b[11], w2
> +; CHECK-NEXT:    udiv w5, w4, w15
> +; CHECK-NEXT:    umov w8, v1.b[15]
> +; CHECK-NEXT:    mov v2.b[12], w11
> +; CHECK-NEXT:    umov w9, v0.b[15]
> +; CHECK-NEXT:    msub w15, w5, w15, w4
> +; CHECK-NEXT:    mov v2.b[13], w0
> +; CHECK-NEXT:    udiv w10, w9, w8
> +; CHECK-NEXT:    mov v2.b[14], w15
> +; CHECK-NEXT:    msub w8, w10, w8, w9
> +; CHECK-NEXT:    mov v2.b[15], w8
> +; CHECK-NEXT:    mov v0.16b, v2.16b
> +; CHECK-NEXT:    ret
> 	%tmp3 = urem <16 x i8> %A, %B;
> 	ret <16 x i8> %tmp3
> }
> 
> define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) {
> ; CHECK-LABEL: urem1x16:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    umov w8, v1.h[0]
> +; CHECK-NEXT:    umov w9, v0.h[0]
> +; CHECK-NEXT:    udiv w10, w9, w8
> +; CHECK-NEXT:    msub w8, w10, w8, w9
> +; CHECK-NEXT:    fmov s0, w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = urem <1 x i16> %A, %B;
> 	ret <1 x i16> %tmp3
> }
> 
> define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) {
> ; CHECK-LABEL: urem4x16:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    umov w8, v1.h[1]
> +; CHECK-NEXT:    umov w9, v0.h[1]
> +; CHECK-NEXT:    umov w10, v1.h[0]
> +; CHECK-NEXT:    umov w11, v0.h[0]
> +; CHECK-NEXT:    udiv w12, w9, w8
> +; CHECK-NEXT:    msub w8, w12, w8, w9
> +; CHECK-NEXT:    udiv w12, w11, w10
> +; CHECK-NEXT:    umov w9, v1.h[2]
> +; CHECK-NEXT:    msub w10, w12, w10, w11
> +; CHECK-NEXT:    umov w11, v0.h[2]
> +; CHECK-NEXT:    udiv w12, w11, w9
> +; CHECK-NEXT:    msub w9, w12, w9, w11
> +; CHECK-NEXT:    umov w11, v1.h[3]
> +; CHECK-NEXT:    fmov s1, w10
> +; CHECK-NEXT:    umov w10, v0.h[3]
> +; CHECK-NEXT:    mov v1.h[1], w8
> +; CHECK-NEXT:    udiv w8, w10, w11
> +; CHECK-NEXT:    mov v1.h[2], w9
> +; CHECK-NEXT:    msub w8, w8, w11, w10
> +; CHECK-NEXT:    mov v1.h[3], w8
> +; CHECK-NEXT:    fmov d0, d1
> +; CHECK-NEXT:    ret
> 	%tmp3 = urem <4 x i16> %A, %B;
> 	ret <4 x i16> %tmp3
> }
> 
> define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) {
> ; CHECK-LABEL: urem8x16:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    umov w8, v1.h[1]
> +; CHECK-NEXT:    umov w9, v0.h[1]
> +; CHECK-NEXT:    umov w10, v1.h[0]
> +; CHECK-NEXT:    umov w11, v0.h[0]
> +; CHECK-NEXT:    udiv w16, w9, w8
> +; CHECK-NEXT:    umov w12, v1.h[2]
> +; CHECK-NEXT:    umov w13, v0.h[2]
> +; CHECK-NEXT:    msub w8, w16, w8, w9
> +; CHECK-NEXT:    udiv w16, w11, w10
> +; CHECK-NEXT:    umov w14, v1.h[3]
> +; CHECK-NEXT:    umov w15, v0.h[3]
> +; CHECK-NEXT:    msub w10, w16, w10, w11
> +; CHECK-NEXT:    udiv w16, w13, w12
> +; CHECK-NEXT:    umov w9, v1.h[4]
> +; CHECK-NEXT:    umov w11, v0.h[4]
> +; CHECK-NEXT:    msub w12, w16, w12, w13
> +; CHECK-NEXT:    udiv w16, w15, w14
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    umov w13, v1.h[5]
> +; CHECK-NEXT:    msub w14, w16, w14, w15
> +; CHECK-NEXT:    umov w15, v0.h[5]
> +; CHECK-NEXT:    udiv w16, w11, w9
> +; CHECK-NEXT:    mov v2.h[1], w8
> +; CHECK-NEXT:    msub w9, w16, w9, w11
> +; CHECK-NEXT:    udiv w16, w15, w13
> +; CHECK-NEXT:    mov v2.h[2], w12
> +; CHECK-NEXT:    umov w11, v1.h[6]
> +; CHECK-NEXT:    msub w13, w16, w13, w15
> +; CHECK-NEXT:    umov w15, v0.h[6]
> +; CHECK-NEXT:    mov v2.h[3], w14
> +; CHECK-NEXT:    udiv w16, w15, w11
> +; CHECK-NEXT:    umov w8, v1.h[7]
> +; CHECK-NEXT:    mov v2.h[4], w9
> +; CHECK-NEXT:    umov w9, v0.h[7]
> +; CHECK-NEXT:    msub w11, w16, w11, w15
> +; CHECK-NEXT:    mov v2.h[5], w13
> +; CHECK-NEXT:    udiv w10, w9, w8
> +; CHECK-NEXT:    mov v2.h[6], w11
> +; CHECK-NEXT:    msub w8, w10, w8, w9
> +; CHECK-NEXT:    mov v2.h[7], w8
> +; CHECK-NEXT:    mov v0.16b, v2.16b
> +; CHECK-NEXT:    ret
> 	%tmp3 = urem <8 x i16> %A, %B;
> 	ret <8 x i16> %tmp3
> }
> 
> define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) {
> ; CHECK-LABEL: urem1x32:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov w8, s1
> +; CHECK-NEXT:    fmov w9, s0
> +; CHECK-NEXT:    udiv w10, w9, w8
> +; CHECK-NEXT:    msub w8, w10, w8, w9
> +; CHECK-NEXT:    fmov s0, w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = urem <1 x i32> %A, %B;
> 	ret <1 x i32> %tmp3
> }
> 
> define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) {
> ; CHECK-LABEL: urem2x32:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov w10, s1
> +; CHECK-NEXT:    fmov w11, s0
> +; CHECK-NEXT:    mov w8, v1.s[1]
> +; CHECK-NEXT:    mov w9, v0.s[1]
> +; CHECK-NEXT:    udiv w12, w11, w10
> +; CHECK-NEXT:    udiv w13, w9, w8
> +; CHECK-NEXT:    msub w10, w12, w10, w11
> +; CHECK-NEXT:    msub w8, w13, w8, w9
> +; CHECK-NEXT:    fmov s0, w10
> +; CHECK-NEXT:    mov v0.s[1], w8
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
> +; CHECK-NEXT:    ret
> 	%tmp3 = urem <2 x i32> %A, %B;
> 	ret <2 x i32> %tmp3
> }
> 
> define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) {
> ; CHECK-LABEL: urem4x32:
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> -; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmov w12, s1
> +; CHECK-NEXT:    fmov w15, s0
> +; CHECK-NEXT:    mov w8, v1.s[1]
> +; CHECK-NEXT:    mov w9, v0.s[1]
> +; CHECK-NEXT:    udiv w16, w15, w12
> +; CHECK-NEXT:    mov w10, v1.s[2]
> +; CHECK-NEXT:    mov w13, v0.s[2]
> +; CHECK-NEXT:    msub w12, w16, w12, w15
> +; CHECK-NEXT:    udiv w15, w9, w8
> +; CHECK-NEXT:    mov w11, v1.s[3]
> +; CHECK-NEXT:    mov w14, v0.s[3]
> +; CHECK-NEXT:    msub w8, w15, w8, w9
> +; CHECK-NEXT:    udiv w9, w13, w10
> +; CHECK-NEXT:    fmov s0, w12
> +; CHECK-NEXT:    msub w9, w9, w10, w13
> +; CHECK-NEXT:    mov v0.s[1], w8
> +; CHECK-NEXT:    udiv w8, w14, w11
> +; CHECK-NEXT:    mov v0.s[2], w9
> +; CHECK-NEXT:    msub w8, w8, w11, w14
> +; CHECK-NEXT:    mov v0.s[3], w8
> +; CHECK-NEXT:    ret
> 	%tmp3 = urem <4 x i32> %A, %B;
> 	ret <4 x i32> %tmp3
> }
> 
> define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) {
> ; CHECK-LABEL: urem1x64:
> -; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> -; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    fmov x8, d1
> +; CHECK-NEXT:    fmov x9, d0
> +; CHECK-NEXT:    udiv x10, x9, x8
> +; CHECK-NEXT:    msub x8, x10, x8, x9
> +; CHECK-NEXT:    fmov d0, x8
> +; CHECK-NEXT:    ret
> 	%tmp3 = urem <1 x i64> %A, %B;
> 	ret <1 x i64> %tmp3
> }
> 
> define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) {
> ; CHECK-LABEL: urem2x64:
> -; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> -; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> -; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> -; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmov x10, d1
> +; CHECK-NEXT:    fmov x11, d0
> +; CHECK-NEXT:    mov x8, v1.d[1]
> +; CHECK-NEXT:    mov x9, v0.d[1]
> +; CHECK-NEXT:    udiv x12, x11, x10
> +; CHECK-NEXT:    udiv x13, x9, x8
> +; CHECK-NEXT:    msub x10, x12, x10, x11
> +; CHECK-NEXT:    msub x8, x13, x8, x9
> +; CHECK-NEXT:    fmov d0, x10
> +; CHECK-NEXT:    mov v0.d[1], x8
> +; CHECK-NEXT:    ret
> 	%tmp3 = urem <2 x i64> %A, %B;
> 	ret <2 x i64> %tmp3
> }
> 
> define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) {
> ; CHECK-LABEL: frem2f32:
> -; CHECK: bl fmodf
> -; CHECK: bl fmodf
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sub sp, sp, #64
> +; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
> +; CHECK-NEXT:    .cfi_def_cfa_offset 64
> +; CHECK-NEXT:    .cfi_offset w30, -16
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
> +; CHECK-NEXT:    mov s0, v0.s[1]
> +; CHECK-NEXT:    mov s1, v1.s[1]
> +; CHECK-NEXT:    bl fmodf
> +; CHECK-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
> +; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
> +; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
> +; CHECK-NEXT:    // kill: def $s1 killed $s1 killed $q1
> +; CHECK-NEXT:    bl fmodf
> +; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
> +; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
> +; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
> +; CHECK-NEXT:    mov v0.s[1], v1.s[0]
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
> +; CHECK-NEXT:    add sp, sp, #64
> +; CHECK-NEXT:    ret
> 	%tmp3 = frem <2 x float> %A, %B;
> 	ret <2 x float> %tmp3
> }
> 
> define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
> ; CHECK-LABEL: frem4f32:
> -; CHECK: bl fmodf
> -; CHECK: bl fmodf
> -; CHECK: bl fmodf
> -; CHECK: bl fmodf
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sub sp, sp, #64
> +; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
> +; CHECK-NEXT:    .cfi_def_cfa_offset 64
> +; CHECK-NEXT:    .cfi_offset w30, -16
> +; CHECK-NEXT:    stp q0, q1, [sp, #16] // 32-byte Folded Spill
> +; CHECK-NEXT:    mov s0, v0.s[1]
> +; CHECK-NEXT:    mov s1, v1.s[1]
> +; CHECK-NEXT:    bl fmodf
> +; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
> +; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
> +; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
> +; CHECK-NEXT:    // kill: def $s1 killed $s1 killed $q1
> +; CHECK-NEXT:    bl fmodf
> +; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
> +; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
> +; CHECK-NEXT:    mov v0.s[1], v1.s[0]
> +; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
> +; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
> +; CHECK-NEXT:    mov s0, v0.s[2]
> +; CHECK-NEXT:    mov s1, v1.s[2]
> +; CHECK-NEXT:    bl fmodf
> +; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
> +; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
> +; CHECK-NEXT:    mov v1.s[2], v0.s[0]
> +; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
> +; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
> +; CHECK-NEXT:    mov s0, v0.s[3]
> +; CHECK-NEXT:    mov s1, v1.s[3]
> +; CHECK-NEXT:    bl fmodf
> +; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
> +; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
> +; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
> +; CHECK-NEXT:    mov v1.s[3], v0.s[0]
> +; CHECK-NEXT:    mov v0.16b, v1.16b
> +; CHECK-NEXT:    add sp, sp, #64
> +; CHECK-NEXT:    ret
> 	%tmp3 = frem <4 x float> %A, %B;
> 	ret <4 x float> %tmp3
> }
> 
> define <1 x double> @frem1d64(<1 x double> %A, <1 x double> %B) {
> ; CHECK-LABEL: frem1d64:
> -; CHECK: bl fmod
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
> +; CHECK-NEXT:    .cfi_def_cfa_offset 16
> +; CHECK-NEXT:    .cfi_offset w30, -16
> +; CHECK-NEXT:    bl fmod
> +; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
> +; CHECK-NEXT:    ret
> 	%tmp3 = frem <1 x double> %A, %B;
> 	ret <1 x double> %tmp3
> }
> 
> define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) {
> ; CHECK-LABEL: frem2d64:
> -; CHECK: bl fmod
> -; CHECK: bl fmod
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sub sp, sp, #64
> +; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
> +; CHECK-NEXT:    .cfi_def_cfa_offset 64
> +; CHECK-NEXT:    .cfi_offset w30, -16
> +; CHECK-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
> +; CHECK-NEXT:    mov d0, v0.d[1]
> +; CHECK-NEXT:    mov d1, v1.d[1]
> +; CHECK-NEXT:    bl fmod
> +; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
> +; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
> +; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
> +; CHECK-NEXT:    bl fmod
> +; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
> +; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
> +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECK-NEXT:    mov v0.d[1], v1.d[0]
> +; CHECK-NEXT:    add sp, sp, #64
> +; CHECK-NEXT:    ret
> 	%tmp3 = frem <2 x double> %A, %B;
> 	ret <2 x double> %tmp3
> }
> @@ -689,15 +1547,19 @@ declare <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8>, <16 x i8>)
> 
> define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
> ; CHECK-LABEL: poly_mulv8i8:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    pmul v0.8b, v0.8b, v1.8b
> +; CHECK-NEXT:    ret
>    %prod = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
> -; CHECK: pmul v0.8b, v0.8b, v1.8b
>    ret <8 x i8> %prod
> }
> 
> define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
> ; CHECK-LABEL: poly_mulv16i8:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    pmul v0.16b, v0.16b, v1.16b
> +; CHECK-NEXT:    ret
>    %prod = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
> -; CHECK: pmul v0.16b, v0.16b, v1.16b
>    ret <16 x i8> %prod
> }
> 
> @@ -708,29 +1570,37 @@ declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)
> 
> define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
> ; CHECK-LABEL: test_sqdmulh_v4i16:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sqdmulh v0.4h, v0.4h, v1.4h
> +; CHECK-NEXT:    ret
>    %prod = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
> -; CHECK: sqdmulh v0.4h, v0.4h, v1.4h
>    ret <4 x i16> %prod
> }
> 
> define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
> ; CHECK-LABEL: test_sqdmulh_v8i16:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sqdmulh v0.8h, v0.8h, v1.8h
> +; CHECK-NEXT:    ret
>    %prod = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
> -; CHECK: sqdmulh v0.8h, v0.8h, v1.8h
>    ret <8 x i16> %prod
> }
> 
> define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
> ; CHECK-LABEL: test_sqdmulh_v2i32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sqdmulh v0.2s, v0.2s, v1.2s
> +; CHECK-NEXT:    ret
>    %prod = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
> -; CHECK: sqdmulh v0.2s, v0.2s, v1.2s
>    ret <2 x i32> %prod
> }
> 
> define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
> ; CHECK-LABEL: test_sqdmulh_v4i32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sqdmulh v0.4s, v0.4s, v1.4s
> +; CHECK-NEXT:    ret
>    %prod = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
> -; CHECK: sqdmulh v0.4s, v0.4s, v1.4s
>    ret <4 x i32> %prod
> }
> 
> @@ -741,29 +1611,37 @@ declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
> 
> define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
> ; CHECK-LABEL: test_sqrdmulh_v4i16:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.4h
> +; CHECK-NEXT:    ret
>    %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
> -; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h
>    ret <4 x i16> %prod
> }
> 
> define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
> ; CHECK-LABEL: test_sqrdmulh_v8i16:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sqrdmulh v0.8h, v0.8h, v1.8h
> +; CHECK-NEXT:    ret
>    %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
> -; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h
>    ret <8 x i16> %prod
> }
> 
> define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
> ; CHECK-LABEL: test_sqrdmulh_v2i32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.2s
> +; CHECK-NEXT:    ret
>    %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
> -; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s
>    ret <2 x i32> %prod
> }
> 
> define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
> ; CHECK-LABEL: test_sqrdmulh_v4i32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.4s
> +; CHECK-NEXT:    ret
>    %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
> -; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s
>    ret <4 x i32> %prod
> }
> 
> @@ -773,24 +1651,30 @@ declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
> 
> define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
> ; CHECK-LABEL: fmulx_v2f32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmulx v0.2s, v0.2s, v1.2s
> +; CHECK-NEXT:    ret
> ; Using registers other than v0, v1 and v2 are possible, but would be odd.
> -; CHECK: fmulx v0.2s, v0.2s, v1.2s
>         %val = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs)
>         ret <2 x float> %val
> }
> 
> define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
> ; CHECK-LABEL: fmulx_v4f32:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmulx v0.4s, v0.4s, v1.4s
> +; CHECK-NEXT:    ret
> ; Using registers other than v0, v1 and v2 are possible, but would be odd.
> -; CHECK: fmulx v0.4s, v0.4s, v1.4s
>         %val = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs)
>         ret <4 x float> %val
> }
> 
> define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
> ; CHECK-LABEL: fmulx_v2f64:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    fmulx v0.2d, v0.2d, v1.2d
> +; CHECK-NEXT:    ret
> ; Using registers other than v0, v1 and v2 are possible, but would be odd.
> -; CHECK: fmulx v0.2d, v0.2d, v1.2d
>         %val = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
>         ret <2 x double> %val
> }
> 
> diff  --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
> index 327d37a6ab9e..ebba97a90b58 100644
> --- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
> +++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
> @@ -1,4 +1,5 @@
> -; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
> +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> +; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-enable-collect-loh=false | FileCheck %s
> 
> define void @test(float * %p1, i32 %v1) {
> ; CHECK-LABEL: test:
> @@ -50,9 +51,13 @@ entry:
> @_gv = external unnamed_addr global %"st1", align 8
> 
> define internal void @nvcast_f32_v8i8() {
> -; CHECK-LABEL: _nvcast_f32_v8i8
> -; CHECK: movi.8b v[[REG:[0-9]+]], #254
> -; CHECK: str d[[REG]]
> +; CHECK-LABEL: nvcast_f32_v8i8:
> +; CHECK:       ; %bb.0: ; %entry
> +; CHECK-NEXT:    adrp x8, __gv@GOTPAGE
> +; CHECK-NEXT:    ldr x8, [x8, __gv@GOTPAGEOFF]
> +; CHECK-NEXT:    movi.8b v0, #254
> +; CHECK-NEXT:    str d0, [x8]
> +; CHECK-NEXT:    ret
> entry:
>   store <2 x float> <float 0xC7DFDFDFC0000000, float 0xC7DFDFDFC0000000>, <2 x float>* bitcast (%"st1"* @_gv to <2 x float>*), align 8
>   ret void
> @@ -61,9 +66,11 @@ entry:
> %struct.Vector3 = type { float, float, float }
> 
> define void @nvcast_v2f32_v1f64(%struct.Vector3*) {
> -; CHECK-LABEL: _nvcast_v2f32_v1f64
> -; CHECK: fmov.2s v[[REG:[0-9]+]], #1.00000000
> -; CHECK: str d[[REG]], [x0]
> +; CHECK-LABEL: nvcast_v2f32_v1f64:
> +; CHECK:       ; %bb.0: ; %entry
> +; CHECK-NEXT:    fmov.2s v0, #1.00000000
> +; CHECK-NEXT:    str d0, [x0]
> +; CHECK-NEXT:    ret
> entry:
>   %a13 = bitcast %struct.Vector3* %0 to <1 x double>*
>   store <1 x double> <double 0x3F8000003F800000>, <1 x double>* %a13, align 8
> 
> diff  --git a/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll b/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll
> index eac7e5cb3363..62527fe3d7a4 100644
> --- a/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll
> +++ b/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll
> @@ -1,3 +1,4 @@
> +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> ; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
> ; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck -check-prefix CHECK-NOTMACHO %s
> ;
> @@ -5,76 +6,94 @@
> 
> define void @odd() nounwind {
> ; CHECK-LABEL: odd:
> -; CHECK: stp d15, d14, [sp, #-144]!
> -; CHECK: stp d13, d12, [sp, #16]
> -; CHECK: stp d11, d10, [sp, #32]
> -; CHECK: stp d9, d8, [sp, #48]
> -; CHECK: stp x28, x27, [sp, #64]
> -; CHECK: stp x26, x25, [sp, #80]
> -; CHECK: stp x24, x23, [sp, #96]
> -; CHECK: stp x22, x21, [sp, #112]
> -; CHECK: stp x20, x19, [sp, #128]
> -; CHECK: mov x0, #42
> -; CHECK: ldp x20, x19, [sp, #128]
> -; CHECK: ldp x22, x21, [sp, #112]
> -; CHECK: ldp x24, x23, [sp, #96]
> -; CHECK: ldp x26, x25, [sp, #80]
> -; CHECK: ldp x28, x27, [sp, #64]
> -; CHECK: ldp d9, d8, [sp, #48]
> -; CHECK: ldp d11, d10, [sp, #32]
> -; CHECK: ldp d13, d12, [sp, #16]
> -; CHECK: ldp d15, d14, [sp], #144
> -
> +; CHECK:       ; %bb.0:
> +; CHECK-NEXT:    stp d15, d14, [sp, #-144]! ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp x28, x27, [sp, #64] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp x26, x25, [sp, #80] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp x24, x23, [sp, #96] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp x22, x21, [sp, #112] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp x20, x19, [sp, #128] ; 16-byte Folded Spill
> +; CHECK-NEXT:    ; InlineAsm Start
> +; CHECK-NEXT:    mov x0, #42
> +; CHECK-NEXT:    ; InlineAsm End
> +; CHECK-NEXT:    ldp x20, x19, [sp, #128] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp x22, x21, [sp, #112] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp x24, x23, [sp, #96] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp x26, x25, [sp, #80] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp d15, d14, [sp], #144 ; 16-byte Folded Reload
> +; CHECK-NEXT:    ret
> +;
> ; CHECK-NOTMACHO-LABEL: odd:
> -; CHECK-NOTMACHO: stp d14, d12, [sp, #-80]!
> -; CHECK-NOTMACHO: stp d10, d8, [sp, #16]
> -; CHECK-NOTMACHO: str x27, [sp, #32]
> -; CHECK-NOTMACHO: stp x25, x23, [sp, #48]
> -; CHECK-NOTMACHO: stp x21, x19, [sp, #64]
> -; CHECK-NOTMACHO: mov x0, #42
> -; CHECK-NOTMACHO: ldp x21, x19, [sp, #64]
> -; CHECK-NOTMACHO: ldp x25, x23, [sp, #48]
> -; CHECK-NOTMACHO: ldr x27, [sp, #32]
> -; CHECK-NOTMACHO: ldp d10, d8, [sp, #16]
> -; CHECK-NOTMACHO: ldp d14, d12, [sp], #80
> +; CHECK-NOTMACHO:       // %bb.0:
> +; CHECK-NOTMACHO-NEXT:    stp d14, d12, [sp, #-80]! // 16-byte Folded Spill
> +; CHECK-NOTMACHO-NEXT:    stp d10, d8, [sp, #16] // 16-byte Folded Spill
> +; CHECK-NOTMACHO-NEXT:    str x27, [sp, #32] // 8-byte Folded Spill
> +; CHECK-NOTMACHO-NEXT:    stp x25, x23, [sp, #48] // 16-byte Folded Spill
> +; CHECK-NOTMACHO-NEXT:    stp x21, x19, [sp, #64] // 16-byte Folded Spill
> +; CHECK-NOTMACHO-NEXT:    //APP
> +; CHECK-NOTMACHO-NEXT:    mov x0, #42
> +; CHECK-NOTMACHO-NEXT:    //NO_APP
> +; CHECK-NOTMACHO-NEXT:    ldp x21, x19, [sp, #64] // 16-byte Folded Reload
> +; CHECK-NOTMACHO-NEXT:    ldp x25, x23, [sp, #48] // 16-byte Folded Reload
> +; CHECK-NOTMACHO-NEXT:    ldr x27, [sp, #32] // 8-byte Folded Reload
> +; CHECK-NOTMACHO-NEXT:    ldp d10, d8, [sp, #16] // 16-byte Folded Reload
> +; CHECK-NOTMACHO-NEXT:    ldp d14, d12, [sp], #80 // 16-byte Folded Reload
> +; CHECK-NOTMACHO-NEXT:    ret
> +
>   call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x21},~{x23},~{x25},~{x27},~{d8},~{d10},~{d12},~{d14}"() nounwind
>   ret void
> }
> 
> define void @even() nounwind {
> ; CHECK-LABEL: even:
> -; CHECK: stp d15, d14, [sp, #-144]!
> -; CHECK: stp d13, d12, [sp, #16]
> -; CHECK: stp d11, d10, [sp, #32]
> -; CHECK: stp d9, d8, [sp, #48]
> -; CHECK: stp x28, x27, [sp, #64]
> -; CHECK: stp x26, x25, [sp, #80]
> -; CHECK: stp x24, x23, [sp, #96]
> -; CHECK: stp x22, x21, [sp, #112]
> -; CHECK: stp x20, x19, [sp, #128]
> -; CHECK: mov x0, #42
> -; CHECK: ldp x20, x19, [sp, #128]
> -; CHECK: ldp x22, x21, [sp, #112]
> -; CHECK: ldp x24, x23, [sp, #96]
> -; CHECK: ldp x26, x25, [sp, #80]
> -; CHECK: ldp x28, x27, [sp, #64]
> -; CHECK: ldp d9, d8, [sp, #48]
> -; CHECK: ldp d11, d10, [sp, #32]
> -; CHECK: ldp d13, d12, [sp, #16]
> -; CHECK: ldp d15, d14, [sp], #144
> -
> +; CHECK:       ; %bb.0:
> +; CHECK-NEXT:    stp d15, d14, [sp, #-144]! ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp x28, x27, [sp, #64] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp x26, x25, [sp, #80] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp x24, x23, [sp, #96] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp x22, x21, [sp, #112] ; 16-byte Folded Spill
> +; CHECK-NEXT:    stp x20, x19, [sp, #128] ; 16-byte Folded Spill
> +; CHECK-NEXT:    ; InlineAsm Start
> +; CHECK-NEXT:    mov x0, #42
> +; CHECK-NEXT:    ; InlineAsm End
> +; CHECK-NEXT:    ldp x20, x19, [sp, #128] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp x22, x21, [sp, #112] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp x24, x23, [sp, #96] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp x26, x25, [sp, #80] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
> +; CHECK-NEXT:    ldp d15, d14, [sp], #144 ; 16-byte Folded Reload
> +; CHECK-NEXT:    ret
> +;
> ; CHECK-NOTMACHO-LABEL: even:
> -; CHECK-NOTMACHO: stp d15, d13, [sp, #-80]!
> -; CHECK-NOTMACHO: stp d11, d9, [sp, #16]
> -; CHECK-NOTMACHO: str x28, [sp, #32]
> -; CHECK-NOTMACHO: stp x26, x24, [sp, #48]
> -; CHECK-NOTMACHO: stp x22, x20, [sp, #64]
> -; CHECK-NOTMACHO: mov x0, #42
> -; CHECK-NOTMACHO: ldp x22, x20, [sp, #64]
> -; CHECK-NOTMACHO: ldp x26, x24, [sp, #48]
> -; CHECK-NOTMACHO: ldr x28, [sp, #32]
> -; CHECK-NOTMACHO: ldp d11, d9, [sp, #16]
> -; CHECK-NOTMACHO: ldp d15, d13, [sp], #80
> +; CHECK-NOTMACHO:       // %bb.0:
> +; CHECK-NOTMACHO-NEXT:    stp d15, d13, [sp, #-80]! // 16-byte Folded Spill
> +; CHECK-NOTMACHO-NEXT:    stp d11, d9, [sp, #16] // 16-byte Folded Spill
> +; CHECK-NOTMACHO-NEXT:    str x28, [sp, #32] // 8-byte Folded Spill
> +; CHECK-NOTMACHO-NEXT:    stp x26, x24, [sp, #48] // 16-byte Folded Spill
> +; CHECK-NOTMACHO-NEXT:    stp x22, x20, [sp, #64] // 16-byte Folded Spill
> +; CHECK-NOTMACHO-NEXT:    //APP
> +; CHECK-NOTMACHO-NEXT:    mov x0, #42
> +; CHECK-NOTMACHO-NEXT:    //NO_APP
> +; CHECK-NOTMACHO-NEXT:    ldp x22, x20, [sp, #64] // 16-byte Folded Reload
> +; CHECK-NOTMACHO-NEXT:    ldp x26, x24, [sp, #48] // 16-byte Folded Reload
> +; CHECK-NOTMACHO-NEXT:    ldr x28, [sp, #32] // 8-byte Folded Reload
> +; CHECK-NOTMACHO-NEXT:    ldp d11, d9, [sp, #16] // 16-byte Folded Reload
> +; CHECK-NOTMACHO-NEXT:    ldp d15, d13, [sp], #80 // 16-byte Folded Reload
> +; CHECK-NOTMACHO-NEXT:    ret
> +
>   call void asm sideeffect "mov x0, #42", "~{x0},~{x20},~{x22},~{x24},~{x26},~{x28},~{d9},~{d11},~{d13},~{d15}"() nounwind
>   ret void
> }
> 
> diff  --git a/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
> index 67283b6d3c1b..463e20aa696f 100644
> --- a/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
> +++ b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
> @@ -1,11 +1,13 @@
> -; RUN: llc < %s -asm-verbose=false -mtriple=arm64-apple-ios | FileCheck %s
> +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> +; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
> 
> define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
> ; CHECK-LABEL: foo:
> -; CHECK-NEXT: fcmeq.4s  v0, v0, v1
> -; CHECK-NEXT: fmov.4s v1, #1.00000000
> -; CHECK-NEXT: and.16b v0, v0, v1
> -; CHECK-NEXT: ret
> +; CHECK:       ; %bb.0:
> +; CHECK-NEXT:    fcmeq.4s v0, v0, v1
> +; CHECK-NEXT:    fmov.4s v1, #1.00000000
> +; CHECK-NEXT:    and.16b v0, v0, v1
> +; CHECK-NEXT:    ret
>   %cmp = fcmp oeq <4 x float> %val, %test
>   %ext = zext <4 x i1> %cmp to <4 x i32>
>   %result = sitofp <4 x i32> %ext to <4 x float>
> @@ -16,9 +18,16 @@ define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
> ; the folded nodes.
> define void @foo1(<4 x float> %val, <4 x float> %test, <4 x double>* %p) nounwind {
> ; CHECK-LABEL: foo1:
> -; CHECK: movi.4s
> -; CHECK: scvtf.2d
> -; CHECK: scvtf.2d
> +; CHECK:       ; %bb.0:
> +; CHECK-NEXT:    fcmeq.4s v0, v0, v1
> +; CHECK-NEXT:    movi.4s v1, #1
> +; CHECK-NEXT:    and.16b v0, v0, v1
> +; CHECK-NEXT:    ushll.2d v1, v0, #0
> +; CHECK-NEXT:    ushll2.2d v0, v0, #0
> +; CHECK-NEXT:    scvtf.2d v1, v1
> +; CHECK-NEXT:    scvtf.2d v0, v0
> +; CHECK-NEXT:    stp q1, q0, [x0]
> +; CHECK-NEXT:    ret
>   %cmp = fcmp oeq <4 x float> %val, %test
>   %ext = zext <4 x i1> %cmp to <4 x i32>
>   %result = sitofp <4 x i32> %ext to <4 x double>
> @@ -29,16 +38,16 @@ define void @foo1(<4 x float> %val, <4 x float> %test, <4 x double>* %p) nounwin
> ; Fold explicit AND operations when the constant isn't a splat of a single
> ; scalar value like what the zext creates.
> define <4 x float> @foo2(<4 x float> %val, <4 x float> %test) nounwind {
> -; CHECK-LABEL: lCPI2_0:
> -; CHECK-NEXT: .long 1065353216
> -; CHECK-NEXT: .long 0
> -; CHECK-NEXT: .long 1065353216
> -; CHECK-NEXT: .long 0
> ; CHECK-LABEL: foo2:
> -; CHECK: adrp  x8, lCPI2_0@PAGE
> -; CHECK: ldr q2, [x8, lCPI2_0@PAGEOFF]
> -; CHECK-NEXT:  fcmeq.4s  v0, v0, v1
> -; CHECK-NEXT:  and.16b v0, v0, v2
> +; CHECK:       ; %bb.0:
> +; CHECK-NEXT:  Lloh0:
> +; CHECK-NEXT:    adrp x8, lCPI2_0@PAGE
> +; CHECK-NEXT:  Lloh1:
> +; CHECK-NEXT:    ldr q2, [x8, lCPI2_0@PAGEOFF]
> +; CHECK-NEXT:    fcmeq.4s v0, v0, v1
> +; CHECK-NEXT:    and.16b v0, v0, v2
> +; CHECK-NEXT:    ret
> +; CHECK-NEXT:    .loh AdrpLdr Lloh0, Lloh1
>   %cmp = fcmp oeq <4 x float> %val, %test
>   %ext = zext <4 x i1> %cmp to <4 x i32>
>   %and = and <4 x i32> %ext, <i32 255, i32 256, i32 257, i32 258>
> 
> diff  --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
> index 593e36d9cba9..25983279866d 100644
> --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
> +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
> @@ -1,5 +1,6 @@
> -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
> -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* 2>&1 | FileCheck %s --check-prefixes=CHECK,FALLBACK
> +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECKDAG
> +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* 2>&1 | FileCheck %s --check-prefixes=CHECK,FALLBACK
> 
> ; Test efficient codegen of vector extends up from legal type to 128 bit
> ; and 256 bit vector types.
> @@ -11,8 +12,9 @@
> ; FALLBACK-NOT: remark:{{.*}}(in function: func1)
> define <8 x i16> @func1(<8 x i8> %v0) nounwind {
> ; CHECK-LABEL: func1:
> -; CHECK-NEXT: ushll.8h  v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ushll.8h v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = zext <8 x i8> %v0 to <8 x i16>
>   ret <8 x i16> %r
> }
> @@ -20,26 +22,29 @@ define <8 x i16> @func1(<8 x i8> %v0) nounwind {
> ; FALLBACK-NOT: remark:{{.*}}(in function: func2)
> define <8 x i16> @func2(<8 x i8> %v0) nounwind {
> ; CHECK-LABEL: func2:
> -; CHECK-NEXT: sshll.8h  v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sshll.8h v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = sext <8 x i8> %v0 to <8 x i16>
>   ret <8 x i16> %r
> }
> 
> define <16 x i16> @func3(<16 x i8> %v0) nounwind {
> ; CHECK-LABEL: func3:
> -; CHECK-NEXT: ushll2.8h  v1, v0, #0
> -; CHECK-NEXT: ushll.8h  v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ushll2.8h v1, v0, #0
> +; CHECK-NEXT:    ushll.8h v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = zext <16 x i8> %v0 to <16 x i16>
>   ret <16 x i16> %r
> }
> 
> define <16 x i16> @func4(<16 x i8> %v0) nounwind {
> ; CHECK-LABEL: func4:
> -; CHECK-NEXT: sshll2.8h  v1, v0, #0
> -; CHECK-NEXT: sshll.8h  v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sshll2.8h v1, v0, #0
> +; CHECK-NEXT:    sshll.8h v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = sext <16 x i8> %v0 to <16 x i16>
>   ret <16 x i16> %r
> }
> @@ -51,8 +56,9 @@ define <16 x i16> @func4(<16 x i8> %v0) nounwind {
> ; FALLBACK-NOT: remark:{{.*}}(in function: afunc1)
> define <4 x i32> @afunc1(<4 x i16> %v0) nounwind {
> ; CHECK-LABEL: afunc1:
> -; CHECK-NEXT: ushll.4s v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ushll.4s v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = zext <4 x i16> %v0 to <4 x i32>
>   ret <4 x i32> %r
> }
> @@ -60,46 +66,51 @@ define <4 x i32> @afunc1(<4 x i16> %v0) nounwind {
> ; FALLBACK-NOT: remark:{{.*}}(in function: afunc2)
> define <4 x i32> @afunc2(<4 x i16> %v0) nounwind {
> ; CHECK-LABEL: afunc2:
> -; CHECK-NEXT: sshll.4s v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sshll.4s v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = sext <4 x i16> %v0 to <4 x i32>
>   ret <4 x i32> %r
> }
> 
> define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
> ; CHECK-LABEL: afunc3:
> -; CHECK-NEXT: ushll2.4s v1, v0, #0
> -; CHECK-NEXT: ushll.4s v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ushll2.4s v1, v0, #0
> +; CHECK-NEXT:    ushll.4s v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = zext <8 x i16> %v0 to <8 x i32>
>   ret <8 x i32> %r
> }
> 
> define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
> ; CHECK-LABEL: afunc4:
> -; CHECK-NEXT: sshll2.4s v1, v0, #0
> -; CHECK-NEXT: sshll.4s v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sshll2.4s v1, v0, #0
> +; CHECK-NEXT:    sshll.4s v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = sext <8 x i16> %v0 to <8 x i32>
>   ret <8 x i32> %r
> }
> 
> define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind {
> ; CHECK-LABEL: bfunc1:
> -; CHECK-NEXT: ushll.8h  v0, v0, #0
> -; CHECK-NEXT: ushll2.4s v1, v0, #0
> -; CHECK-NEXT: ushll.4s  v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ushll.8h v0, v0, #0
> +; CHECK-NEXT:    ushll2.4s v1, v0, #0
> +; CHECK-NEXT:    ushll.4s v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = zext <8 x i8> %v0 to <8 x i32>
>   ret <8 x i32> %r
> }
> 
> define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
> ; CHECK-LABEL: bfunc2:
> -; CHECK-NEXT: sshll.8h  v0, v0, #0
> -; CHECK-NEXT: sshll2.4s v1, v0, #0
> -; CHECK-NEXT: sshll.4s  v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sshll.8h v0, v0, #0
> +; CHECK-NEXT:    sshll2.4s v1, v0, #0
> +; CHECK-NEXT:    sshll.4s v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = sext <8 x i8> %v0 to <8 x i32>
>   ret <8 x i32> %r
> }
> @@ -110,95 +121,99 @@ define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
> 
> define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
> ; CHECK-LABEL: zfunc1:
> -; CHECK-NEXT: ushll2.2d v1, v0, #0
> -; CHECK-NEXT: ushll.2d v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ushll2.2d v1, v0, #0
> +; CHECK-NEXT:    ushll.2d v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = zext <4 x i32> %v0 to <4 x i64>
>   ret <4 x i64> %r
> }
> 
> define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
> ; CHECK-LABEL: zfunc2:
> -; CHECK-NEXT: sshll2.2d v1, v0, #0
> -; CHECK-NEXT: sshll.2d v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sshll2.2d v1, v0, #0
> +; CHECK-NEXT:    sshll.2d v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = sext <4 x i32> %v0 to <4 x i64>
>   ret <4 x i64> %r
> }
> 
> define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind {
> -; CHECK-LABEL: func3:
> -; CHECK-NEXT: ushll.4s  v0, v0, #0
> -; CHECK-NEXT: ushll2.2d v1, v0, #0
> -; CHECK-NEXT: ushll.2d  v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK-LABEL: bfunc3:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ushll.4s v0, v0, #0
> +; CHECK-NEXT:    ushll2.2d v1, v0, #0
> +; CHECK-NEXT:    ushll.2d v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = zext <4 x i16> %v0 to <4 x i64>
>   ret <4 x i64> %r
> }
> 
> define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind {
> -; CHECK-LABEL: func4:
> -; CHECK-NEXT: sshll.4s  v0, v0, #0
> -; CHECK-NEXT: sshll2.2d v1, v0, #0
> -; CHECK-NEXT: sshll.2d  v0, v0, #0
> -; CHECK-NEXT: ret
> +; CHECK-LABEL: cfunc4:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sshll.4s v0, v0, #0
> +; CHECK-NEXT:    sshll2.2d v1, v0, #0
> +; CHECK-NEXT:    sshll.2d v0, v0, #0
> +; CHECK-NEXT:    ret
>   %r = sext <4 x i16> %v0 to <4 x i64>
>   ret <4 x i64> %r
> }
> 
> define <4 x i64> @zext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
> ; CHECK-LABEL: zext_v4i8_to_v4i64:
> -; CHECK-NEXT:    bic.4h  v0, #255, lsl #8
> -; CHECK-NEXT:    ushll.4s    v0, v0, #0
> -; CHECK-NEXT:    ushll2.2d   v1, v0, #0
> -; CHECK-NEXT:    ushll.2d    v0, v0, #0
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    bic.4h v0, #255, lsl #8
> +; CHECK-NEXT:    ushll.4s v0, v0, #0
> +; CHECK-NEXT:    ushll2.2d v1, v0, #0
> +; CHECK-NEXT:    ushll.2d v0, v0, #0
> ; CHECK-NEXT:    ret
> -;
>   %r = zext <4 x i8> %v0 to <4 x i64>
>   ret <4 x i64> %r
> }
> 
> define <4 x i64> @sext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
> ; CHECK-LABEL: sext_v4i8_to_v4i64:
> -; CHECK-NEXT:    ushll.4s    v0, v0, #0
> -; CHECK-NEXT:    ushll.2d    v1, v0, #0
> -; CHECK-NEXT:    ushll2.2d   v0, v0, #0
> -; CHECK-NEXT:    shl.2d  v0, v0, #56
> -; CHECK-NEXT:    shl.2d  v2, v1, #56
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ushll.4s v0, v0, #0
> +; CHECK-NEXT:    ushll.2d v1, v0, #0
> +; CHECK-NEXT:    ushll2.2d v0, v0, #0
> +; CHECK-NEXT:    shl.2d v0, v0, #56
> +; CHECK-NEXT:    shl.2d v2, v1, #56
> ; CHECK-NEXT:    sshr.2d v1, v0, #56
> ; CHECK-NEXT:    sshr.2d v0, v2, #56
> ; CHECK-NEXT:    ret
> -;
>   %r = sext <4 x i8> %v0 to <4 x i64>
>   ret <4 x i64> %r
> }
> 
> define <8 x i64> @zext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
> ; CHECK-LABEL: zext_v8i8_to_v8i64:
> -; CHECK-NEXT:    ushll.8h   v0, v0, #0
> -; CHECK-NEXT:    ushll2.4s  v2, v0, #0
> -; CHECK-NEXT:    ushll.4s   v0, v0, #0
> -; CHECK-NEXT:    ushll2.2d  v3, v2, #0
> -; CHECK-NEXT:    ushll2.2d  v1, v0, #0
> -; CHECK-NEXT:    ushll.2d   v2, v2, #0
> -; CHECK-NEXT:    ushll.2d   v0, v0, #0
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ushll.8h v0, v0, #0
> +; CHECK-NEXT:    ushll2.4s v2, v0, #0
> +; CHECK-NEXT:    ushll.4s v0, v0, #0
> +; CHECK-NEXT:    ushll2.2d v3, v2, #0
> +; CHECK-NEXT:    ushll2.2d v1, v0, #0
> +; CHECK-NEXT:    ushll.2d v2, v2, #0
> +; CHECK-NEXT:    ushll.2d v0, v0, #0
> ; CHECK-NEXT:    ret
> -;
>   %r = zext <8 x i8> %v0 to <8 x i64>
>   ret <8 x i64> %r
> }
> 
> define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
> ; CHECK-LABEL: sext_v8i8_to_v8i64:
> -; CHECK-NEXT:    sshll.8h   v0, v0, #0
> -; CHECK-NEXT:    sshll2.4s  v2, v0, #0
> -; CHECK-NEXT:    sshll.4s   v0, v0, #0
> -; CHECK-NEXT:    sshll2.2d  v3, v2, #0
> -; CHECK-NEXT:    sshll2.2d  v1, v0, #0
> -; CHECK-NEXT:    sshll.2d   v2, v2, #0
> -; CHECK-NEXT:    sshll.2d   v0, v0, #0
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    sshll.8h v0, v0, #0
> +; CHECK-NEXT:    sshll2.4s v2, v0, #0
> +; CHECK-NEXT:    sshll.4s v0, v0, #0
> +; CHECK-NEXT:    sshll2.2d v3, v2, #0
> +; CHECK-NEXT:    sshll2.2d v1, v0, #0
> +; CHECK-NEXT:    sshll.2d v2, v2, #0
> +; CHECK-NEXT:    sshll.2d v0, v0, #0
> ; CHECK-NEXT:    ret
> -;
>   %r = sext <8 x i8> %v0 to <8 x i64>
>   ret <8 x i64> %r
> }
> @@ -207,7 +222,65 @@ define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
> 
> define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
> ; CHECK-LABEL: zext_v32i1:
> -; CHECK:         and.16b v0, v0, v2
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ldr w8, [sp, #64]
> +; CHECK-NEXT:    ldr w9, [sp, #72]
> +; CHECK-NEXT:    ldr w10, [sp, #80]
> +; CHECK-NEXT:    fmov s0, w0
> +; CHECK-NEXT:    ldr w11, [sp, #88]
> +; CHECK-NEXT:    mov.b v0[1], w1
> +; CHECK-NEXT:    ldr w12, [sp, #96]
> +; CHECK-NEXT:    fmov s1, w8
> +; CHECK-NEXT:    mov.b v0[2], w2
> +; CHECK-NEXT:    ldr w8, [sp, #104]
> +; CHECK-NEXT:    mov.b v1[1], w9
> +; CHECK-NEXT:    mov.b v0[3], w3
> +; CHECK-NEXT:    ldr w13, [sp, #112]
> +; CHECK-NEXT:    mov.b v1[2], w10
> +; CHECK-NEXT:    mov.b v0[4], w4
> +; CHECK-NEXT:    ldr w9, [sp, #120]
> +; CHECK-NEXT:    mov.b v1[3], w11
> +; CHECK-NEXT:    ldr w11, [sp]
> +; CHECK-NEXT:    mov.b v0[5], w5
> +; CHECK-NEXT:    mov.b v1[4], w12
> +; CHECK-NEXT:    ldr w12, [sp, #8]
> +; CHECK-NEXT:    ldr w14, [sp, #128]
> +; CHECK-NEXT:    mov.b v0[6], w6
> +; CHECK-NEXT:    mov.b v1[5], w8
> +; CHECK-NEXT:    ldr w8, [sp, #16]
> +; CHECK-NEXT:    ldr w10, [sp, #136]
> +; CHECK-NEXT:    mov.b v0[7], w7
> +; CHECK-NEXT:    mov.b v1[6], w13
> +; CHECK-NEXT:    ldr w13, [sp, #24]
> +; CHECK-NEXT:    ldr w15, [sp, #144]
> +; CHECK-NEXT:    mov.b v0[8], w11
> +; CHECK-NEXT:    mov.b v1[7], w9
> +; CHECK-NEXT:    ldr w9, [sp, #32]
> +; CHECK-NEXT:    ldr w16, [sp, #152]
> +; CHECK-NEXT:    mov.b v0[9], w12
> +; CHECK-NEXT:    ldr w11, [sp, #160]
> +; CHECK-NEXT:    mov.b v0[10], w8
> +; CHECK-NEXT:    mov.b v1[8], w14
> +; CHECK-NEXT:    ldr w12, [sp, #168]
> +; CHECK-NEXT:    mov.b v0[11], w13
> +; CHECK-NEXT:    ldr w14, [sp, #40]
> +; CHECK-NEXT:    mov.b v1[9], w10
> +; CHECK-NEXT:    ldr w8, [sp, #176]
> +; CHECK-NEXT:    mov.b v0[12], w9
> +; CHECK-NEXT:    ldr w9, [sp, #48]
> +; CHECK-NEXT:    mov.b v1[10], w15
> +; CHECK-NEXT:    ldr w13, [sp, #184]
> +; CHECK-NEXT:    ldr w10, [sp, #56]
> +; CHECK-NEXT:    mov.b v1[11], w16
> +; CHECK-NEXT:    mov.b v1[12], w11
> +; CHECK-NEXT:    mov.b v0[13], w14
> +; CHECK-NEXT:    mov.b v1[13], w12
> +; CHECK-NEXT:    mov.b v0[14], w9
> +; CHECK-NEXT:    mov.b v1[14], w8
> +; CHECK-NEXT:    movi.16b v2, #1
> +; CHECK-NEXT:    mov.b v0[15], w10
> +; CHECK-NEXT:    mov.b v1[15], w13
> +; CHECK-NEXT:    and.16b v0, v0, v2
> ; CHECK-NEXT:    and.16b v1, v1, v2
> ; CHECK-NEXT:    ret
>   %res = zext <32 x i1> %arg to <32 x i8>
> @@ -216,54 +289,358 @@ define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
> 
> define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
> ; CHECK-LABEL: sext_v32i1:
> -; CHECK:         shl.16b v0, v0, #7
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ldr w8, [sp, #64]
> +; CHECK-NEXT:    ldr w9, [sp, #72]
> +; CHECK-NEXT:    ldr w10, [sp, #80]
> +; CHECK-NEXT:    fmov s0, w0
> +; CHECK-NEXT:    ldr w11, [sp, #88]
> +; CHECK-NEXT:    mov.b v0[1], w1
> +; CHECK-NEXT:    ldr w12, [sp, #96]
> +; CHECK-NEXT:    fmov s1, w8
> +; CHECK-NEXT:    mov.b v0[2], w2
> +; CHECK-NEXT:    ldr w8, [sp, #104]
> +; CHECK-NEXT:    mov.b v1[1], w9
> +; CHECK-NEXT:    mov.b v0[3], w3
> +; CHECK-NEXT:    ldr w13, [sp, #112]
> +; CHECK-NEXT:    mov.b v1[2], w10
> +; CHECK-NEXT:    mov.b v0[4], w4
> +; CHECK-NEXT:    ldr w9, [sp, #120]
> +; CHECK-NEXT:    mov.b v1[3], w11
> +; CHECK-NEXT:    ldr w11, [sp]
> +; CHECK-NEXT:    mov.b v0[5], w5
> +; CHECK-NEXT:    mov.b v1[4], w12
> +; CHECK-NEXT:    ldr w12, [sp, #8]
> +; CHECK-NEXT:    ldr w14, [sp, #128]
> +; CHECK-NEXT:    mov.b v0[6], w6
> +; CHECK-NEXT:    mov.b v1[5], w8
> +; CHECK-NEXT:    ldr w8, [sp, #16]
> +; CHECK-NEXT:    ldr w10, [sp, #136]
> +; CHECK-NEXT:    mov.b v0[7], w7
> +; CHECK-NEXT:    mov.b v1[6], w13
> +; CHECK-NEXT:    ldr w13, [sp, #24]
> +; CHECK-NEXT:    ldr w15, [sp, #144]
> +; CHECK-NEXT:    mov.b v0[8], w11
> +; CHECK-NEXT:    mov.b v1[7], w9
> +; CHECK-NEXT:    ldr w9, [sp, #32]
> +; CHECK-NEXT:    ldr w16, [sp, #152]
> +; CHECK-NEXT:    mov.b v0[9], w12
> +; CHECK-NEXT:    ldr w11, [sp, #160]
> +; CHECK-NEXT:    mov.b v0[10], w8
> +; CHECK-NEXT:    mov.b v1[8], w14
> +; CHECK-NEXT:    ldr w12, [sp, #168]
> +; CHECK-NEXT:    mov.b v0[11], w13
> +; CHECK-NEXT:    ldr w14, [sp, #40]
> +; CHECK-NEXT:    mov.b v1[9], w10
> +; CHECK-NEXT:    ldr w8, [sp, #176]
> +; CHECK-NEXT:    mov.b v0[12], w9
> +; CHECK-NEXT:    ldr w9, [sp, #48]
> +; CHECK-NEXT:    mov.b v1[10], w15
> +; CHECK-NEXT:    ldr w13, [sp, #184]
> +; CHECK-NEXT:    ldr w10, [sp, #56]
> +; CHECK-NEXT:    mov.b v1[11], w16
> +; CHECK-NEXT:    mov.b v1[12], w11
> +; CHECK-NEXT:    mov.b v0[13], w14
> +; CHECK-NEXT:    mov.b v1[13], w12
> +; CHECK-NEXT:    mov.b v0[14], w9
> +; CHECK-NEXT:    mov.b v1[14], w8
> +; CHECK-NEXT:    mov.b v0[15], w10
> +; CHECK-NEXT:    mov.b v1[15], w13
> +; CHECK-NEXT:    shl.16b v0, v0, #7
> ; CHECK-NEXT:    shl.16b v1, v1, #7
> ; CHECK-NEXT:    sshr.16b v0, v0, #7
> ; CHECK-NEXT:    sshr.16b v1, v1, #7
> ; CHECK-NEXT:    ret
> -;
>   %res = sext <32 x i1> %arg to <32 x i8>
>   ret <32 x i8> %res
> }
> 
> define <64 x i8> @zext_v64i1(<64 x i1> %arg) {
> ; CHECK-LABEL: zext_v64i1:
> -; CHECK:         and.16b v0, v0, [[V4:v.+]]
> -; CHECK-NEXT:    and.16b v1, v1, [[V4]]
> -; CHECK-NEXT:    and.16b v2, v2, [[V4]]
> -; CHECK-NEXT:    and.16b v3, v3, [[V4]]
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ldr w11, [sp, #64]
> +; CHECK-NEXT:    ldr w8, [sp, #320]
> +; CHECK-NEXT:    ldr w10, [sp, #192]
> +; CHECK-NEXT:    ldr w9, [sp, #328]
> +; CHECK-NEXT:    ldr w12, [sp, #336]
> +; CHECK-NEXT:    fmov s1, w11
> +; CHECK-NEXT:    ldr w11, [sp, #200]
> +; CHECK-NEXT:    fmov s3, w8
> +; CHECK-NEXT:    ldr w8, [sp, #344]
> +; CHECK-NEXT:    fmov s0, w0
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    ldr w10, [sp, #352]
> +; CHECK-NEXT:    mov.b v0[1], w1
> +; CHECK-NEXT:    mov.b v3[1], w9
> +; CHECK-NEXT:    ldr w9, [sp, #360]
> +; CHECK-NEXT:    mov.b v0[2], w2
> +; CHECK-NEXT:    ldr w13, [sp, #72]
> +; CHECK-NEXT:    mov.b v2[1], w11
> +; CHECK-NEXT:    ldr w11, [sp, #368]
> +; CHECK-NEXT:    mov.b v3[2], w12
> +; CHECK-NEXT:    mov.b v0[3], w3
> +; CHECK-NEXT:    ldr w12, [sp, #376]
> +; CHECK-NEXT:    mov.b v3[3], w8
> +; CHECK-NEXT:    mov.b v0[4], w4
> +; CHECK-NEXT:    mov.b v3[4], w10
> +; CHECK-NEXT:    ldr w10, [sp]
> +; CHECK-NEXT:    mov.b v0[5], w5
> +; CHECK-NEXT:    ldr w14, [sp, #80]
> +; CHECK-NEXT:    mov.b v3[5], w9
> +; CHECK-NEXT:    ldr w9, [sp, #8]
> +; CHECK-NEXT:    mov.b v1[1], w13
> +; CHECK-NEXT:    ldr w13, [sp, #208]
> +; CHECK-NEXT:    mov.b v0[6], w6
> +; CHECK-NEXT:    ldr w15, [sp, #88]
> +; CHECK-NEXT:    mov.b v3[6], w11
> +; CHECK-NEXT:    ldr w11, [sp, #16]
> +; CHECK-NEXT:    mov.b v0[7], w7
> +; CHECK-NEXT:    ldr w16, [sp, #96]
> +; CHECK-NEXT:    mov.b v3[7], w12
> +; CHECK-NEXT:    ldr w12, [sp, #24]
> +; CHECK-NEXT:    mov.b v0[8], w10
> +; CHECK-NEXT:    ldr w10, [sp, #104]
> +; CHECK-NEXT:    mov.b v1[2], w14
> +; CHECK-NEXT:    ldr w14, [sp, #216]
> +; CHECK-NEXT:    mov.b v0[9], w9
> +; CHECK-NEXT:    ldr w9, [sp, #112]
> +; CHECK-NEXT:    mov.b v2[2], w13
> +; CHECK-NEXT:    ldr w13, [sp, #384]
> +; CHECK-NEXT:    mov.b v1[3], w15
> +; CHECK-NEXT:    ldr w15, [sp, #224]
> +; CHECK-NEXT:    mov.b v0[10], w11
> +; CHECK-NEXT:    ldr w11, [sp, #120]
> +; CHECK-NEXT:    mov.b v1[4], w16
> +; CHECK-NEXT:    ldr w16, [sp, #232]
> +; CHECK-NEXT:    mov.b v0[11], w12
> +; CHECK-NEXT:    ldr w12, [sp, #128]
> +; CHECK-NEXT:    mov.b v1[5], w10
> +; CHECK-NEXT:    ldr w10, [sp, #240]
> +; CHECK-NEXT:    mov.b v2[3], w14
> +; CHECK-NEXT:    mov.b v1[6], w9
> +; CHECK-NEXT:    ldr w9, [sp, #248]
> +; CHECK-NEXT:    ldr w8, [sp, #392]
> +; CHECK-NEXT:    mov.b v2[4], w15
> +; CHECK-NEXT:    mov.b v1[7], w11
> +; CHECK-NEXT:    ldr w11, [sp, #256]
> +; CHECK-NEXT:    mov.b v3[8], w13
> +; CHECK-NEXT:    ldr w13, [sp, #32]
> +; CHECK-NEXT:    ldr w14, [sp, #400]
> +; CHECK-NEXT:    mov.b v2[5], w16
> +; CHECK-NEXT:    mov.b v1[8], w12
> +; CHECK-NEXT:    ldr w12, [sp, #264]
> +; CHECK-NEXT:    ldr w15, [sp, #408]
> +; CHECK-NEXT:    mov.b v2[6], w10
> +; CHECK-NEXT:    mov.b v2[7], w9
> +; CHECK-NEXT:    mov.b v2[8], w11
> +; CHECK-NEXT:    mov.b v0[12], w13
> +; CHECK-NEXT:    ldr w13, [sp, #136]
> +; CHECK-NEXT:    mov.b v3[9], w8
> +; CHECK-NEXT:    ldr w8, [sp, #40]
> +; CHECK-NEXT:    mov.b v2[9], w12
> +; CHECK-NEXT:    ldr w12, [sp, #272]
> +; CHECK-NEXT:    mov.b v3[10], w14
> +; CHECK-NEXT:    ldr w14, [sp, #48]
> +; CHECK-NEXT:    mov.b v3[11], w15
> +; CHECK-NEXT:    ldr w15, [sp, #56]
> +; CHECK-NEXT:    ldr w16, [sp, #416]
> +; CHECK-NEXT:    mov.b v1[9], w13
> +; CHECK-NEXT:    ldr w13, [sp, #280]
> +; CHECK-NEXT:    mov.b v0[13], w8
> +; CHECK-NEXT:    ldr w8, [sp, #144]
> +; CHECK-NEXT:    mov.b v2[10], w12
> +; CHECK-NEXT:    ldr w12, [sp, #288]
> +; CHECK-NEXT:    mov.b v0[14], w14
> +; CHECK-NEXT:    ldr w14, [sp, #152]
> +; CHECK-NEXT:    mov.b v0[15], w15
> +; CHECK-NEXT:    ldr w15, [sp, #160]
> +; CHECK-NEXT:    ldr w10, [sp, #424]
> +; CHECK-NEXT:    mov.b v1[10], w8
> +; CHECK-NEXT:    ldr w8, [sp, #296]
> +; CHECK-NEXT:    mov.b v2[11], w13
> +; CHECK-NEXT:    mov.b v3[12], w16
> +; CHECK-NEXT:    ldr w16, [sp, #168]
> +; CHECK-NEXT:    ldr w9, [sp, #432]
> +; CHECK-NEXT:    ldr w13, [sp, #304]
> +; CHECK-NEXT:    mov.b v1[11], w14
> +; CHECK-NEXT:    mov.b v2[12], w12
> +; CHECK-NEXT:    ldr w12, [sp, #176]
> +; CHECK-NEXT:    ldr w11, [sp, #440]
> +; CHECK-NEXT:    ldr w14, [sp, #312]
> +; CHECK-NEXT:    mov.b v1[12], w15
> +; CHECK-NEXT:    ldr w15, [sp, #184]
> +; CHECK-NEXT:    mov.b v3[13], w10
> +; CHECK-NEXT:    mov.b v2[13], w8
> +; CHECK-NEXT:    mov.b v1[13], w16
> +; CHECK-NEXT:    mov.b v3[14], w9
> +; CHECK-NEXT:    mov.b v2[14], w13
> +; CHECK-NEXT:    mov.b v1[14], w12
> +; CHECK-NEXT:    movi.16b v4, #1
> +; CHECK-NEXT:    mov.b v3[15], w11
> +; CHECK-NEXT:    mov.b v2[15], w14
> +; CHECK-NEXT:    mov.b v1[15], w15
> +; CHECK-NEXT:    and.16b v0, v0, v4
> +; CHECK-NEXT:    and.16b v1, v1, v4
> +; CHECK-NEXT:    and.16b v2, v2, v4
> +; CHECK-NEXT:    and.16b v3, v3, v4
> ; CHECK-NEXT:    ret
> -;
>   %res = zext <64 x i1> %arg to <64 x i8>
>   ret <64 x i8> %res
> }
> 
> define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
> ; CHECK-LABEL: sext_v64i1:
> -; CHECK:         shl.16b v0, v0, #7
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    ldr w11, [sp, #64]
> +; CHECK-NEXT:    ldr w8, [sp, #320]
> +; CHECK-NEXT:    ldr w10, [sp, #192]
> +; CHECK-NEXT:    ldr w9, [sp, #328]
> +; CHECK-NEXT:    ldr w12, [sp, #336]
> +; CHECK-NEXT:    fmov s3, w11
> +; CHECK-NEXT:    ldr w11, [sp, #200]
> +; CHECK-NEXT:    fmov s1, w8
> +; CHECK-NEXT:    ldr w8, [sp, #344]
> +; CHECK-NEXT:    fmov s0, w0
> +; CHECK-NEXT:    fmov s2, w10
> +; CHECK-NEXT:    ldr w10, [sp, #352]
> +; CHECK-NEXT:    mov.b v0[1], w1
> +; CHECK-NEXT:    mov.b v1[1], w9
> +; CHECK-NEXT:    ldr w9, [sp, #360]
> +; CHECK-NEXT:    mov.b v0[2], w2
> +; CHECK-NEXT:    ldr w13, [sp, #72]
> +; CHECK-NEXT:    mov.b v2[1], w11
> +; CHECK-NEXT:    ldr w11, [sp, #368]
> +; CHECK-NEXT:    mov.b v1[2], w12
> +; CHECK-NEXT:    mov.b v0[3], w3
> +; CHECK-NEXT:    ldr w12, [sp, #376]
> +; CHECK-NEXT:    mov.b v1[3], w8
> +; CHECK-NEXT:    mov.b v0[4], w4
> +; CHECK-NEXT:    mov.b v1[4], w10
> +; CHECK-NEXT:    ldr w10, [sp]
> +; CHECK-NEXT:    mov.b v0[5], w5
> +; CHECK-NEXT:    ldr w14, [sp, #80]
> +; CHECK-NEXT:    mov.b v1[5], w9
> +; CHECK-NEXT:    ldr w9, [sp, #8]
> +; CHECK-NEXT:    mov.b v3[1], w13
> +; CHECK-NEXT:    ldr w13, [sp, #208]
> +; CHECK-NEXT:    mov.b v0[6], w6
> +; CHECK-NEXT:    ldr w15, [sp, #88]
> +; CHECK-NEXT:    mov.b v1[6], w11
> +; CHECK-NEXT:    ldr w11, [sp, #16]
> +; CHECK-NEXT:    mov.b v0[7], w7
> +; CHECK-NEXT:    ldr w16, [sp, #96]
> +; CHECK-NEXT:    mov.b v1[7], w12
> +; CHECK-NEXT:    ldr w12, [sp, #24]
> +; CHECK-NEXT:    mov.b v0[8], w10
> +; CHECK-NEXT:    ldr w10, [sp, #104]
> +; CHECK-NEXT:    mov.b v3[2], w14
> +; CHECK-NEXT:    ldr w14, [sp, #216]
> +; CHECK-NEXT:    mov.b v0[9], w9
> +; CHECK-NEXT:    ldr w9, [sp, #112]
> +; CHECK-NEXT:    mov.b v2[2], w13
> +; CHECK-NEXT:    ldr w13, [sp, #384]
> +; CHECK-NEXT:    mov.b v3[3], w15
> +; CHECK-NEXT:    ldr w15, [sp, #224]
> +; CHECK-NEXT:    mov.b v0[10], w11
> +; CHECK-NEXT:    ldr w11, [sp, #120]
> +; CHECK-NEXT:    mov.b v3[4], w16
> +; CHECK-NEXT:    ldr w16, [sp, #232]
> +; CHECK-NEXT:    mov.b v0[11], w12
> +; CHECK-NEXT:    ldr w12, [sp, #128]
> +; CHECK-NEXT:    mov.b v3[5], w10
> +; CHECK-NEXT:    ldr w10, [sp, #240]
> +; CHECK-NEXT:    mov.b v2[3], w14
> +; CHECK-NEXT:    mov.b v3[6], w9
> +; CHECK-NEXT:    ldr w9, [sp, #248]
> +; CHECK-NEXT:    ldr w8, [sp, #392]
> +; CHECK-NEXT:    mov.b v2[4], w15
> +; CHECK-NEXT:    mov.b v3[7], w11
> +; CHECK-NEXT:    ldr w11, [sp, #256]
> +; CHECK-NEXT:    mov.b v1[8], w13
> +; CHECK-NEXT:    ldr w13, [sp, #32]
> +; CHECK-NEXT:    ldr w14, [sp, #400]
> +; CHECK-NEXT:    mov.b v2[5], w16
> +; CHECK-NEXT:    mov.b v3[8], w12
> +; CHECK-NEXT:    ldr w12, [sp, #264]
> +; CHECK-NEXT:    ldr w15, [sp, #408]
> +; CHECK-NEXT:    mov.b v2[6], w10
> +; CHECK-NEXT:    mov.b v2[7], w9
> +; CHECK-NEXT:    mov.b v2[8], w11
> +; CHECK-NEXT:    mov.b v0[12], w13
> +; CHECK-NEXT:    ldr w13, [sp, #136]
> +; CHECK-NEXT:    mov.b v1[9], w8
> +; CHECK-NEXT:    ldr w8, [sp, #40]
> +; CHECK-NEXT:    mov.b v2[9], w12
> +; CHECK-NEXT:    ldr w12, [sp, #272]
> +; CHECK-NEXT:    mov.b v1[10], w14
> +; CHECK-NEXT:    ldr w14, [sp, #48]
> +; CHECK-NEXT:    mov.b v1[11], w15
> +; CHECK-NEXT:    ldr w15, [sp, #56]
> +; CHECK-NEXT:    ldr w16, [sp, #416]
> +; CHECK-NEXT:    mov.b v3[9], w13
> +; CHECK-NEXT:    ldr w13, [sp, #280]
> +; CHECK-NEXT:    mov.b v0[13], w8
> +; CHECK-NEXT:    ldr w8, [sp, #144]
> +; CHECK-NEXT:    mov.b v2[10], w12
> +; CHECK-NEXT:    ldr w12, [sp, #288]
> +; CHECK-NEXT:    mov.b v0[14], w14
> +; CHECK-NEXT:    ldr w14, [sp, #152]
> +; CHECK-NEXT:    mov.b v0[15], w15
> +; CHECK-NEXT:    ldr w15, [sp, #160]
> +; CHECK-NEXT:    ldr w10, [sp, #424]
> +; CHECK-NEXT:    mov.b v3[10], w8
> +; CHECK-NEXT:    ldr w8, [sp, #296]
> +; CHECK-NEXT:    mov.b v2[11], w13
> +; CHECK-NEXT:    mov.b v1[12], w16
> +; CHECK-NEXT:    ldr w16, [sp, #168]
> +; CHECK-NEXT:    ldr w9, [sp, #432]
> +; CHECK-NEXT:    ldr w13, [sp, #304]
> +; CHECK-NEXT:    mov.b v3[11], w14
> +; CHECK-NEXT:    mov.b v2[12], w12
> +; CHECK-NEXT:    ldr w12, [sp, #176]
> +; CHECK-NEXT:    ldr w11, [sp, #440]
> +; CHECK-NEXT:    ldr w14, [sp, #312]
> +; CHECK-NEXT:    mov.b v3[12], w15
> +; CHECK-NEXT:    ldr w15, [sp, #184]
> +; CHECK-NEXT:    mov.b v1[13], w10
> +; CHECK-NEXT:    mov.b v2[13], w8
> +; CHECK-NEXT:    mov.b v3[13], w16
> +; CHECK-NEXT:    mov.b v1[14], w9
> +; CHECK-NEXT:    mov.b v2[14], w13
> +; CHECK-NEXT:    mov.b v3[14], w12
> +; CHECK-NEXT:    mov.b v1[15], w11
> +; CHECK-NEXT:    mov.b v2[15], w14
> +; CHECK-NEXT:    mov.b v3[15], w15
> +; CHECK-NEXT:    shl.16b v0, v0, #7
> ; CHECK-NEXT:    shl.16b v3, v3, #7
> ; CHECK-NEXT:    shl.16b v2, v2, #7
> -; CHECK-NEXT:    shl.16b [[V4:v.+]], v1, #7
> +; CHECK-NEXT:    shl.16b v4, v1, #7
> ; CHECK-NEXT:    sshr.16b v0, v0, #7
> ; CHECK-NEXT:    sshr.16b v1, v3, #7
> ; CHECK-NEXT:    sshr.16b v2, v2, #7
> -; CHECK-NEXT:    sshr.16b v3, [[V4]], #7
> +; CHECK-NEXT:    sshr.16b v3, v4, #7
> ; CHECK-NEXT:    ret
> -;
>   %res = sext <64 x i1> %arg to <64 x i8>
>   ret <64 x i8> %res
> }
> 
> define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
> -; CHECK-LABEL: sext_v1x64:
> -; CHECK-NEXT:   .cfi_startproc
> -; CHECK-NEXT:    fmov    x8, d0
> -; CHECK-NEXT:    asr x1, x8, #63
>   ; X0 & X1 are the real return registers, SDAG messes with v0 too for unknown reasons.
> -; CHECK:    {{(mov.d   v0[1], x1)?}}
> -; CHECK:    fmov    x0, d0
> -; CHECK:    ret
> +; CHECKDAG-LABEL: sext_v1x64:
> +; CHECKDAG:       // %bb.0:
> +; CHECKDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
> +; CHECKDAG-NEXT:    fmov x8, d0
> +; CHECKDAG-NEXT:    asr x1, x8, #63
> +; CHECKDAG-NEXT:    mov.d v0[1], x1
> +; CHECKDAG-NEXT:    fmov x0, d0
> +; CHECKDAG-NEXT:    ret
> ;
> +; FALLBACK-LABEL: sext_v1x64:
> +; FALLBACK:       // %bb.0:
> +; FALLBACK-NEXT:    fmov x8, d0
> +; FALLBACK-NEXT:    asr x1, x8, #63
> +; FALLBACK-NEXT:    fmov x0, d0
> +; FALLBACK-NEXT:    ret
>   %res = sext <1 x i64> %arg to <1 x i128>
>   ret <1 x i128> %res
> }
> 
> diff  --git a/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll b/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
> index be0388284fb8..a3c6e99525f8 100644
> --- a/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
> +++ b/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
> @@ -1,10 +1,12 @@
> -; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra | FileCheck %s
> +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> +; RUN: llc < %s -mtriple=arm64-none-eabi | FileCheck %s
> 
> define void @bar(<8 x i16> %arg, <8 x i8>* %p) nounwind {
> ; CHECK-LABEL: bar:
> -; CHECK: xtn.8b v[[REG:[0-9]+]], v0
> -; CHECK-NEXT: str d[[REG]], [x0]
> -; CHECK-NEXT: ret
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    xtn v0.8b, v0.8h
> +; CHECK-NEXT:    str d0, [x0]
> +; CHECK-NEXT:    ret
>   %tmp = trunc <8 x i16> %arg to <8 x i8>
>   store <8 x i8> %tmp, <8 x i8>* %p, align 8
>   ret void
> @@ -15,15 +17,14 @@ define void @bar(<8 x i16> %arg, <8 x i8>* %p) nounwind {
> @zptr32 = common global i32* null, align 8
> 
> define void @fct32(i32 %arg, i64 %var) {
> -; CHECK: fct32
> -; CHECK: adrp [[GLOBALPAGE:x[0-9]+]], _zptr32@GOTPAGE
> -; CHECK: ldr [[GLOBALOFF:x[0-9]+]], {{\[}}[[GLOBALPAGE]], _zptr32@GOTPAGEOFF]
> -; CHECK: ldr [[GLOBALADDR:x[0-9]+]], {{\[}}[[GLOBALOFF]]]
> -; w0 is %arg
> -; CHECK-NEXT: sub w[[OFFSETREGNUM:[0-9]+]], w0, #1
> -; w1 is %var truncated
> -; CHECK-NEXT: str w1, {{\[}}[[GLOBALADDR]], w[[OFFSETREGNUM]], sxtw #2]
> -; CHECK-NEXT: ret
> +; CHECK-LABEL: fct32:
> +; CHECK:       // %bb.0: // %bb
> +; CHECK-NEXT:    adrp x8, :got:zptr32
> +; CHECK-NEXT:    ldr x8, [x8, :got_lo12:zptr32]
> +; CHECK-NEXT:    sub w9, w0, #1
> +; CHECK-NEXT:    ldr x8, [x8]
> +; CHECK-NEXT:    str w1, [x8, w9, sxtw #2]
> +; CHECK-NEXT:    ret
> bb:
>   %.pre37 = load i32*, i32** @zptr32, align 8
>   %dec = add nsw i32 %arg, -1
> @@ -35,15 +36,14 @@ bb:
> }
> 
> define void @fct16(i32 %arg, i64 %var) {
> -; CHECK: fct16
> -; CHECK: adrp [[GLOBALPAGE:x[0-9]+]], _zptr16@GOTPAGE
> -; CHECK: ldr [[GLOBALOFF:x[0-9]+]], {{\[}}[[GLOBALPAGE]], _zptr16@GOTPAGEOFF]
> -; CHECK: ldr [[GLOBALADDR:x[0-9]+]], {{\[}}[[GLOBALOFF]]]
> -; w0 is %arg
> -; CHECK-NEXT: sub w[[OFFSETREGNUM:[0-9]+]], w0, #1
> -; w1 is %var truncated
> -; CHECK-NEXT: strh w1, {{\[}}[[GLOBALADDR]], w[[OFFSETREGNUM]], sxtw #1]
> -; CHECK-NEXT: ret
> +; CHECK-LABEL: fct16:
> +; CHECK:       // %bb.0: // %bb
> +; CHECK-NEXT:    adrp x8, :got:zptr16
> +; CHECK-NEXT:    ldr x8, [x8, :got_lo12:zptr16]
> +; CHECK-NEXT:    sub w9, w0, #1
> +; CHECK-NEXT:    ldr x8, [x8]
> +; CHECK-NEXT:    strh w1, [x8, w9, sxtw #1]
> +; CHECK-NEXT:    ret
> bb:
>   %.pre37 = load i16*, i16** @zptr16, align 8
>   %dec = add nsw i32 %arg, -1
> @@ -55,15 +55,14 @@ bb:
> }
> 
> define void @fct8(i32 %arg, i64 %var) {
> -; CHECK: fct8
> -; CHECK: adrp [[GLOBALPAGE:x[0-9]+]], _zptr8@GOTPAGE
> -; CHECK: ldr [[GLOBALOFF:x[0-9]+]], {{\[}}[[GLOBALPAGE]], _zptr8@GOTPAGEOFF]
> -; CHECK: ldr [[BASEADDR:x[0-9]+]], {{\[}}[[GLOBALOFF]]]
> -; w0 is %arg
> -; CHECK-NEXT: add [[ADDR:x[0-9]+]], [[BASEADDR]], w0, sxtw
> -; w1 is %var truncated
> -; CHECK-NEXT: sturb w1, {{\[}}[[ADDR]], #-1]
> -; CHECK-NEXT: ret
> +; CHECK-LABEL: fct8:
> +; CHECK:       // %bb.0: // %bb
> +; CHECK-NEXT:    adrp x8, :got:zptr8
> +; CHECK-NEXT:    ldr x8, [x8, :got_lo12:zptr8]
> +; CHECK-NEXT:    ldr x8, [x8]
> +; CHECK-NEXT:    add x8, x8, w0, sxtw
> +; CHECK-NEXT:    sturb w1, [x8, #-1]
> +; CHECK-NEXT:    ret
> bb:
>   %.pre37 = load i8*, i8** @zptr8, align 8
>   %dec = add nsw i32 %arg, -1
> 
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list