[llvm] r206887 - AArch64/ARM64: add extra testing from AArch64 to ARM64

Tim Northover <tnorthover at apple.com>
Tue Apr 22 05:45:33 PDT 2014


Author: tnorthover
Date: Tue Apr 22 07:45:32 2014
New Revision: 206887

URL: http://llvm.org/viewvc/llvm-project?rev=206887&view=rev
Log:
AArch64/ARM64: add extra testing from AArch64 to ARM64

Added:
    llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll
    llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-vget.ll
    llvm/trunk/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll
      - copied, changed from r206877, llvm/trunk/test/CodeGen/AArch64/neon-vector-list-spill.ll
Modified:
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-fabd.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-fcvt.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-fp-compare.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-mul.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-neg.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift.ll
    llvm/trunk/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll
    llvm/trunk/test/CodeGen/AArch64/neon-vector-list-spill.ll

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; intrinsic wrangling that arm64 does differently.
 
 define i8 @test_vqmovunh_s16(i16 %a) {
 ; CHECK: test_vqmovunh_s16

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-fabd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-fabd.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-fabd.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-fabd.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; arm64 has these two tests in vabs.ll
 
 define float @test_vabds_f32(float %a, float %b) {
 ; CHECK-LABEL: test_vabds_f32

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-fcvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-fcvt.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-fcvt.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-fcvt.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; arm64 duplicates these tests in cvt.ll
 
 ;; Scalar Floating-point Convert
 

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-fp-compare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-fp-compare.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-fp-compare.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-fp-compare.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; arm64 does not use intrinsics for comparisons.
 
 ;; Scalar Floating-point Compare
 

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-mul.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-mul.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-mul.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; Just intrinsic wrangling, and arm64 does scalar differently anyway.
 
 define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) {
 ; CHECK: test_vqdmulhh_s16

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-neg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-neg.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-neg.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-neg.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; Intrinsic wrangling. arm64 does it differently.
 
 define i64 @test_vnegd_s64(i64 %a) {
 ; CHECK: test_vnegd_s64

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; duplicates arm64 tests in vsqrt.ll
 
 define float @test_vrecpss_f32(float %a, float %b) {
 ; CHECK: test_vrecpss_f32

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; Intrinsic wrangling. Duplicates various arm64 tests.
 
 declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>)
 

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll Tue Apr 22 07:45:32 2014
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
+; Duplicates arm64's vshift.ll
 
 declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
 declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; Intrinsic wrangling and arm64 does it differently.
 
 declare <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
 declare <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8>, <1 x i8>)

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; Intrinsic wrangling and arm64 does it differently.
 
 declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
 declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; Intrinsic wrangling and arm64 does it differently.
 
 declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
 declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift-imm.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift-imm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift-imm.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; Intrinsic wrangling & arm64 does it differently.
 
 define i64 @test_vshrd_n_s64(i64 %a) {
 ; CHECK: test_vshrd_n_s64

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; Duplicates existing arm64 tests in vshift.ll and vcmp.ll
 
 declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
 declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)

Modified: llvm/trunk/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; not relevant for arm64: <1 x iN> isn't legal
 
 ; This file tests the spill of FPR8/FPR16. The volatile loads/stores force the
 ; allocator to keep the value live until it's needed.

Modified: llvm/trunk/test/CodeGen/AArch64/neon-vector-list-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-vector-list-spill.ll?rev=206887&r1=206886&r2=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-vector-list-spill.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-vector-list-spill.ll Tue Apr 22 07:45:32 2014
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast
+; arm64 has separate copy as aarch64-neon-vector-list-spill.ll
 
 ; FIXME: We should not generate ld/st for such register spill/fill, because the
 ; test case seems very simple and the register pressure is not high. If the

Added: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll?rev=206887&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll (added)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll Tue Apr 22 07:45:32 2014
@@ -0,0 +1,663 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
+; CHECK: test_vshr_n_s8
+; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <8 x i8> %vshr_n
+}
+
+define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
+; CHECK: test_vshr_n_s16
+; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
+  ret <4 x i16> %vshr_n
+}
+
+define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
+; CHECK: test_vshr_n_s32
+; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
+  ret <2 x i32> %vshr_n
+}
+
+define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
+; CHECK: test_vshrq_n_s8
+; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %vshr_n
+}
+
+define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
+; CHECK: test_vshrq_n_s16
+; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %vshr_n
+}
+
+define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
+; CHECK: test_vshrq_n_s32
+; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %vshr_n
+}
+
+define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
+; CHECK: test_vshrq_n_s64
+; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
+  ret <2 x i64> %vshr_n
+}
+
+define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
+; CHECK: test_vshr_n_u8
+; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <8 x i8> %vshr_n
+}
+
+define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
+; CHECK: test_vshr_n_u16
+; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
+  ret <4 x i16> %vshr_n
+}
+
+define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
+; CHECK: test_vshr_n_u32
+; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
+  ret <2 x i32> %vshr_n
+}
+
+define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
+; CHECK: test_vshrq_n_u8
+; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %vshr_n
+}
+
+define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
+; CHECK: test_vshrq_n_u16
+; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %vshr_n
+}
+
+define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
+; CHECK: test_vshrq_n_u32
+; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %vshr_n
+}
+
+define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
+; CHECK: test_vshrq_n_u64
+; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
+  ret <2 x i64> %vshr_n
+}
+
+define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vsra_n_s8
+; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  %1 = add <8 x i8> %vsra_n, %a
+  ret <8 x i8> %1
+}
+
+define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vsra_n_s16
+; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
+  %1 = add <4 x i16> %vsra_n, %a
+  ret <4 x i16> %1
+}
+
+define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vsra_n_s32
+; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
+  %1 = add <2 x i32> %vsra_n, %a
+  ret <2 x i32> %1
+}
+
+define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vsraq_n_s8
+; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  %1 = add <16 x i8> %vsra_n, %a
+  ret <16 x i8> %1
+}
+
+define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vsraq_n_s16
+; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %1 = add <8 x i16> %vsra_n, %a
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vsraq_n_s32
+; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
+  %1 = add <4 x i32> %vsra_n, %a
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vsraq_n_s64
+; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
+  %1 = add <2 x i64> %vsra_n, %a
+  ret <2 x i64> %1
+}
+
+define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vsra_n_u8
+; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  %1 = add <8 x i8> %vsra_n, %a
+  ret <8 x i8> %1
+}
+
+define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vsra_n_u16
+; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
+  %1 = add <4 x i16> %vsra_n, %a
+  ret <4 x i16> %1
+}
+
+define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vsra_n_u32
+; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
+  %1 = add <2 x i32> %vsra_n, %a
+  ret <2 x i32> %1
+}
+
+define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vsraq_n_u8
+; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  %1 = add <16 x i8> %vsra_n, %a
+  ret <16 x i8> %1
+}
+
+define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vsraq_n_u16
+; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %1 = add <8 x i16> %vsra_n, %a
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vsraq_n_u32
+; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
+  %1 = add <4 x i32> %vsra_n, %a
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vsraq_n_u64
+; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
+  %1 = add <2 x i64> %vsra_n, %a
+  ret <2 x i64> %1
+}
+
+define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
+; CHECK: test_vshrn_n_s16
+; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+  %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
+  ret <8 x i8> %vshrn_n
+}
+
+define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
+; CHECK: test_vshrn_n_s32
+; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+  %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
+  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
+  ret <4 x i16> %vshrn_n
+}
+
+define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
+; CHECK: test_vshrn_n_s64
+; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+  %1 = ashr <2 x i64> %a, <i64 19, i64 19>
+  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
+  ret <2 x i32> %vshrn_n
+}
+
+define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
+; CHECK: test_vshrn_n_u16
+; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+  %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
+  ret <8 x i8> %vshrn_n
+}
+
+define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
+; CHECK: test_vshrn_n_u32
+; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+  %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
+  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
+  ret <4 x i16> %vshrn_n
+}
+
+define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
+; CHECK: test_vshrn_n_u64
+; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+  %1 = lshr <2 x i64> %a, <i64 19, i64 19>
+  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
+  ret <2 x i32> %vshrn_n
+}
+
+define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vshrn_high_n_s16
+; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
+  %2 = bitcast <8 x i8> %a to <1 x i64>
+  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %4
+}
+
+define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vshrn_high_n_s32
+; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
+  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
+  %2 = bitcast <4 x i16> %a to <1 x i64>
+  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %4
+}
+
+define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vshrn_high_n_s64
+; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %2 = ashr <2 x i64> %b, <i64 19, i64 19>
+  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
+  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %4
+}
+
+define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vshrn_high_n_u16
+; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
+  %2 = bitcast <8 x i8> %a to <1 x i64>
+  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %4
+}
+
+define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vshrn_high_n_u32
+; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
+  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
+  %2 = bitcast <4 x i16> %a to <1 x i64>
+  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %4
+}
+
+define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vshrn_high_n_u64
+; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %2 = lshr <2 x i64> %b, <i64 19, i64 19>
+  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
+  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %4
+}
+
+define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vqshrun_high_n_s16
+; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vqshrun = tail call <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vqshrun_high_n_s32
+; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vqshrun = tail call <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vqshrun_high_n_s64
+; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vqshrun = tail call <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vrshrn_high_n_s16
+; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vrshrn = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vrshrn_high_n_s32
+; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vrshrn = tail call <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vrshrn_high_n_s64
+; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vrshrn = tail call <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vqrshrun_high_n_s16
+; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vqrshrun = tail call <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vqrshrun_high_n_s32
+; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vqrshrun = tail call <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vqrshrun_high_n_s64
+; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vqrshrun = tail call <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vqshrn_high_n_s16
+; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vqshrn = tail call <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vqshrn_high_n_s32
+; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vqshrn = tail call <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vqshrn_high_n_s64
+; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vqshrn = tail call <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vqshrn_high_n_u16
+; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vqshrn = tail call <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vqshrn_high_n_u32
+; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vqshrn = tail call <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vqshrn_high_n_u64
+; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vqshrn = tail call <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vqrshrn_high_n_s16
+; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vqrshrn = tail call <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vqrshrn_high_n_s32
+; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vqrshrn = tail call <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vqrshrn_high_n_s64
+; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vqrshrn = tail call <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vqrshrn_high_n_u16
+; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vqrshrn = tail call <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vqrshrn_high_n_u32
+; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vqrshrn = tail call <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vqrshrn_high_n_u64
+; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vqrshrn = tail call <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+
+
+declare <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64>, i32)
+
+declare <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)
+
+declare <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
+
+declare <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)
+
+declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)
+
+declare <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)
+
+declare <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)
+
+declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)
+
+declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)
+
+declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)
+
+declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)
+
+declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
+
+declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)
+
+define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvt_n_s64_f64
+; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
+  %1 = tail call <1 x i64> @llvm.arm64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvt_n_u64_f64
+; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
+  %1 = tail call <1 x i64> @llvm.arm64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
+  ret <1 x i64> %1
+}
+
+define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
+; CHECK-LABEL: test_vcvt_n_f64_s64
+; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+  %1 = tail call <1 x double> @llvm.arm64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
+; CHECK-LABEL: test_vcvt_n_f64_u64
+; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+  %1 = tail call <1 x double> @llvm.arm64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
+  ret <1 x double> %1
+}
+
+declare <1 x i64> @llvm.arm64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
+declare <1 x i64> @llvm.arm64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
+declare <1 x double> @llvm.arm64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
+declare <1 x double> @llvm.arm64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)

Added: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-vget.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-vget.ll?rev=206887&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-vget.ll (added)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-vget.ll Tue Apr 22 07:45:32 2014
@@ -0,0 +1,225 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <8 x i8> @test_vget_high_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_high_s8:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vget_high_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_high_s16:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vget_high_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vget_high_s32:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <1 x i64> @test_vget_high_s64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_high_s64:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
+  ret <1 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vget_high_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_high_u8:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vget_high_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_high_u16:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vget_high_u32(<4 x i32> %a) {
+; CHECK-LABEL: test_vget_high_u32:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <1 x i64> @test_vget_high_u64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_high_u64:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
+  ret <1 x i64> %shuffle.i
+}
+
+define <1 x i64> @test_vget_high_p64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_high_p64:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
+  ret <1 x i64> %shuffle.i
+}
+
+define <4 x i16> @test_vget_high_f16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_high_f16:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <2 x float> @test_vget_high_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vget_high_f32:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  ret <2 x float> %shuffle.i
+}
+
+define <8 x i8> @test_vget_high_p8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_high_p8:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vget_high_p16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_high_p16:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <1 x double> @test_vget_high_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vget_high_f64:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> <i32 1>
+  ret <1 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vget_low_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_low_s8:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vget_low_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_low_s16:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vget_low_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vget_low_s32:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x i32> %shuffle.i
+}
+
+define <1 x i64> @test_vget_low_s64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_low_s64:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
+  ret <1 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vget_low_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_low_u8:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vget_low_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_low_u16:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vget_low_u32(<4 x i32> %a) {
+; CHECK-LABEL: test_vget_low_u32:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x i32> %shuffle.i
+}
+
+define <1 x i64> @test_vget_low_u64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_low_u64:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
+  ret <1 x i64> %shuffle.i
+}
+
+define <1 x i64> @test_vget_low_p64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_low_p64:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
+  ret <1 x i64> %shuffle.i
+}
+
+define <4 x i16> @test_vget_low_f16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_low_f16:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %shuffle.i
+}
+
+define <2 x float> @test_vget_low_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vget_low_f32:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x float> %shuffle.i
+}
+
+define <8 x i8> @test_vget_low_p8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_low_p8:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vget_low_p16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_low_p16:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %shuffle.i
+}
+
+define <1 x double> @test_vget_low_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vget_low_f64:
+; CHECK: ret
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> zeroinitializer
+  ret <1 x double> %shuffle.i
+}

Copied: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll (from r206877, llvm/trunk/test/CodeGen/AArch64/neon-vector-list-spill.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll?p2=llvm/trunk/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll&p1=llvm/trunk/test/CodeGen/AArch64/neon-vector-list-spill.ll&r1=206877&r2=206887&rev=206887&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-vector-list-spill.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll Tue Apr 22 07:45:32 2014
@@ -1,16 +1,16 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 ; FIXME: We should not generate ld/st for such register spill/fill, because the
 ; test case seems very simple and the register pressure is not high. If the
 ; spill/fill algorithm is optimized, this test case may not be triggered. And
 ; then we can delete it.
-define i32 @spill.DPairReg(i8* %arg1, i32 %arg2) {
+define i32 @spill.DPairReg(i32* %arg1, i32 %arg2) {
 ; CHECK-LABEL: spill.DPairReg:
-; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
+; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 entry:
-  %vld = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %arg1, i32 4)
+  %vld = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %arg1)
   %cmp = icmp eq i32 %arg2, 0
   br i1 %cmp, label %if.then, label %if.end
 
@@ -24,13 +24,13 @@ if.end:
   ret i32 %res
 }
 
-define i16 @spill.DTripleReg(i8* %arg1, i32 %arg2) {
+define i16 @spill.DTripleReg(i16* %arg1, i32 %arg2) {
 ; CHECK-LABEL: spill.DTripleReg:
-; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: ld3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
+; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 entry:
-  %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %arg1, i32 4)
+  %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %arg1)
   %cmp = icmp eq i32 %arg2, 0
   br i1 %cmp, label %if.then, label %if.end
 
@@ -44,13 +44,13 @@ if.end:
   ret i16 %res
 }
 
-define i16 @spill.DQuadReg(i8* %arg1, i32 %arg2) {
+define i16 @spill.DQuadReg(i16* %arg1, i32 %arg2) {
 ; CHECK-LABEL: spill.DQuadReg:
-; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: ld4 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
+; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 entry:
-  %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %arg1, i32 4)
+  %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %arg1)
   %cmp = icmp eq i32 %arg2, 0
   br i1 %cmp, label %if.then, label %if.end
 
@@ -64,13 +64,13 @@ if.end:
   ret i16 %res
 }
 
-define i32 @spill.QPairReg(i8* %arg1, i32 %arg2) {
+define i32 @spill.QPairReg(i32* %arg1, i32 %arg2) {
 ; CHECK-LABEL: spill.QPairReg:
-; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: ld2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
+; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 entry:
-  %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %arg1, i32 4)
+  %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %arg1)
   %cmp = icmp eq i32 %arg2, 0
   br i1 %cmp, label %if.then, label %if.end
 
@@ -84,13 +84,13 @@ if.end:
   ret i32 %res
 }
 
-define float @spill.QTripleReg(i8* %arg1, i32 %arg2) {
+define float @spill.QTripleReg(float* %arg1, i32 %arg2) {
 ; CHECK-LABEL: spill.QTripleReg:
-; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
+; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 entry:
-  %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %arg1, i32 4)
+  %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float* %arg1)
   %cmp = icmp eq i32 %arg2, 0
   br i1 %cmp, label %if.then, label %if.end
 
@@ -106,11 +106,11 @@ if.end:
 
 define i8 @spill.QQuadReg(i8* %arg1, i32 %arg2) {
 ; CHECK-LABEL: spill.QQuadReg:
-; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: ld4 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
+; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
 entry:
-  %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %arg1, i32 4)
+  %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %arg1)
   %cmp = icmp eq i32 %arg2, 0
   br i1 %cmp, label %if.then, label %if.end
 
@@ -124,12 +124,12 @@ if.end:
   ret i8 %res
 }
 
-declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32)
-declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
+declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32*)
+declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16*)
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16*)
+declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32*)
+declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float*)
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8*)
 
 declare void @foo()
 
@@ -138,8 +138,8 @@ declare void @foo()
 ; spill/fill algorithm is optimized, this test case may not be triggered. And
 ; then we can delete it.
 ; check the spill for Register Class QPair_with_qsub_0_in_FPR128Lo
-define <8 x i16> @test_2xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) {
-  tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8)
+define <8 x i16> @test_2xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) {
+  tail call void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr)
   tail call void @foo()
   %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
   %1 = bitcast <2 x i64> %sv to <8 x i16>
@@ -149,8 +149,8 @@ define <8 x i16> @test_2xFPR128Lo(i64 %g
 }
 
 ; check the spill for Register Class QTriple_with_qsub_0_in_FPR128Lo
-define <8 x i16> @test_3xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) {
-  tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8)
+define <8 x i16> @test_3xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) {
+  tail call void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr)
   tail call void @foo()
   %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
   %1 = bitcast <2 x i64> %sv to <8 x i16>
@@ -160,8 +160,8 @@ define <8 x i16> @test_3xFPR128Lo(i64 %g
 }
 
 ; check the spill for Register Class QQuad_with_qsub_0_in_FPR128Lo
-define <8 x i16> @test_4xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) {
-  tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8)
+define <8 x i16> @test_4xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) {
+  tail call void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr)
   tail call void @foo()
   %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
   %1 = bitcast <2 x i64> %sv to <8 x i16>
@@ -170,6 +170,6 @@ define <8 x i16> @test_4xFPR128Lo(i64 %g
   ret <8 x i16> %3
 }
 
-declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
+declare void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*)
+declare void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
+declare void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
