[llvm] r209576 - AArch64/ARM64: remove AArch64 from tree prior to renaming ARM64.

Tim Northover <tnorthover@apple.com>
Sat May 24 05:42:30 PDT 2014


Modified: llvm/trunk/test/CodeGen/AArch64/ldst-unscaledimm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/ldst-unscaledimm.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/ldst-unscaledimm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/ldst-unscaledimm.ll Sat May 24 07:42:26 2014
@@ -1,5 +1,3 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 

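The change to these tests is mechanical: the RUN lines driving llc with an aarch64-* triple are deleted, leaving only the arm64-* invocations that were added when each test was ported. A minimal sketch of the surviving shape (function body and CHECK line hypothetical, not taken from any file in this commit):

    ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s

    define i32 @example(i32 %a) {
    ; CHECK-LABEL: example:
    ; CHECK: add w0, w0, #1
      %r = add i32 %a, 1
      ret i32 %r
    }
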
Modified: llvm/trunk/test/CodeGen/AArch64/ldst-unsignedimm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/ldst-unsignedimm.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/ldst-unsignedimm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/ldst-unsignedimm.ll Sat May 24 07:42:26 2014
@@ -1,5 +1,3 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 

Removed: llvm/trunk/test/CodeGen/AArch64/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/lit.local.cfg?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/lit.local.cfg (original)
+++ llvm/trunk/test/CodeGen/AArch64/lit.local.cfg (removed)
@@ -1,4 +0,0 @@
-targets = set(config.root.targets_to_build.split())
-if 'AArch64' not in targets or 'ARM64' not in targets:
-    config.unsupported = True
-

Modified: llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll Sat May 24 07:42:26 2014
@@ -1,7 +1,3 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK-LARGE %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s
 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s
 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -code-model=large -mcpu=cyclone | FileCheck --check-prefix=CHECK-LARGE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s

Removed: llvm/trunk/test/CodeGen/AArch64/literal_pools_int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/literal_pools_int.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/literal_pools_int.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/literal_pools_int.ll (removed)
@@ -1,58 +0,0 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK-LARGE %s
-; arm64 does not use literal pools for integers, so there is nothing to check.
-
-@var32 = global i32 0
-@var64 = global i64 0
-
-define void @foo() {
-; CHECK-LABEL: foo:
-    %val32 = load i32* @var32
-    %val64 = load i64* @var64
-
-    %val32_lit32 = and i32 %val32, 123456785
-    store volatile i32 %val32_lit32, i32* @var32
-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
-; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
-
-; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
-; CHECK-LARGE: ldr {{w[0-9]+}}, [x[[LITADDR]]]
-
-    %val64_lit32 = and i64 %val64, 305402420
-    store volatile i64 %val64_lit32, i64* @var64
-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
-; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
-
-; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
-; CHECK-LARGE: ldr {{w[0-9]+}}, [x[[LITADDR]]]
-
-    %val64_lit32signed = and i64 %val64, -12345678
-    store volatile i64 %val64_lit32signed, i64* @var64
-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
-; CHECK: ldrsw {{x[0-9]+}}, [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
-
-; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
-; CHECK-LARGE: ldrsw {{x[0-9]+}}, [x[[LITADDR]]]
-
-    %val64_lit64 = and i64 %val64, 1234567898765432
-    store volatile i64 %val64_lit64, i64* @var64
-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
-; CHECK: ldr {{x[0-9]+}}, [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
-
-; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
-; CHECK-LARGE: ldr {{x[0-9]+}}, [x[[LITADDR]]]
-
-    ret void
-}

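The removed comment above gives the rationale for deleting the whole file: the arm64 backend materializes integer constants with movz/movk sequences (or a single logical-immediate orr) instead of loading them from a constant pool, so the adrp/ldr patterns checked here can no longer occur. A hedged sketch of what the equivalent check looks like under the new backend (CHECK patterns illustrative, not copied from any arm64 test):

    ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s

    define i64 @lit64() {
    ; CHECK-LABEL: lit64:
    ; CHECK: movz {{x[0-9]+}}
    ; CHECK: movk {{x[0-9]+}}
      ret i64 1234567898765432
    }
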
Modified: llvm/trunk/test/CodeGen/AArch64/local_vars.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/local_vars.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/local_vars.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/local_vars.ll Sat May 24 07:42:26 2014
@@ -1,5 +1,3 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP-AARCH64 %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP-ARM64 %s
 

Modified: llvm/trunk/test/CodeGen/AArch64/logical-imm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/logical-imm.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/logical-imm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/logical-imm.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
 
 @var32 = global i32 0

Modified: llvm/trunk/test/CodeGen/AArch64/logical_shifted_reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/logical_shifted_reg.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/logical_shifted_reg.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/logical_shifted_reg.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s
 
 @var1_32 = global i32 0

Modified: llvm/trunk/test/CodeGen/AArch64/mature-mc-support.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/mature-mc-support.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/mature-mc-support.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/mature-mc-support.ll Sat May 24 07:42:26 2014
@@ -1,10 +1,8 @@
 ; Test that inline assembly is parsed by the MC layer when MC support is mature
 ; (even when the output is assembly).
 
-; RUN: not llc -mtriple=aarch64-pc-linux < %s > /dev/null 2> %t1
 ; RUN: FileCheck %s < %t1
 
-; RUN: not llc -mtriple=aarch64-pc-linux -filetype=obj < %s > /dev/null 2> %t2
 ; RUN: FileCheck %s < %t2
 
 ; RUN: not llc -mtriple=arm64-pc-linux < %s > /dev/null 2> %t3

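The point of this test is that a mature MC layer parses inline assembly even when llc emits textual assembly rather than an object file, so malformed assembly must be diagnosed in both modes. A hedged sketch of the pattern the remaining arm64 RUN lines exercise (the module body sits outside the hunk shown; the diagnostic text is the generic fatal error llc emits when inline-asm parsing fails):

    ; RUN: not llc -mtriple=arm64-pc-linux < %s > /dev/null 2> %t3
    ; RUN: FileCheck %s < %t3

    module asm ".this_directive_is_very_unlikely_to_exist"

    ; CHECK: LLVM ERROR: Error parsing inline asm
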
Removed: llvm/trunk/test/CodeGen/AArch64/misched-basic-A53.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/misched-basic-A53.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/misched-basic-A53.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/misched-basic-A53.ll (removed)
@@ -1,113 +0,0 @@
-; REQUIRES: asserts
-; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
-; arm64 now has a separate copy of this test.
-;
-; The Cortex-A53 machine model will cause the MADD instruction to be scheduled
-; much higher than the ADD instructions in order to hide latency. When no
-; subtarget is specified, the MADD will remain near the end of the block.
-;
-; CHECK: ********** MI Scheduling **********
-; CHECK: main
-; CHECK: *** Final schedule for BB#2 ***
-; CHECK: SU(13)
-; CHECK: MADDwwww
-; CHECK: SU(4)
-; CHECK: ADDwwi_lsl0_s
-; CHECK: ********** INTERVALS **********
-@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4
-@main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4
-
-; Function Attrs: nounwind
-define i32 @main() #0 {
-entry:
-  %retval = alloca i32, align 4
-  %x = alloca [8 x i32], align 4
-  %y = alloca [8 x i32], align 4
-  %i = alloca i32, align 4
-  %xx = alloca i32, align 4
-  %yy = alloca i32, align 4
-  store i32 0, i32* %retval
-  %0 = bitcast [8 x i32]* %x to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false)
-  %1 = bitcast [8 x i32]* %y to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false)
-  store i32 0, i32* %xx, align 4
-  store i32 0, i32* %yy, align 4
-  store i32 0, i32* %i, align 4
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc, %entry
-  %2 = load i32* %i, align 4
-  %cmp = icmp slt i32 %2, 8
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:                                         ; preds = %for.cond
-  %3 = load i32* %i, align 4
-  %idxprom = sext i32 %3 to i64
-  %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom
-  %4 = load i32* %arrayidx, align 4
-  %add = add nsw i32 %4, 1
-  store i32 %add, i32* %xx, align 4
-  %5 = load i32* %xx, align 4
-  %add1 = add nsw i32 %5, 12
-  store i32 %add1, i32* %xx, align 4
-  %6 = load i32* %xx, align 4
-  %add2 = add nsw i32 %6, 23
-  store i32 %add2, i32* %xx, align 4
-  %7 = load i32* %xx, align 4
-  %add3 = add nsw i32 %7, 34
-  store i32 %add3, i32* %xx, align 4
-  %8 = load i32* %i, align 4
-  %idxprom4 = sext i32 %8 to i64
-  %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4
-  %9 = load i32* %arrayidx5, align 4
-  %10 = load i32* %yy, align 4
-  %mul = mul nsw i32 %10, %9
-  store i32 %mul, i32* %yy, align 4
-  br label %for.inc
-
-for.inc:                                          ; preds = %for.body
-  %11 = load i32* %i, align 4
-  %inc = add nsw i32 %11, 1
-  store i32 %inc, i32* %i, align 4
-  br label %for.cond
-
-for.end:                                          ; preds = %for.cond
-  %12 = load i32* %xx, align 4
-  %13 = load i32* %yy, align 4
-  %add6 = add nsw i32 %12, %13
-  ret i32 %add6
-}
-
-
-; The Cortex-A53 machine model will cause the FDIVvvv_4S to be raised to
-; hide latency. Whereas normally there would only be a single FADDvvv_4S
-; after it, this test checks to make sure there is more than one.
-;
-; CHECK: ********** MI Scheduling **********
-; CHECK: neon4xfloat:BB#0
-; CHECK: *** Final schedule for BB#0 ***
-; CHECK: FDIVvvv_4S
-; CHECK: FADDvvv_4S
-; CHECK: FADDvvv_4S
-; CHECK: ********** INTERVALS **********
-define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) {
-        %tmp1 = fadd <4 x float> %A, %B;
-        %tmp2 = fadd <4 x float> %A, %tmp1;
-        %tmp3 = fadd <4 x float> %A, %tmp2;
-        %tmp4 = fadd <4 x float> %A, %tmp3;
-        %tmp5 = fadd <4 x float> %A, %tmp4;
-        %tmp6 = fadd <4 x float> %A, %tmp5;
-        %tmp7 = fadd <4 x float> %A, %tmp6;
-        %tmp8 = fadd <4 x float> %A, %tmp7;
-        %tmp9 = fdiv <4 x float> %A, %B;
-        %tmp10 = fadd <4 x float> %tmp8, %tmp9;
-
-        ret <4 x float> %tmp10
-}
-
-; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
-
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind }

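The two comment blocks in the removed test carry the real reasoning: the Cortex-A53 machine model tells the scheduler the latencies of MADD and FDIV, so it hoists them above independent instructions rather than leaving them where source order put them. A hedged, simplified sketch of the shape that provokes the MADD hoist (names hypothetical; scheduling happens on MachineInstrs, so IR order is only suggestive):

    define i32 @madd_shape(i32 %p, i32 %q, i32 %x) {
      %m  = mul nsw i32 %p, %q   ; long latency; the A53 model issues this early
      %a1 = add nsw i32 %x, 1    ; independent adds can fill the multiply's latency
      %a2 = add nsw i32 %a1, 12
      %r  = add nsw i32 %m, %a2  ; folds with %m into MADD; result needed last
      ret i32 %r
    }
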
Modified: llvm/trunk/test/CodeGen/AArch64/movw-consts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/movw-consts.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/movw-consts.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/movw-consts.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s  --check-prefix=CHECK --check-prefix=CHECK-AARCH64
 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
 
 define i64 @test0() {
@@ -10,49 +9,42 @@ define i64 @test0() {
 
 define i64 @test1() {
 ; CHECK-LABEL: test1:
-; CHECK-AARCH64: movz x0, #1
 ; CHECK-ARM64: orr w0, wzr, #0x1
   ret i64 1
 }
 
 define i64 @test2() {
 ; CHECK-LABEL: test2:
-; CHECK-AARCH64: movz x0, #65535
 ; CHECK-ARM64: orr w0, wzr, #0xffff
   ret i64 65535
 }
 
 define i64 @test3() {
 ; CHECK-LABEL: test3:
-; CHECK-AARCH64: movz x0, #1, lsl #16
 ; CHECK-ARM64: orr w0, wzr, #0x10000
   ret i64 65536
 }
 
 define i64 @test4() {
 ; CHECK-LABEL: test4:
-; CHECK-AARCH64: movz x0, #65535, lsl #16
 ; CHECK-ARM64: orr w0, wzr, #0xffff0000
   ret i64 4294901760
 }
 
 define i64 @test5() {
 ; CHECK-LABEL: test5:
-; CHECK-AARCH64: movz x0, #1, lsl #32
 ; CHECK-ARM64: orr x0, xzr, #0x100000000
   ret i64 4294967296
 }
 
 define i64 @test6() {
 ; CHECK-LABEL: test6:
-; CHECK-AARCH64: movz x0, #65535, lsl #32
 ; CHECK-ARM64: orr x0, xzr, #0xffff00000000
   ret i64 281470681743360
 }
 
 define i64 @test7() {
 ; CHECK-LABEL: test7:
-; CHECK-AARCH64: movz x0, #1, lsl #48
 ; CHECK-ARM64: orr x0, xzr, #0x1000000000000
   ret i64 281474976710656
 }
@@ -83,7 +75,6 @@ define i64 @test10() {
 
 define void @test11() {
 ; CHECK-LABEL: test11:
-; CHECK-AARCH64: mov {{w[0-9]+}}, wzr
 ; CHECK-ARM64: str wzr
   store i32 0, i32* @var32
   ret void
@@ -91,7 +82,6 @@ define void @test11() {
 
 define void @test12() {
 ; CHECK-LABEL: test12:
-; CHECK-AARCH64: movz {{w[0-9]+}}, #1
 ; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0x1
   store i32 1, i32* @var32
   ret void
@@ -99,7 +89,6 @@ define void @test12() {
 
 define void @test13() {
 ; CHECK-LABEL: test13:
-; CHECK-AARCH64: movz {{w[0-9]+}}, #65535
 ; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0xffff
   store i32 65535, i32* @var32
   ret void
@@ -107,7 +96,6 @@ define void @test13() {
 
 define void @test14() {
 ; CHECK-LABEL: test14:
-; CHECK-AARCH64: movz {{w[0-9]+}}, #1, lsl #16
 ; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0x10000
   store i32 65536, i32* @var32
   ret void
@@ -115,7 +103,6 @@ define void @test14() {
 
 define void @test15() {
 ; CHECK-LABEL: test15:
-; CHECK-AARCH64: movz {{w[0-9]+}}, #65535, lsl #16
 ; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0xffff0000
   store i32 4294901760, i32* @var32
   ret void
@@ -132,7 +119,6 @@ define i64 @test17() {
 ; CHECK-LABEL: test17:
 
   ; Mustn't MOVN w0 here.
-; CHECK-AARCH64: movn x0, #2
 ; CHECK-ARM64: orr x0, xzr, #0xfffffffffffffffd
   ret i64 -3
 }

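The replacements above follow one rule: where the removed backend built constants with movz/movn of an optionally shifted 16-bit chunk, arm64 prefers orr against the zero register whenever the value is encodable as a logical immediate (a rotated run of contiguous set bits). A hedged restatement using test2's value (the old-backend line is the deleted CHECK above; annotations mine):

    define i64 @ret_ffff() {
      ; old backend: movz x0, #65535        - one 16-bit chunk
      ; arm64:       orr  w0, wzr, #0xffff  - 0xffff is a contiguous run of set
      ;              bits, hence a valid logical immediate; writing w0 also
      ;              zero-extends into x0
      ret i64 65535
    }
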
Modified: llvm/trunk/test/CodeGen/AArch64/movw-shift-encoding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/movw-shift-encoding.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/movw-shift-encoding.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/movw-shift-encoding.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc -mtriple=aarch64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s --check-prefix=CHECK-AARCH64
 ; RUN: llc -mtriple=arm64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s --check-prefix=CHECK-ARM64
 
 @var = global i32 0
@@ -8,10 +7,6 @@
 
 define i32* @get_var() {
   ret i32* @var
-; CHECK-AARCH64: movz    x0, #:abs_g3:var        // encoding: [A,A,0xe0'A',0xd2'A']
-; CHECK-AARCH64: movk    x0, #:abs_g2_nc:var     // encoding: [A,A,0xc0'A',0xf2'A']
-; CHECK-AARCH64: movk    x0, #:abs_g1_nc:var     // encoding: [A,A,0xa0'A',0xf2'A']
-; CHECK-AARCH64: movk    x0, #:abs_g0_nc:var     // encoding: [A,A,0x80'A',0xf2'A']
 
 ; CHECK-ARM64: movz    x0, #:abs_g3:var        // encoding: [0bAAA00000,A,0b111AAAAA,0xd2]
 ; CHECK-ARM64: movk    x0, #:abs_g2_nc:var     // encoding: [0bAAA00000,A,0b110AAAAA,0xf2]

Modified: llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll Sat May 24 07:42:26 2014
@@ -1,5 +1,3 @@
-; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
-; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
 ; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
 ; RUN: llc -mtriple=arm64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
 

Removed: llvm/trunk/test/CodeGen/AArch64/named-reg-alloc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/named-reg-alloc.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/named-reg-alloc.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/named-reg-alloc.ll (removed)
@@ -1,14 +0,0 @@
-; RUN: not llc < %s -mtriple=aarch64-linux-gnueabi 2>&1 | FileCheck %s
-; arm64 has a separate copy of this test
-
-define i32 @get_stack() nounwind {
-entry:
-; FIXME: Include an allocatable-specific error message
-; CHECK: Invalid register name global variable
-	%sp = call i32 @llvm.read_register.i32(metadata !0)
-  ret i32 %sp
-}
-
-declare i32 @llvm.read_register.i32(metadata) nounwind
-
-!0 = metadata !{metadata !"x5\00"}

Removed: llvm/trunk/test/CodeGen/AArch64/named-reg-notareg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/named-reg-notareg.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/named-reg-notareg.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/named-reg-notareg.ll (removed)
@@ -1,13 +0,0 @@
-; RUN: not llc < %s -mtriple=aarch64-linux-gnueabi 2>&1 | FileCheck %s
-; arm64 has a separate copy of this test
-
-define i32 @get_stack() nounwind {
-entry:
-; CHECK: Invalid register name global variable
-	%sp = call i32 @llvm.read_register.i32(metadata !0)
-  ret i32 %sp
-}
-
-declare i32 @llvm.read_register.i32(metadata) nounwind
-
-!0 = metadata !{metadata !"notareg\00"}

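Both removed tests drive the then-new llvm.read_register intrinsic, which names a register through a metadata string; asking for an allocatable or unknown register is a fatal error, which the tests key on. A hedged sketch of the accepted form, in this era's metadata syntax and assuming the target recognizes the stack pointer by name (as the separate arm64 copies do):

    define i64 @get_stack_pointer() nounwind {
    entry:
      %sp = call i64 @llvm.read_register.i64(metadata !0)
      ret i64 %sp
    }

    declare i64 @llvm.read_register.i64(metadata) nounwind

    !0 = metadata !{metadata !"sp\00"}
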
Removed: llvm/trunk/test/CodeGen/AArch64/neon-2velem-high.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-2velem-high.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-2velem-high.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-2velem-high.ll (removed)
@@ -1,331 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-; arm64 has a copy of this test in its directory due to differing intrinsics.
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>)
-
-define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) {
-; CHECK: test_vmull_high_n_s16:
-; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
-  %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  ret <4 x i32> %vmull15.i.i
-}
-
-define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) {
-; CHECK: test_vmull_high_n_s32:
-; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
-  %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  ret <2 x i64> %vmull9.i.i
-}
-
-define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) {
-; CHECK: test_vmull_high_n_u16:
-; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
-  %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  ret <4 x i32> %vmull15.i.i
-}
-
-define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) {
-; CHECK: test_vmull_high_n_u32:
-; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
-  %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  ret <2 x i64> %vmull9.i.i
-}
-
-define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) {
-; CHECK: test_vqdmull_high_n_s16:
-; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
-  %vqdmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  ret <4 x i32> %vqdmull15.i.i
-}
-
-define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) {
-; CHECK: test_vqdmull_high_n_s32:
-; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
-  %vqdmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  ret <2 x i64> %vqdmull9.i.i
-}
-
-define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK: test_vmlal_high_n_s16:
-; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
-  ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK: test_vmlal_high_n_s32:
-; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
-  ret <2 x i64> %add.i.i
-}
-
-define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK: test_vmlal_high_n_u16:
-; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
-  ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK: test_vmlal_high_n_u32:
-; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
-  ret <2 x i64> %add.i.i
-}
-
-define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK: test_vqdmlal_high_n_s16:
-; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vqdmlal15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %vqdmlal17.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
-  ret <4 x i32> %vqdmlal17.i.i
-}
-
-define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK: test_vqdmlal_high_n_s32:
-; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vqdmlal9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %vqdmlal11.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
-  ret <2 x i64> %vqdmlal11.i.i
-}
-
-define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK: test_vmlsl_high_n_s16:
-; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
-  ret <4 x i32> %sub.i.i
-}
-
-define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK: test_vmlsl_high_n_s32:
-; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
-  ret <2 x i64> %sub.i.i
-}
-
-define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK: test_vmlsl_high_n_u16:
-; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
-  ret <4 x i32> %sub.i.i
-}
-
-define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK: test_vmlsl_high_n_u32:
-; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
-  ret <2 x i64> %sub.i.i
-}
-
-define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK: test_vqdmlsl_high_n_s16:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vqdmlsl15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %vqdmlsl17.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
-  ret <4 x i32> %vqdmlsl17.i.i
-}
-
-define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK: test_vqdmlsl_high_n_s32:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vqdmlsl9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %vqdmlsl11.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
-  ret <2 x i64> %vqdmlsl11.i.i
-}
-
-define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) {
-; CHECK: test_vmul_n_f32:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-entry:
-  %vecinit.i = insertelement <2 x float> undef, float %b, i32 0
-  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1
-  %mul.i = fmul <2 x float> %vecinit1.i, %a
-  ret <2 x float> %mul.i
-}
-
-define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) {
-; CHECK: test_vmulq_n_f32:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-entry:
-  %vecinit.i = insertelement <4 x float> undef, float %b, i32 0
-  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1
-  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2
-  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3
-  %mul.i = fmul <4 x float> %vecinit3.i, %a
-  ret <4 x float> %mul.i
-}
-
-define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) {
-; CHECK: test_vmulq_n_f64:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-entry:
-  %vecinit.i = insertelement <2 x double> undef, double %b, i32 0
-  %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1
-  %mul.i = fmul <2 x double> %vecinit1.i, %a
-  ret <2 x double> %mul.i
-}
-
-define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
-; CHECK: test_vfma_n_f32:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
-  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
-; CHECK: test_vfmaq_n_f32:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
-  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
-  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
-  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
-; CHECK: test_vfms_n_f32:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
-  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
-  %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
-  %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
-  ret <2 x float> %1
-}
-
-define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
-; CHECK: test_vfmsq_n_f32:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
-  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
-  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
-  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
-  %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
-  %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
-  ret <4 x float> %1
-}

Removed: llvm/trunk/test/CodeGen/AArch64/neon-2velem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-2velem.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-2velem.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-2velem.ll (removed)
@@ -1,2854 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-; arm64 has a copy of this test in its directory due to differing intrinsics.
-
-declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>)
-
-declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>)
-
-declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>)
-
-declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
-
-declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
-
-declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
-
-declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>)
-
-declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>)
-
-declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmla_lane_s16:
-; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i16> %shuffle, %b
-  %add = add <4 x i16> %mul, %a
-  ret <4 x i16> %add
-}
-
-define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlaq_lane_s16:
-; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <8 x i16> %shuffle, %b
-  %add = add <8 x i16> %mul, %a
-  ret <8 x i16> %add
-}
-
-define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmla_lane_s32:
-; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %mul = mul <2 x i32> %shuffle, %b
-  %add = add <2 x i32> %mul, %a
-  ret <2 x i32> %add
-}
-
-define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlaq_lane_s32:
-; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %mul = mul <4 x i32> %shuffle, %b
-  %add = add <4 x i32> %mul, %a
-  ret <4 x i32> %add
-}
-
-define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmla_laneq_s16:
-; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <4 x i16> %shuffle, %b
-  %add = add <4 x i16> %mul, %a
-  ret <4 x i16> %add
-}
-
-define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlaq_laneq_s16:
-; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <8 x i16> %shuffle, %b
-  %add = add <8 x i16> %mul, %a
-  ret <8 x i16> %add
-}
-
-define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmla_laneq_s32:
-; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %mul = mul <2 x i32> %shuffle, %b
-  %add = add <2 x i32> %mul, %a
-  ret <2 x i32> %add
-}
-
-define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlaq_laneq_s32:
-; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i32> %shuffle, %b
-  %add = add <4 x i32> %mul, %a
-  ret <4 x i32> %add
-}
-
-define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmls_lane_s16:
-; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i16> %shuffle, %b
-  %sub = sub <4 x i16> %a, %mul
-  ret <4 x i16> %sub
-}
-
-define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsq_lane_s16:
-; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <8 x i16> %shuffle, %b
-  %sub = sub <8 x i16> %a, %mul
-  ret <8 x i16> %sub
-}
-
-define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmls_lane_s32:
-; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %mul = mul <2 x i32> %shuffle, %b
-  %sub = sub <2 x i32> %a, %mul
-  ret <2 x i32> %sub
-}
-
-define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsq_lane_s32:
-; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %mul = mul <4 x i32> %shuffle, %b
-  %sub = sub <4 x i32> %a, %mul
-  ret <4 x i32> %sub
-}
-
-define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmls_laneq_s16:
-; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <4 x i16> %shuffle, %b
-  %sub = sub <4 x i16> %a, %mul
-  ret <4 x i16> %sub
-}
-
-define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsq_laneq_s16:
-; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <8 x i16> %shuffle, %b
-  %sub = sub <8 x i16> %a, %mul
-  ret <8 x i16> %sub
-}
-
-define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmls_laneq_s32:
-; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %mul = mul <2 x i32> %shuffle, %b
-  %sub = sub <2 x i32> %a, %mul
-  ret <2 x i32> %sub
-}
-
-define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsq_laneq_s32:
-; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i32> %shuffle, %b
-  %sub = sub <4 x i32> %a, %mul
-  ret <4 x i32> %sub
-}
-
-define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmul_lane_s16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmulq_lane_s16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmul_lane_s32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmulq_lane_s32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmul_lane_u16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmulq_lane_u16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmul_lane_u32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmulq_lane_u32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmul_laneq_s16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmulq_laneq_s16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmul_laneq_s32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmulq_laneq_s32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmul_laneq_u16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmulq_laneq_u16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmul_laneq_u32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmulq_laneq_u32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK: test_vfma_lane_f32:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-
-define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK: test_vfmaq_lane_f32:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-
-define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK: test_vfma_laneq_f32:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK: test_vfmaq_laneq_f32:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK: test_vfms_lane_f32:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK: test_vfmsq_lane_f32:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK: test_vfms_laneq_f32:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK: test_vfmsq_laneq_f32:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
-; CHECK: test_vfmaq_lane_f64:
-; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
-
-define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK: test_vfmaq_laneq_f64:
-; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
-; CHECK: test_vfmsq_lane_f64:
-; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <1 x double> <double -0.000000e+00>, %v
-  %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK: test_vfmsq_laneq_f64:
-; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
-  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfmas_laneq_f32
-; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %extract = extractelement <4 x float> %v, i32 3
-  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
-  ret float %0
-}
-
-declare float @llvm.fma.f32(float, float, float)
-
-define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
-; CHECK-LABEL: test_vfmsd_lane_f64
-; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %extract.rhs = extractelement <1 x double> %v, i32 0
-  %extract = fsub double -0.000000e+00, %extract.rhs
-  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
-  ret double %0
-}
-
-declare double @llvm.fma.f64(double, double, double)
-
-define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
-; CHECK: test_vfmss_laneq_f32
-; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %extract.rhs = extractelement <4 x float> %v, i32 3
-  %extract = fsub float -0.000000e+00, %extract.rhs
-  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
-  ret float %0
-}
-
-define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
-; CHECK-LABEL: test_vfmsd_laneq_f64
-; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
-  %extract.rhs = extractelement <2 x double> %v, i32 1
-  %extract = fsub double -0.000000e+00, %extract.rhs
-  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
-  ret double %0
-}
-
-define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_lane_s16:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_lane_s32:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_laneq_s16:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_laneq_s32:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_high_lane_s16:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_high_lane_s32:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_high_laneq_s16:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_high_laneq_s32:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_lane_s16:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_lane_s32:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_laneq_s16:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_laneq_s32:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_high_lane_s16:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_high_lane_s32:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_high_laneq_s16:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_high_laneq_s32:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_lane_u16:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_lane_u32:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_laneq_u16:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_laneq_u32:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_high_lane_u16:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_high_lane_u32:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_high_laneq_u16:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_high_laneq_u32:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_lane_u16:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_lane_u32:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_laneq_u16:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_laneq_u32:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_high_lane_u16:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_high_lane_u32:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_high_laneq_u16:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_high_laneq_u32:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_lane_s16:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_lane_s32:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_lane_u16:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_lane_u32:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_high_lane_s16:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_high_lane_s32:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_high_lane_u16:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_high_lane_u32:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_laneq_s16:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_laneq_s32:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_laneq_u16:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_laneq_u32:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_high_laneq_s16:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_high_laneq_s32:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_high_laneq_u16:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_high_laneq_u32:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlal_lane_s16:
-; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
-  ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlal_lane_s32:
-; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
-  ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlal_high_lane_s16:
-; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
-  ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlal_high_lane_s32:
-; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
-  ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlsl_lane_s16:
-; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
-  ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlsl_lane_s32:
-; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
-  ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlsl_high_lane_s16:
-; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
-  ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlsl_high_lane_s32:
-; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
-  ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmull_lane_s16:
-; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmull_lane_s32:
-; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vqdmull_laneq_s16:
-; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vqdmull_laneq_s32:
-; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmull_high_lane_s16:
-; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmull_high_lane_s32:
-; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vqdmull_high_laneq_s16:
-; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vqdmull_high_laneq_s32:
-; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmulh_lane_s16:
-; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i16> %vqdmulh2.i
-}
-
-define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmulhq_lane_s16:
-; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
-  ret <8 x i16> %vqdmulh2.i
-}
-
-define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmulh_lane_s32:
-; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i32> %vqdmulh2.i
-}
-
-define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmulhq_lane_s32:
-; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
-  ret <4 x i32> %vqdmulh2.i
-}
-
-define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqrdmulh_lane_s16:
-; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i16> %vqrdmulh2.i
-}
-
-define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqrdmulhq_lane_s16:
-; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
-  ret <8 x i16> %vqrdmulh2.i
-}
-
-define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqrdmulh_lane_s32:
-; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i32> %vqrdmulh2.i
-}
-
-define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqrdmulhq_lane_s32:
-; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
-  ret <4 x i32> %vqrdmulh2.i
-}
-
-define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
-; CHECK: test_vmul_lane_f32:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
-  %mul = fmul <2 x float> %shuffle, %a
-  ret <2 x float> %mul
-}
-
-define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
-; CHECK: test_vmul_lane_f64:
-; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %0 = bitcast <1 x double> %a to <8 x i8>
-  %1 = bitcast <8 x i8> %0 to double
-  %extract = extractelement <1 x double> %v, i32 0
-  %2 = fmul double %1, %extract
-  %3 = insertelement <1 x double> undef, double %2, i32 0
-  ret <1 x double> %3
-}
-
-define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
-; CHECK: test_vmulq_lane_f32:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %mul = fmul <4 x float> %shuffle, %a
-  ret <4 x float> %mul
-}
-
-define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
-; CHECK: test_vmulq_lane_f64:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
-  %mul = fmul <2 x double> %shuffle, %a
-  ret <2 x double> %mul
-}
-
-define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
-; CHECK: test_vmul_laneq_f32:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
-  %mul = fmul <2 x float> %shuffle, %a
-  ret <2 x float> %mul
-}
-
-define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
-; CHECK: test_vmul_laneq_f64:
-; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
-  %0 = bitcast <1 x double> %a to <8 x i8>
-  %1 = bitcast <8 x i8> %0 to double
-  %extract = extractelement <2 x double> %v, i32 1
-  %2 = fmul double %1, %extract
-  %3 = insertelement <1 x double> undef, double %2, i32 0
-  ret <1 x double> %3
-}
-
-define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
-; CHECK: test_vmulq_laneq_f32:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = fmul <4 x float> %shuffle, %a
-  ret <4 x float> %mul
-}
-
-define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
-; CHECK: test_vmulq_laneq_f64:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-  %mul = fmul <2 x double> %shuffle, %a
-  ret <2 x double> %mul
-}
-
-define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
-; CHECK: test_vmulx_lane_f32:
-; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
-  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
-  ret <2 x float> %vmulx2.i
-}
-
-define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
-; CHECK: test_vmulxq_lane_f32:
-; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
-  ret <4 x float> %vmulx2.i
-}
-
-define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
-; CHECK: test_vmulxq_lane_f64:
-; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
-  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
-  ret <2 x double> %vmulx2.i
-}
-
-define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
-; CHECK: test_vmulx_laneq_f32:
-; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
-  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
-  ret <2 x float> %vmulx2.i
-}
-
-define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
-; CHECK: test_vmulxq_laneq_f32:
-; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
-  ret <4 x float> %vmulx2.i
-}
-
-define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
-; CHECK: test_vmulxq_laneq_f64:
-; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
-  ret <2 x double> %vmulx2.i
-}
-
-define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmla_lane_s16_0:
-; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %b
-  %add = add <4 x i16> %mul, %a
-  ret <4 x i16> %add
-}
-
-define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlaq_lane_s16_0:
-; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %b
-  %add = add <8 x i16> %mul, %a
-  ret <8 x i16> %add
-}
-
-define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmla_lane_s32_0:
-; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %b
-  %add = add <2 x i32> %mul, %a
-  ret <2 x i32> %add
-}
-
-define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlaq_lane_s32_0:
-; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %b
-  %add = add <4 x i32> %mul, %a
-  ret <4 x i32> %add
-}
-
-define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmla_laneq_s16_0:
-; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %b
-  %add = add <4 x i16> %mul, %a
-  ret <4 x i16> %add
-}
-
-define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlaq_laneq_s16_0:
-; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %b
-  %add = add <8 x i16> %mul, %a
-  ret <8 x i16> %add
-}
-
-define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmla_laneq_s32_0:
-; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %b
-  %add = add <2 x i32> %mul, %a
-  ret <2 x i32> %add
-}
-
-define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlaq_laneq_s32_0:
-; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %b
-  %add = add <4 x i32> %mul, %a
-  ret <4 x i32> %add
-}
-
-define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmls_lane_s16_0:
-; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %b
-  %sub = sub <4 x i16> %a, %mul
-  ret <4 x i16> %sub
-}
-
-define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsq_lane_s16_0:
-; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %b
-  %sub = sub <8 x i16> %a, %mul
-  ret <8 x i16> %sub
-}
-
-define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmls_lane_s32_0:
-; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %b
-  %sub = sub <2 x i32> %a, %mul
-  ret <2 x i32> %sub
-}
-
-define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsq_lane_s32_0:
-; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %b
-  %sub = sub <4 x i32> %a, %mul
-  ret <4 x i32> %sub
-}
-
-define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmls_laneq_s16_0:
-; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %b
-  %sub = sub <4 x i16> %a, %mul
-  ret <4 x i16> %sub
-}
-
-define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsq_laneq_s16_0:
-; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %b
-  %sub = sub <8 x i16> %a, %mul
-  ret <8 x i16> %sub
-}
-
-define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmls_laneq_s32_0:
-; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %b
-  %sub = sub <2 x i32> %a, %mul
-  ret <2 x i32> %sub
-}
-
-define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsq_laneq_s32_0:
-; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %b
-  %sub = sub <4 x i32> %a, %mul
-  ret <4 x i32> %sub
-}
-
-define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmul_lane_s16_0:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmulq_lane_s16_0:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmul_lane_s32_0:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmulq_lane_s32_0:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmul_lane_u16_0:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmulq_lane_u16_0:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmul_lane_u32_0:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmulq_lane_u32_0:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmul_laneq_s16_0:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmulq_laneq_s16_0:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmul_laneq_s32_0:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmulq_laneq_s32_0:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmul_laneq_u16_0:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmulq_laneq_u16_0:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmul_laneq_u32_0:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmulq_laneq_u32_0:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK: test_vfma_lane_f32_0:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK: test_vfmaq_lane_f32_0:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK: test_vfma_laneq_f32_0:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK: test_vfmaq_laneq_f32_0:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK: test_vfms_lane_f32_0:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK: test_vfmsq_lane_f32_0:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK: test_vfms_laneq_f32_0:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK: test_vfmsq_laneq_f32_0:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK: test_vfmaq_laneq_f64_0:
-; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK: test_vfmsq_laneq_f64_0:
-; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
-  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_lane_s16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_lane_s32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_laneq_s16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_laneq_s32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_high_lane_s16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_high_lane_s32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_high_laneq_s16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_high_laneq_s32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_lane_s16_0:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_lane_s32_0:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_laneq_s16_0:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_laneq_s32_0:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_high_lane_s16_0:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_high_lane_s32_0:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_high_laneq_s16_0:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_high_laneq_s32_0:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_lane_u16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_lane_u32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_laneq_u16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_laneq_u32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_high_lane_u16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_high_lane_u32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_high_laneq_u16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_high_laneq_u32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_lane_u16_0:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_lane_u32_0:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_laneq_u16_0:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_laneq_u32_0:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_high_lane_u16_0:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_high_lane_u32_0:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_high_laneq_u16_0:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_high_laneq_u32_0:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_lane_s16_0:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_lane_s32_0:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_lane_u16_0:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_lane_u32_0:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_high_lane_s16_0:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_high_lane_s32_0:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_high_lane_u16_0:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_high_lane_u32_0:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_laneq_s16_0:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_laneq_s32_0:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_laneq_u16_0:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_laneq_u32_0:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_high_laneq_s16_0:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_high_laneq_s32_0:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_high_laneq_u16_0:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_high_laneq_u32_0:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlal_lane_s16_0:
-; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
-  ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlal_lane_s32_0:
-; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
-  ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlal_high_lane_s16_0:
-; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
-  ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlal_high_lane_s32_0:
-; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
-  ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlsl_lane_s16_0:
-; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
-  ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlsl_lane_s32_0:
-; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
-  ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlsl_high_lane_s16_0:
-; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
-  ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlsl_high_lane_s32_0:
-; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
-  ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmull_lane_s16_0:
-; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmull_lane_s32_0:
-; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vqdmull_laneq_s16_0:
-; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vqdmull_laneq_s32_0:
-; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmull_high_lane_s16_0:
-; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmull_high_lane_s32_0:
-; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vqdmull_high_laneq_s16_0:
-; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vqdmull_high_laneq_s32_0:
-; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmulh_lane_s16_0:
-; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i16> %vqdmulh2.i
-}
-
-define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmulhq_lane_s16_0:
-; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
-  %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
-  ret <8 x i16> %vqdmulh2.i
-}
-
-define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmulh_lane_s32_0:
-; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i32> %vqdmulh2.i
-}
-
-define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmulhq_lane_s32_0:
-; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
-  %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
-  ret <4 x i32> %vqdmulh2.i
-}
-
-define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqrdmulh_lane_s16_0:
-; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i16> %vqrdmulh2.i
-}
-
-define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqrdmulhq_lane_s16_0:
-; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
-  %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
-  ret <8 x i16> %vqrdmulh2.i
-}
-
-define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqrdmulh_lane_s32_0:
-; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i32> %vqrdmulh2.i
-}
-
-define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqrdmulhq_lane_s32_0:
-; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
-  %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
-  ret <4 x i32> %vqrdmulh2.i
-}
-
-define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
-; CHECK: test_vmul_lane_f32_0:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
-  %mul = fmul <2 x float> %shuffle, %a
-  ret <2 x float> %mul
-}
-
-define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
-; CHECK: test_vmulq_lane_f32_0:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
-  %mul = fmul <4 x float> %shuffle, %a
-  ret <4 x float> %mul
-}
-
-define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
-; CHECK: test_vmul_laneq_f32_0:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
-  %mul = fmul <2 x float> %shuffle, %a
-  ret <2 x float> %mul
-}
-
-define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
-; CHECK: test_vmul_laneq_f64_0:
-; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %0 = bitcast <1 x double> %a to <8 x i8>
-  %1 = bitcast <8 x i8> %0 to double
-  %extract = extractelement <2 x double> %v, i32 0
-  %2 = fmul double %1, %extract
-  %3 = insertelement <1 x double> undef, double %2, i32 0
-  ret <1 x double> %3
-}
-
-define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
-; CHECK: test_vmulq_laneq_f32_0:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
-  %mul = fmul <4 x float> %shuffle, %a
-  ret <4 x float> %mul
-}
-
-define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
-; CHECK: test_vmulq_laneq_f64_0:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
-  %mul = fmul <2 x double> %shuffle, %a
-  ret <2 x double> %mul
-}
-
-define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
-; CHECK: test_vmulx_lane_f32_0:
-; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
-  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
-  ret <2 x float> %vmulx2.i
-}
-
-define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
-; CHECK: test_vmulxq_lane_f32_0:
-; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
-  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
-  ret <4 x float> %vmulx2.i
-}
-
-define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
-; CHECK: test_vmulxq_lane_f64_0:
-; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
-  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
-  ret <2 x double> %vmulx2.i
-}
-
-define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
-; CHECK: test_vmulx_laneq_f32_0:
-; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
-  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
-  ret <2 x float> %vmulx2.i
-}
-
-define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
-; CHECK: test_vmulxq_laneq_f32_0:
-; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
-  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
-  ret <4 x float> %vmulx2.i
-}
-
-define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
-; CHECK: test_vmulxq_laneq_f64_0:
-; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
-  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
-  ret <2 x double> %vmulx2.i
-}
-

Removed: llvm/trunk/test/CodeGen/AArch64/neon-3vdiff.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-3vdiff.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-3vdiff.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-3vdiff.ll (removed)
@@ -1,1834 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 has its own copy of this test in its directory.
-
-declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>)
-
-declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>)
-
-declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>)
-
-declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>)
-
-declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>)
-
-declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>)
-
-declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>)
-
-declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>)
-
-declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>)
-
-declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>)
-
-declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>)
-
-declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>)
-
-declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>)
-
-declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>)
-
-declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>)
-
-declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>)
-
-declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vaddl_s8:
-; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
-  %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
-  %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vaddl_s16:
-; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
-  %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
-  %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vaddl_s32:
-; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
-  %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
-  %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vaddl_u8:
-; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
-  %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
-  %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vaddl_u16:
-; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
-  %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
-  %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vaddl_u32:
-; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
-  %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
-  %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vaddl_high_s8:
-; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
-  %add.i = add <8 x i16> %0, %1
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vaddl_high_s16:
-; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
-  %add.i = add <4 x i32> %0, %1
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vaddl_high_s32:
-; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
-  %add.i = add <2 x i64> %0, %1
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vaddl_high_u8:
-; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
-  %add.i = add <8 x i16> %0, %1
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vaddl_high_u16:
-; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
-  %add.i = add <4 x i32> %0, %1
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vaddl_high_u32:
-; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
-  %add.i = add <2 x i64> %0, %1
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK: test_vaddw_s8:
-; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
-  %add.i = add <8 x i16> %vmovl.i.i, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) {
-; CHECK: test_vaddw_s16:
-; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
-  %add.i = add <4 x i32> %vmovl.i.i, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) {
-; CHECK: test_vaddw_s32:
-; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
-  %add.i = add <2 x i64> %vmovl.i.i, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK: test_vaddw_u8:
-; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
-  %add.i = add <8 x i16> %vmovl.i.i, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) {
-; CHECK: test_vaddw_u16:
-; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
-  %add.i = add <4 x i32> %vmovl.i.i, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) {
-; CHECK: test_vaddw_u32:
-; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
-  %add.i = add <2 x i64> %vmovl.i.i, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK: test_vaddw_high_s8:
-; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %add.i = add <8 x i16> %0, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) {
-; CHECK: test_vaddw_high_s16:
-; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %add.i = add <4 x i32> %0, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) {
-; CHECK: test_vaddw_high_s32:
-; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %add.i = add <2 x i64> %0, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK: test_vaddw_high_u8:
-; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %add.i = add <8 x i16> %0, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) {
-; CHECK: test_vaddw_high_u16:
-; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %add.i = add <4 x i32> %0, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) {
-; CHECK: test_vaddw_high_u32:
-; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %add.i = add <2 x i64> %0, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsubl_s8:
-; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
-  %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
-  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsubl_s16:
-; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
-  %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
-  %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vsubl_s32:
-; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
-  %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
-  %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsubl_u8:
-; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
-  %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
-  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsubl_u16:
-; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
-  %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
-  %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vsubl_u32:
-; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
-  %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
-  %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsubl_high_s8:
-; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
-  %sub.i = sub <8 x i16> %0, %1
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsubl_high_s16:
-; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
-  %sub.i = sub <4 x i32> %0, %1
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsubl_high_s32:
-; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
-  %sub.i = sub <2 x i64> %0, %1
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsubl_high_u8:
-; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
-  %sub.i = sub <8 x i16> %0, %1
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsubl_high_u16:
-; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
-  %sub.i = sub <4 x i32> %0, %1
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsubl_high_u32:
-; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
-  %sub.i = sub <2 x i64> %0, %1
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK: test_vsubw_s8:
-; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
-  %sub.i = sub <8 x i16> %a, %vmovl.i.i
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) {
-; CHECK: test_vsubw_s16:
-; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
-  %sub.i = sub <4 x i32> %a, %vmovl.i.i
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) {
-; CHECK: test_vsubw_s32:
-; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
-  %sub.i = sub <2 x i64> %a, %vmovl.i.i
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK: test_vsubw_u8:
-; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
-  %sub.i = sub <8 x i16> %a, %vmovl.i.i
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) {
-; CHECK: test_vsubw_u16:
-; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
-  %sub.i = sub <4 x i32> %a, %vmovl.i.i
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) {
-; CHECK: test_vsubw_u32:
-; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
-  %sub.i = sub <2 x i64> %a, %vmovl.i.i
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK: test_vsubw_high_s8:
-; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %sub.i = sub <8 x i16> %a, %0
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) {
-; CHECK: test_vsubw_high_s16:
-; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %sub.i = sub <4 x i32> %a, %0
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) {
-; CHECK: test_vsubw_high_s32:
-; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %sub.i = sub <2 x i64> %a, %0
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK: test_vsubw_high_u8:
-; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %sub.i = sub <8 x i16> %a, %0
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) {
-; CHECK: test_vsubw_high_u16:
-; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %sub.i = sub <4 x i32> %a, %0
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) {
-; CHECK: test_vsubw_high_u32:
-; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %sub.i = sub <2 x i64> %a, %0
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vaddhn_s16:
-; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vaddhn.i = add <8 x i16> %a, %b
-  %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
-  ret <8 x i8> %vaddhn2.i
-}
-
-define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vaddhn_s32:
-; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vaddhn.i = add <4 x i32> %a, %b
-  %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
-  %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
-  ret <4 x i16> %vaddhn2.i
-}
-
-define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vaddhn_s64:
-; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vaddhn.i = add <2 x i64> %a, %b
-  %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
-  %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
-  ret <2 x i32> %vaddhn2.i
-}
-
-define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vaddhn_u16:
-; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vaddhn.i = add <8 x i16> %a, %b
-  %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
-  ret <8 x i8> %vaddhn2.i
-}
-
-define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vaddhn_u32:
-; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vaddhn.i = add <4 x i32> %a, %b
-  %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
-  %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
-  ret <4 x i16> %vaddhn2.i
-}
-
-define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vaddhn_u64:
-; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vaddhn.i = add <2 x i64> %a, %b
-  %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
-  %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
-  ret <2 x i32> %vaddhn2.i
-}
-
-define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vaddhn_high_s16:
-; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vaddhn.i.i = add <8 x i16> %a, %b
-  %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vaddhn_high_s32:
-; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vaddhn.i.i = add <4 x i32> %a, %b
-  %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
-  %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vaddhn_high_s64:
-; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vaddhn.i.i = add <2 x i64> %a, %b
-  %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
-  %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vaddhn_high_u16:
-; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vaddhn.i.i = add <8 x i16> %a, %b
-  %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vaddhn_high_u32:
-; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vaddhn.i.i = add <4 x i32> %a, %b
-  %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
-  %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vaddhn_high_u64:
-; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vaddhn.i.i = add <2 x i64> %a, %b
-  %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
-  %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vraddhn_s16:
-; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  ret <8 x i8> %vraddhn2.i
-}
-
-define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vraddhn_s32:
-; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i16> %vraddhn2.i
-}
-
-define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vraddhn_s64:
-; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i32> %vraddhn2.i
-}
-
-define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vraddhn_u16:
-; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  ret <8 x i8> %vraddhn2.i
-}
-
-define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vraddhn_u32:
-; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i16> %vraddhn2.i
-}
-
-define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vraddhn_u64:
-; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i32> %vraddhn2.i
-}
-
-define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vraddhn_high_s16:
-; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vraddhn_high_s32:
-; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vraddhn_high_s64:
-; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vraddhn_high_u16:
-; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vraddhn_high_u32:
-; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vraddhn_high_u64:
-; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsubhn_s16:
-; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vsubhn.i = sub <8 x i16> %a, %b
-  %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
-  ret <8 x i8> %vsubhn2.i
-}
-
-define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsubhn_s32:
-; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vsubhn.i = sub <4 x i32> %a, %b
-  %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
-  %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
-  ret <4 x i16> %vsubhn2.i
-}
-
-define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsubhn_s64:
-; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vsubhn.i = sub <2 x i64> %a, %b
-  %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
-  %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
-  ret <2 x i32> %vsubhn2.i
-}
-
-define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsubhn_u16:
-; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vsubhn.i = sub <8 x i16> %a, %b
-  %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
-  ret <8 x i8> %vsubhn2.i
-}
-
-define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsubhn_u32:
-; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vsubhn.i = sub <4 x i32> %a, %b
-  %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
-  %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
-  ret <4 x i16> %vsubhn2.i
-}
-
-define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsubhn_u64:
-; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vsubhn.i = sub <2 x i64> %a, %b
-  %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
-  %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
-  ret <2 x i32> %vsubhn2.i
-}
-
-define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsubhn_high_s16:
-; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vsubhn.i.i = sub <8 x i16> %a, %b
-  %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsubhn_high_s32:
-; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vsubhn.i.i = sub <4 x i32> %a, %b
-  %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
-  %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsubhn_high_s64:
-; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vsubhn.i.i = sub <2 x i64> %a, %b
-  %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
-  %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsubhn_high_u16:
-; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vsubhn.i.i = sub <8 x i16> %a, %b
-  %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsubhn_high_u32:
-; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vsubhn.i.i = sub <4 x i32> %a, %b
-  %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
-  %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsubhn_high_u64:
-; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vsubhn.i.i = sub <2 x i64> %a, %b
-  %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
-  %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vrsubhn_s16:
-; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  ret <8 x i8> %vrsubhn2.i
-}
-
-define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vrsubhn_s32:
-; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i16> %vrsubhn2.i
-}
-
-define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vrsubhn_s64:
-; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i32> %vrsubhn2.i
-}
-
-define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vrsubhn_u16:
-; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  ret <8 x i8> %vrsubhn2.i
-}
-
-define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vrsubhn_u32:
-; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i16> %vrsubhn2.i
-}
-
-define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vrsubhn_u64:
-; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i32> %vrsubhn2.i
-}
-
-define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vrsubhn_high_s16:
-; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vrsubhn_high_s32:
-; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vrsubhn_high_s64:
-; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vrsubhn_high_u16:
-; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vrsubhn_high_u32:
-; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vrsubhn_high_u64:
-; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vabdl_s8:
-; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
-  %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
-  ret <8 x i16> %vmovl.i.i
-}
-
-define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vabdl_s16:
-; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
-  %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
-  ret <4 x i32> %vmovl.i.i
-}
-
-define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vabdl_s32:
-; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
-  %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
-  ret <2 x i64> %vmovl.i.i
-}
-
-define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vabdl_u8:
-; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
-  %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
-  ret <8 x i16> %vmovl.i.i
-}
-
-define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vabdl_u16:
-; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
-  %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
-  ret <4 x i32> %vmovl.i.i
-}
-
-define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vabdl_u32:
-; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
-  %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
-  ret <2 x i64> %vmovl.i.i
-}
-
-define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vabal_s8:
-; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
-  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
-  %add.i = add <8 x i16> %vmovl.i.i.i, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vabal_s16:
-; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
-  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
-  %add.i = add <4 x i32> %vmovl.i.i.i, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vabal_s32:
-; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
-  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
-  %add.i = add <2 x i64> %vmovl.i.i.i, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vabal_u8:
-; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
-  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
-  %add.i = add <8 x i16> %vmovl.i.i.i, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vabal_u16:
-; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
-  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
-  %add.i = add <4 x i32> %vmovl.i.i.i, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vabal_u32:
-; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
-  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
-  %add.i = add <2 x i64> %vmovl.i.i.i, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vabdl_high_s8:
-; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
-  ret <8 x i16> %vmovl.i.i.i
-}
-
-define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vabdl_high_s16:
-; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
-  ret <4 x i32> %vmovl.i.i.i
-}
-
-define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vabdl_high_s32:
-; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
-  ret <2 x i64> %vmovl.i.i.i
-}
-
-define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vabdl_high_u8:
-; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
-  ret <8 x i16> %vmovl.i.i.i
-}
-
-define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vabdl_high_u16:
-; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
-  ret <4 x i32> %vmovl.i.i.i
-}
-
-define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vabdl_high_u32:
-; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
-  ret <2 x i64> %vmovl.i.i.i
-}
-
-define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vabal_high_s8:
-; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
-  %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
-  ret <8 x i16> %add.i.i
-}
-
-define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vabal_high_s16:
-; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
-  %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
-  ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vabal_high_s32:
-; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
-  %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
-  ret <2 x i64> %add.i.i
-}
-
-define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vabal_high_u8:
-; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
-  %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
-  ret <8 x i16> %add.i.i
-}
-
-define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vabal_high_u16:
-; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
-  %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
-  ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vabal_high_u32:
-; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
-  %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
-  ret <2 x i64> %add.i.i
-}
-
-define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vmull_s8:
-; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b)
-  ret <8 x i16> %vmull.i
-}
-
-define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vmull_s16:
-; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vmull_s32:
-; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b)
-  ret <2 x i64> %vmull2.i
-}
-
-define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vmull_u8:
-; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b)
-  ret <8 x i16> %vmull.i
-}
-
-define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vmull_u16:
-; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vmull_u32:
-; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b)
-  ret <2 x i64> %vmull2.i
-}
-
-define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vmull_high_s8:
-; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  ret <8 x i16> %vmull.i.i
-}
-
-define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vmull_high_s16:
-; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  ret <4 x i32> %vmull2.i.i
-}
-
-define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vmull_high_s32:
-; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  ret <2 x i64> %vmull2.i.i
-}
-
-define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vmull_high_u8:
-; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  ret <8 x i16> %vmull.i.i
-}
-
-define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vmull_high_u16:
-; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  ret <4 x i32> %vmull2.i.i
-}
-
-define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vmull_high_u32:
-; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  ret <2 x i64> %vmull2.i.i
-}
-
-define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vmlal_s8:
-; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
-  %add.i = add <8 x i16> %vmull.i.i, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vmlal_s16:
-; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
-  %add.i = add <4 x i32> %vmull2.i.i, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vmlal_s32:
-; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
-  %add.i = add <2 x i64> %vmull2.i.i, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vmlal_u8:
-; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
-  %add.i = add <8 x i16> %vmull.i.i, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vmlal_u16:
-; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
-  %add.i = add <4 x i32> %vmull2.i.i, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vmlal_u32:
-; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
-  %add.i = add <2 x i64> %vmull2.i.i, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vmlal_high_s8:
-; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %add.i.i = add <8 x i16> %vmull.i.i.i, %a
-  ret <8 x i16> %add.i.i
-}
-
-define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vmlal_high_s16:
-; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
-  ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vmlal_high_s32:
-; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
-  ret <2 x i64> %add.i.i
-}
-
-define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vmlal_high_u8:
-; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %add.i.i = add <8 x i16> %vmull.i.i.i, %a
-  ret <8 x i16> %add.i.i
-}
-
-define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vmlal_high_u16:
-; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
-  ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vmlal_high_u32:
-; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
-  ret <2 x i64> %add.i.i
-}
-
-define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vmlsl_s8:
-; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
-  %sub.i = sub <8 x i16> %a, %vmull.i.i
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vmlsl_s16:
-; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
-  %sub.i = sub <4 x i32> %a, %vmull2.i.i
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vmlsl_s32:
-; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
-  %sub.i = sub <2 x i64> %a, %vmull2.i.i
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vmlsl_u8:
-; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
-  %sub.i = sub <8 x i16> %a, %vmull.i.i
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vmlsl_u16:
-; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
-  %sub.i = sub <4 x i32> %a, %vmull2.i.i
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vmlsl_u32:
-; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
-  %sub.i = sub <2 x i64> %a, %vmull2.i.i
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vmlsl_high_s8:
-; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
-  ret <8 x i16> %sub.i.i
-}
-
-define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vmlsl_high_s16:
-; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
-  ret <4 x i32> %sub.i.i
-}
-
-define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vmlsl_high_s32:
-; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
-  ret <2 x i64> %sub.i.i
-}
-
-define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vmlsl_high_u8:
-; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
-  ret <8 x i16> %sub.i.i
-}
-
-define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vmlsl_high_u16:
-; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
-  ret <4 x i32> %sub.i.i
-}
-
-define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vmlsl_high_u32:
-; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
-  ret <2 x i64> %sub.i.i
-}
-
-define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vqdmull_s16:
-; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vqdmull_s32:
-; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vqdmlal_s16:
-; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
-  %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
-  ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vqdmlal_s32:
-; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
-  %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
-  ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vqdmlsl_s16:
-; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
-  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
-  ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vqdmlsl_s32:
-; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
-  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
-  ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vqdmull_high_s16:
-; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vqdmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  ret <4 x i32> %vqdmull2.i.i
-}
-
-define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vqdmull_high_s32:
-; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vqdmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  ret <2 x i64> %vqdmull2.i.i
-}
-
-define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vqdmlal_high_s16:
-; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vqdmlal2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %vqdmlal4.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i)
-  ret <4 x i32> %vqdmlal4.i.i
-}
-
-define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vqdmlal_high_s32:
-; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vqdmlal2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %vqdmlal4.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i)
-  ret <2 x i64> %vqdmlal4.i.i
-}
-
-define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vqdmlsl_high_s16:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vqdmlsl2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %vqdmlsl4.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i)
-  ret <4 x i32> %vqdmlsl4.i.i
-}
-
-define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vqdmlsl_high_s32:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vqdmlsl2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %vqdmlsl4.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i)
-  ret <2 x i64> %vqdmlsl4.i.i
-}
-
-define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vmull_p8:
-; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b)
-  ret <8 x i16> %vmull.i
-}
-
-define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vmull_high_p8:
-; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  ret <8 x i16> %vmull.i.i
-}
-
-define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
-; CHECK: test_vmull_p64:
-; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
-entry:
-  %vmull.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vmull1.i = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i, <1 x i64> %vmull1.i) #1
-  %vmull3.i = bitcast <16 x i8> %vmull2.i to i128
-  ret i128 %vmull3.i
-}
-
-define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
-; CHECK: test_vmull_high_p64:
-; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %0 = extractelement <2 x i64> %a, i32 1
-  %1 = extractelement <2 x i64> %b, i32 1
-  %vmull.i.i = insertelement <1 x i64> undef, i64 %0, i32 0
-  %vmull1.i.i = insertelement <1 x i64> undef, i64 %1, i32 0
-  %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i.i, <1 x i64> %vmull1.i.i) #1
-  %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128
-  ret i128 %vmull3.i.i
-}
-
-declare <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64>, <1 x i64>) #5
-
-

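For reference, the widening-multiply patterns exercised above do not strictly need the llvm.arm.neon.vmull intrinsics: the backend also matches the plain extend-then-multiply IR idiom. A minimal sketch of that form (illustrative only, not part of this commit; the function name is made up):

define <4 x i32> @smull_from_plain_ir(<4 x i16> %a, <4 x i16> %b) {
  ; sign-extend both operands, then multiply in the wide type;
  ; this is the intrinsic-free shape that should select to smull.
  %a.wide = sext <4 x i16> %a to <4 x i32>
  %b.wide = sext <4 x i16> %b to <4 x i32>
  %prod = mul <4 x i32> %a.wide, %b.wide
  ret <4 x i32> %prod
}
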
Removed: llvm/trunk/test/CodeGen/AArch64/neon-aba-abd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-aba-abd.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-aba-abd.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-aba-abd.ll (removed)
@@ -1,237 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; arm64 has a copy of this test in its own directory (with different intrinsic names).
-
-declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uabd_v8i8:
-  %abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uabd v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %abd
-}
-
-define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uaba_v8i8:
-  %abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-  %aba = add <8 x i8> %lhs, %abd
-; CHECK: uaba v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %aba
-}
-
-define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sabd_v8i8:
-  %abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sabd v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %abd
-}
-
-define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_saba_v8i8:
-  %abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-  %aba = add <8 x i8> %lhs, %abd
-; CHECK: saba v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %aba
-}
-
-declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uabd_v16i8:
-  %abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uabd v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %abd
-}
-
-define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uaba_v16i8:
-  %abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-  %aba = add <16 x i8> %lhs, %abd
-; CHECK: uaba v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %aba
-}
-
-define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sabd_v16i8:
-  %abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sabd v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %abd
-}
-
-define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_saba_v16i8:
-  %abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-  %aba = add <16 x i8> %lhs, %abd
-; CHECK: saba v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %aba
-}
-
-declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uabd_v4i16:
-  %abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uabd v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %abd
-}
-
-define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uaba_v4i16:
-  %abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-  %aba = add <4 x i16> %lhs, %abd
-; CHECK: uaba v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %aba
-}
-
-define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sabd_v4i16:
-  %abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sabd v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %abd
-}
-
-define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_saba_v4i16:
-  %abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-  %aba = add <4 x i16> %lhs, %abd
-; CHECK: saba v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %aba
-}
-
-declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uabd_v8i16:
-  %abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uabd v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %abd
-}
-
-define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uaba_v8i16:
-  %abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-  %aba = add <8 x i16> %lhs, %abd
-; CHECK: uaba v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %aba
-}
-
-define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sabd_v8i16:
-  %abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sabd v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %abd
-}
-
-define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_saba_v8i16:
-  %abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-  %aba = add <8 x i16> %lhs, %abd
-; CHECK: saba v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %aba
-}
-
-declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uabd_v2i32:
-  %abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uabd v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %abd
-}
-
-define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uaba_v2i32:
-  %abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-  %aba = add <2 x i32> %lhs, %abd
-; CHECK: uaba v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %aba
-}
-
-define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sabd_v2i32:
-  %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sabd v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %abd
-}
-
-define <2 x i32> @test_sabd_v2i32_const() {
-; CHECK: test_sabd_v2i32_const:
-; CHECK: movi     d1, #0xffffffff0000
-; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
-  %1 = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(
-    <2 x i32> <i32 -2147483648, i32 2147450880>,
-    <2 x i32> <i32 -65536, i32 65535>)
-  ret <2 x i32> %1
-}
-
-define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_saba_v2i32:
-  %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-  %aba = add <2 x i32> %lhs, %abd
-; CHECK: saba v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %aba
-}
-
-declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uabd_v4i32:
-  %abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uabd v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %abd
-}
-
-define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uaba_v4i32:
-  %abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-  %aba = add <4 x i32> %lhs, %abd
-; CHECK: uaba v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %aba
-}
-
-define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sabd_v4i32:
-  %abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sabd v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %abd
-}
-
-define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_saba_v4i32:
-  %abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-  %aba = add <4 x i32> %lhs, %abd
-; CHECK: saba v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %aba
-}
-
-declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>)
-
-define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fabd_v2f32:
-  %abd = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fabd v0.2s, v0.2s, v1.2s
-  ret <2 x float> %abd
-}
-
-declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>)
-
-define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fabd_v4f32:
-  %abd = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fabd v0.4s, v0.4s, v1.4s
-  ret <4 x float> %abd
-}
-
-declare <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double>, <2 x double>)
-
-define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fabd_v2f64:
-  %abd = call <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fabd v0.2d, v0.2d, v1.2d
-  ret <2 x double> %abd
-}

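Each RUN line above expands to an ordinary llc-plus-FileCheck pipeline; checking one of these files by hand looks roughly like this (the literal file path stands in for lit's %s substitution and is assumed here):

$ llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < neon-aba-abd.ll | FileCheck neon-aba-abd.ll
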
Removed: llvm/trunk/test/CodeGen/AArch64/neon-across.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-across.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-across.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-across.ll (removed)
@@ -1,473 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 has a copy of this test in its own directory.
-
-declare float @llvm.aarch64.neon.vminnmv(<4 x float>)
-
-declare float @llvm.aarch64.neon.vmaxnmv(<4 x float>)
-
-declare float @llvm.aarch64.neon.vminv(<4 x float>)
-
-declare float @llvm.aarch64.neon.vmaxv(<4 x float>)
-
-declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32>)
-
-declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8>)
-
-declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8>)
-
-declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32>)
-
-declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8>)
-
-declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32>)
-
-declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8>)
-
-declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8>)
-
-declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8>)
-
-declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32>)
-
-declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8>)
-
-declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32>)
-
-declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8>)
-
-declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8>)
-
-declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8>)
-
-declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32>)
-
-declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16>)
-
-declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8>)
-
-declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32>)
-
-declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16>)
-
-declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8>)
-
-declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16>)
-
-declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8>)
-
-declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16>)
-
-declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8>)
-
-define i16 @test_vaddlv_s8(<8 x i8> %a) {
-; CHECK: test_vaddlv_s8:
-; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8> %a)
-  %0 = extractelement <1 x i16> %saddlv.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vaddlv_s16(<4 x i16> %a) {
-; CHECK: test_vaddlv_s16:
-; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16> %a)
-  %0 = extractelement <1 x i32> %saddlv.i, i32 0
-  ret i32 %0
-}
-
-define i16 @test_vaddlv_u8(<8 x i8> %a) {
-; CHECK: test_vaddlv_u8:
-; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8> %a)
-  %0 = extractelement <1 x i16> %uaddlv.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vaddlv_u16(<4 x i16> %a) {
-; CHECK: test_vaddlv_u16:
-; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16> %a)
-  %0 = extractelement <1 x i32> %uaddlv.i, i32 0
-  ret i32 %0
-}
-
-define i16 @test_vaddlvq_s8(<16 x i8> %a) {
-; CHECK: test_vaddlvq_s8:
-; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8> %a)
-  %0 = extractelement <1 x i16> %saddlv.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vaddlvq_s16(<8 x i16> %a) {
-; CHECK: test_vaddlvq_s16:
-; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16> %a)
-  %0 = extractelement <1 x i32> %saddlv.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vaddlvq_s32(<4 x i32> %a) {
-; CHECK: test_vaddlvq_s32:
-; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %saddlv.i = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32> %a)
-  %0 = extractelement <1 x i64> %saddlv.i, i32 0
-  ret i64 %0
-}
-
-define i16 @test_vaddlvq_u8(<16 x i8> %a) {
-; CHECK: test_vaddlvq_u8:
-; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8> %a)
-  %0 = extractelement <1 x i16> %uaddlv.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vaddlvq_u16(<8 x i16> %a) {
-; CHECK: test_vaddlvq_u16:
-; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16> %a)
-  %0 = extractelement <1 x i32> %uaddlv.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vaddlvq_u32(<4 x i32> %a) {
-; CHECK: test_vaddlvq_u32:
-; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %uaddlv.i = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32> %a)
-  %0 = extractelement <1 x i64> %uaddlv.i, i32 0
-  ret i64 %0
-}
-
-define i8 @test_vmaxv_s8(<8 x i8> %a) {
-; CHECK: test_vmaxv_s8:
-; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8> %a)
-  %0 = extractelement <1 x i8> %smaxv.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vmaxv_s16(<4 x i16> %a) {
-; CHECK: test_vmaxv_s16:
-; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16> %a)
-  %0 = extractelement <1 x i16> %smaxv.i, i32 0
-  ret i16 %0
-}
-
-define i8 @test_vmaxv_u8(<8 x i8> %a) {
-; CHECK: test_vmaxv_u8:
-; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8> %a)
-  %0 = extractelement <1 x i8> %umaxv.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vmaxv_u16(<4 x i16> %a) {
-; CHECK: test_vmaxv_u16:
-; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16> %a)
-  %0 = extractelement <1 x i16> %umaxv.i, i32 0
-  ret i16 %0
-}
-
-define i8 @test_vmaxvq_s8(<16 x i8> %a) {
-; CHECK: test_vmaxvq_s8:
-; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8> %a)
-  %0 = extractelement <1 x i8> %smaxv.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vmaxvq_s16(<8 x i16> %a) {
-; CHECK: test_vmaxvq_s16:
-; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16> %a)
-  %0 = extractelement <1 x i16> %smaxv.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vmaxvq_s32(<4 x i32> %a) {
-; CHECK: test_vmaxvq_s32:
-; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %smaxv.i = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32> %a)
-  %0 = extractelement <1 x i32> %smaxv.i, i32 0
-  ret i32 %0
-}
-
-define i8 @test_vmaxvq_u8(<16 x i8> %a) {
-; CHECK: test_vmaxvq_u8:
-; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8> %a)
-  %0 = extractelement <1 x i8> %umaxv.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vmaxvq_u16(<8 x i16> %a) {
-; CHECK: test_vmaxvq_u16:
-; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16> %a)
-  %0 = extractelement <1 x i16> %umaxv.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vmaxvq_u32(<4 x i32> %a) {
-; CHECK: test_vmaxvq_u32:
-; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %umaxv.i = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32> %a)
-  %0 = extractelement <1 x i32> %umaxv.i, i32 0
-  ret i32 %0
-}
-
-define i8 @test_vminv_s8(<8 x i8> %a) {
-; CHECK: test_vminv_s8:
-; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8> %a)
-  %0 = extractelement <1 x i8> %sminv.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vminv_s16(<4 x i16> %a) {
-; CHECK: test_vminv_s16:
-; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16> %a)
-  %0 = extractelement <1 x i16> %sminv.i, i32 0
-  ret i16 %0
-}
-
-define i8 @test_vminv_u8(<8 x i8> %a) {
-; CHECK: test_vminv_u8:
-; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8> %a)
-  %0 = extractelement <1 x i8> %uminv.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vminv_u16(<4 x i16> %a) {
-; CHECK: test_vminv_u16:
-; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16> %a)
-  %0 = extractelement <1 x i16> %uminv.i, i32 0
-  ret i16 %0
-}
-
-define i8 @test_vminvq_s8(<16 x i8> %a) {
-; CHECK: test_vminvq_s8:
-; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8> %a)
-  %0 = extractelement <1 x i8> %sminv.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vminvq_s16(<8 x i16> %a) {
-; CHECK: test_vminvq_s16:
-; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16> %a)
-  %0 = extractelement <1 x i16> %sminv.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vminvq_s32(<4 x i32> %a) {
-; CHECK: test_vminvq_s32:
-; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %sminv.i = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32> %a)
-  %0 = extractelement <1 x i32> %sminv.i, i32 0
-  ret i32 %0
-}
-
-define i8 @test_vminvq_u8(<16 x i8> %a) {
-; CHECK: test_vminvq_u8:
-; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8> %a)
-  %0 = extractelement <1 x i8> %uminv.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vminvq_u16(<8 x i16> %a) {
-; CHECK: test_vminvq_u16:
-; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16> %a)
-  %0 = extractelement <1 x i16> %uminv.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vminvq_u32(<4 x i32> %a) {
-; CHECK: test_vminvq_u32:
-; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %uminv.i = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32> %a)
-  %0 = extractelement <1 x i32> %uminv.i, i32 0
-  ret i32 %0
-}
-
-define i8 @test_vaddv_s8(<8 x i8> %a) {
-; CHECK: test_vaddv_s8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a)
-  %0 = extractelement <1 x i8> %vaddv.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vaddv_s16(<4 x i16> %a) {
-; CHECK: test_vaddv_s16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a)
-  %0 = extractelement <1 x i16> %vaddv.i, i32 0
-  ret i16 %0
-}
-
-define i8 @test_vaddv_u8(<8 x i8> %a) {
-; CHECK: test_vaddv_u8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a)
-  %0 = extractelement <1 x i8> %vaddv.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vaddv_u16(<4 x i16> %a) {
-; CHECK: test_vaddv_u16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a)
-  %0 = extractelement <1 x i16> %vaddv.i, i32 0
-  ret i16 %0
-}
-
-define i8 @test_vaddvq_s8(<16 x i8> %a) {
-; CHECK: test_vaddvq_s8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a)
-  %0 = extractelement <1 x i8> %vaddv.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vaddvq_s16(<8 x i16> %a) {
-; CHECK: test_vaddvq_s16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a)
-  %0 = extractelement <1 x i16> %vaddv.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vaddvq_s32(<4 x i32> %a) {
-; CHECK: test_vaddvq_s32:
-; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a)
-  %0 = extractelement <1 x i32> %vaddv.i, i32 0
-  ret i32 %0
-}
-
-define i8 @test_vaddvq_u8(<16 x i8> %a) {
-; CHECK: test_vaddvq_u8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a)
-  %0 = extractelement <1 x i8> %vaddv.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vaddvq_u16(<8 x i16> %a) {
-; CHECK: test_vaddvq_u16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a)
-  %0 = extractelement <1 x i16> %vaddv.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vaddvq_u32(<4 x i32> %a) {
-; CHECK: test_vaddvq_u32:
-; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a)
-  %0 = extractelement <1 x i32> %vaddv.i, i32 0
-  ret i32 %0
-}
-
-define float @test_vmaxvq_f32(<4 x float> %a) {
-; CHECK: test_vmaxvq_f32:
-; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %0 = call float @llvm.aarch64.neon.vmaxv(<4 x float> %a)
-  ret float %0
-}
-
-define float @test_vminvq_f32(<4 x float> %a) {
-; CHECK: test_vminvq_f32:
-; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %0 = call float @llvm.aarch64.neon.vminv(<4 x float> %a)
-  ret float %0
-}
-
-define float @test_vmaxnmvq_f32(<4 x float> %a) {
-; CHECK: test_vmaxnmvq_f32:
-; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %0 = call float @llvm.aarch64.neon.vmaxnmv(<4 x float> %a)
-  ret float %0
-}
-
-define float @test_vminnmvq_f32(<4 x float> %a) {
-; CHECK: test_vminnmvq_f32:
-; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %0 = call float @llvm.aarch64.neon.vminnmv(<4 x float> %a)
-  ret float %0
-}
-

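Note the <1 x iN> return types plus extractelement boilerplate throughout the tests above. The arm64 replacements for these across-lanes operations use scalar-returning intrinsics instead, along these lines (the name follows the post-rename llvm.aarch64.neon scheme and is an assumption here, not taken from this revision):

declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>)

define i32 @saddlv_scalar(<8 x i8> %a) {
  ; the widened sum comes back directly; no <1 x i32> wrapper needed
  %sum = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a)
  ret i32 %sum
}
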
Removed: llvm/trunk/test/CodeGen/AArch64/neon-add-pairwise.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-add-pairwise.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-add-pairwise.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-add-pairwise.ll (removed)
@@ -1,102 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; arm64 has a copy of this test in its own directory.
-
-declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_addp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: test_addp_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: addp v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_addp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_addp_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: addp v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_addp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_addp_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: addp v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_addp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_addp_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: addp v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_addp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_addp_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: addp v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_addp_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: addp v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-
-declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_addp_v2i64:
-  %val = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: addp v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %val
-}
-
-declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_faddp_v2f32:
-  %val = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: faddp v0.2s, v0.2s, v1.2s
-  ret <2 x float> %val
-}
-
-define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_faddp_v4f32:
-  %val = call <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: faddp v0.4s, v0.4s, v1.4s
-  ret <4 x float> %val
-}
-
-define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_faddp_v2f64:
-  %val = call <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: faddp v0.2d, v0.2d, v1.2d
-  ret <2 x double> %val
-}
-
-define i32 @test_vaddv.v2i32(<2 x i32> %a) {
-; CHECK-LABEL: test_vaddv.v2i32
-; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %1 = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32> %a)
-  %2 = extractelement <1 x i32> %1, i32 0
-  ret i32 %2
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32>)

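The vpadd intrinsic is not the only way to spell a pairwise add: the same operation falls out of plain IR as an even/odd shuffle of the two inputs followed by an add, which the backend can also match to addp. A sketch (illustrative, not from this commit):

define <2 x i32> @addp_from_shuffles(<2 x i32> %lhs, <2 x i32> %rhs) {
  ; even and odd lanes of the concatenation <lhs, rhs>
  %evens = shufflevector <2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> <i32 0, i32 2>
  %odds = shufflevector <2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> <i32 1, i32 3>
  ; result is [lhs0+lhs1, rhs0+rhs1], exactly what addp v0.2s, v0.2s, v1.2s produces
  %sum = add <2 x i32> %evens, %odds
  ret <2 x i32> %sum
}
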
Removed: llvm/trunk/test/CodeGen/AArch64/neon-add-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-add-sub.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-add-sub.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-add-sub.ll (removed)
@@ -1,280 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 has its own copy of this test
-
-define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-	%tmp3 = add <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @add16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-	%tmp3 = add <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @add4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-	%tmp3 = add <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @add8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-	%tmp3 = add <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @add2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = add <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @add4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = add <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @add2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-	%tmp3 = add <2 x i64> %A, %B;
-	ret <2 x i64> %tmp3
-}
-
-define <2 x float> @add2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = fadd <2 x float> %A, %B;
-	ret <2 x float> %tmp3
-}
-
-define <4 x float> @add4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = fadd <4 x float> %A, %B;
-	ret <4 x float> %tmp3
-}
-define <2 x double> @add2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-	%tmp3 = fadd <2 x double> %A, %B;
-	ret <2 x double> %tmp3
-}
-
-define <8 x i8> @sub8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-	%tmp3 = sub <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @sub16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-	%tmp3 = sub <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @sub4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-	%tmp3 = sub <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @sub8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-	%tmp3 = sub <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @sub2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = sub <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @sub4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = sub <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sub2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-	%tmp3 = sub <2 x i64> %A, %B;
-	ret <2 x i64> %tmp3
-}
-
-define <2 x float> @sub2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = fsub <2 x float> %A, %B;
-	ret <2 x float> %tmp3
-}
-
-define <4 x float> @sub4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = fsub <4 x float> %A, %B;
-	ret <4 x float> %tmp3
-}
-define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-	%tmp3 = fsub <2 x double> %A, %B;
-	ret <2 x double> %tmp3
-}
-
-define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vadd_f64
-; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fadd <1 x double> %a, %b
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmul_f64
-; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fmul <1 x double> %a, %b
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vdiv_f64
-; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fdiv <1 x double> %a, %b
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vmla_f64
-; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fmul <1 x double> %b, %c
-  %2 = fadd <1 x double> %1, %a
-  ret <1 x double> %2
-}
-
-define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vmls_f64
-; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fmul <1 x double> %b, %c
-  %2 = fsub <1 x double> %a, %1
-  ret <1 x double> %2
-}
-
-define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vfms_f64
-; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fsub <1 x double> <double -0.000000e+00>, %b
-  %2 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %1, <1 x double> %c, <1 x double> %a)
-  ret <1 x double> %2
-}
-
-define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vfma_f64
-; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vsub_f64
-; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fsub <1 x double> %a, %b
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vabd_f64
-; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double> %a, <1 x double> %b)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmax_f64
-; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double> %a, <1 x double> %b)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmin_f64
-; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double> %a, <1 x double> %b)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmaxnm_f64
-; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vminnm_f64
-; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double> %a, <1 x double> %b)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vabs_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vabs_f64
-; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vneg_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vneg_f64
-; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fsub <1 x double> <double -0.000000e+00>, %a
-  ret <1 x double> %1
-}
-
-declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
-declare <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)
-
-define <1 x i8> @test_add_v1i8(<1 x i8> %a, <1 x i8> %b) {
-;CHECK-LABEL: test_add_v1i8:
-;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %c = add <1 x i8> %a, %b
-  ret <1 x i8> %c
-}
-
-define <1 x i16> @test_add_v1i16(<1 x i16> %a, <1 x i16> %b) {
-;CHECK-LABEL: test_add_v1i16:
-;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %c = add <1 x i16> %a, %b
-  ret <1 x i16> %c
-}
-
-define <1 x i32> @test_add_v1i32(<1 x i32> %a, <1 x i32> %b) {
-;CHECK-LABEL: test_add_v1i32:
-;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %c = add <1 x i32> %a, %b
-  ret <1 x i32> %c
-}
-
-define <1 x i8> @test_sub_v1i8(<1 x i8> %a, <1 x i8> %b) {
-;CHECK-LABEL: test_sub_v1i8:
-;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %c = sub <1 x i8> %a, %b
-  ret <1 x i8> %c
-}
-
-define <1 x i16> @test_sub_v1i16(<1 x i16> %a, <1 x i16> %b) {
-;CHECK-LABEL: test_sub_v1i16:
-;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %c = sub <1 x i16> %a, %b
-  ret <1 x i16> %c
-}
-
-define <1 x i32> @test_sub_v1i32(<1 x i32> %a, <1 x i32> %b) {
-;CHECK-LABEL: test_sub_v1i32:
-;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %c = sub <1 x i32> %a, %b
-  ret <1 x i32> %c
-}

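test_vfms_f64 above captures the idiom of the day: IR had no dedicated fneg instruction in 2014, so negation is spelled as a subtraction from -0.0, and feeding the negated operand into llvm.fma lets the backend select a fused multiply-subtract. The scalar shape of the same pattern, for comparison (an illustrative sketch, not part of this commit):

declare double @llvm.fma.f64(double, double, double)

define double @fmsub_f64(double %a, double %b, double %c) {
  ; -0.0 - b is the canonical IR negation; fma(-b, c, a) should select to fmsub
  %neg.b = fsub double -0.000000e+00, %b
  %r = call double @llvm.fma.f64(double %neg.b, double %c, double %a)
  ret double %r
}
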
Modified: llvm/trunk/test/CodeGen/AArch64/neon-bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-bitcast.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-bitcast.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-bitcast.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
 
 ; From <8 x i8>

Modified: llvm/trunk/test/CodeGen/AArch64/neon-bitwise-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-bitwise-instructions.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-bitwise-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-bitwise-instructions.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) {

Removed: llvm/trunk/test/CodeGen/AArch64/neon-bsl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-bsl.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-bsl.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-bsl.ll (removed)
@@ -1,237 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 has no equivalent vbsl intrinsic; it always uses the and/or IR form. The
-; final two tests are duplicated by ARM64's vselect.ll test.
-
-declare <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double>, <2 x double>, <2 x double>)
-
-declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
-
-declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
-
-declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-
-declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)
-
-declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
-
-declare <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double>, <1 x double>, <1 x double>)
-
-declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>)
-
-declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>)
-
-declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
-
-define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
-; CHECK-LABEL: test_vbsl_s8:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
-  ret <8 x i8> %vbsl.i
-}
-
-define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
-; CHECK-LABEL: test_vbsl_s16:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
-  %0 = bitcast <4 x i16> %vbsl3.i to <8 x i8>
-  ret <8 x i8> %0
-}
-
-define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
-; CHECK-LABEL: test_vbsl_s32:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3)
-  ret <2 x i32> %vbsl3.i
-}
-
-define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
-; CHECK-LABEL: test_vbsl_s64:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3)
-  ret <1 x i64> %vbsl3.i
-}
-
-define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
-; CHECK-LABEL: test_vbsl_u8:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
-  ret <8 x i8> %vbsl.i
-}
-
-define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
-; CHECK-LABEL: test_vbsl_u16:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
-  ret <4 x i16> %vbsl3.i
-}
-
-define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
-; CHECK-LABEL: test_vbsl_u32:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3)
-  ret <2 x i32> %vbsl3.i
-}
-
-define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
-; CHECK-LABEL: test_vbsl_u64:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3)
-  ret <1 x i64> %vbsl3.i
-}
-
-define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) {
-; CHECK-LABEL: test_vbsl_f32:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vbsl3.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3)
-  ret <2 x float> %vbsl3.i
-}
-
-define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) {
-; CHECK-LABEL: test_vbsl_f64:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vbsl.i = bitcast <1 x i64> %v1 to <1 x double>
-  %vbsl3.i = tail call <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double> %vbsl.i, <1 x double> %v2, <1 x double> %v3)
-  ret <1 x double> %vbsl3.i
-}
-
-define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
-; CHECK-LABEL: test_vbsl_p8:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
-  ret <8 x i8> %vbsl.i
-}
-
-define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
-; CHECK-LABEL: test_vbsl_p16:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
-  ret <4 x i16> %vbsl3.i
-}
-
-define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
-; CHECK-LABEL: test_vbslq_s8:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
-  ret <16 x i8> %vbsl.i
-}
-
-define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
-; CHECK-LABEL: test_vbslq_s16:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
-  ret <8 x i16> %vbsl3.i
-}
-
-define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-; CHECK-LABEL: test_vbslq_s32:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)
-  ret <4 x i32> %vbsl3.i
-}
-
-define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
-; CHECK-LABEL: test_vbslq_s64:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3)
-  ret <2 x i64> %vbsl3.i
-}
-
-define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
-; CHECK-LABEL: test_vbslq_u8:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
-  ret <16 x i8> %vbsl.i
-}
-
-define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
-; CHECK-LABEL: test_vbslq_u16:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
-  ret <8 x i16> %vbsl3.i
-}
-
-define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-; CHECK-LABEL: test_vbslq_u32:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)
-  ret <4 x i32> %vbsl3.i
-}
-
-define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
-; CHECK-LABEL: test_vbslq_u64:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3)
-  ret <2 x i64> %vbsl3.i
-}
-
-define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) {
-; CHECK-LABEL: test_vbslq_f32:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %vbsl.i = bitcast <4 x i32> %v1 to <4 x float>
-  %vbsl3.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %vbsl.i, <4 x float> %v2, <4 x float> %v3)
-  ret <4 x float> %vbsl3.i
-}
-
-define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
-; CHECK-LABEL: test_vbslq_p8:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
-  ret <16 x i8> %vbsl.i
-}
-
-define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
-; CHECK-LABEL: test_vbslq_p16:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
-  ret <8 x i16> %vbsl3.i
-}
-
-define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) {
-; CHECK-LABEL: test_vbslq_f64:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %vbsl.i = bitcast <2 x i64> %v1 to <2 x double>
-  %vbsl3.i = tail call <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double> %vbsl.i, <2 x double> %v2, <2 x double> %v3)
-  ret <2 x double> %vbsl3.i
-}
-
-define <2 x double> @test_bsl_v2f64(<2 x i1> %v1, <2 x double> %v2, <2 x double> %v3) {
-; CHECK-LABEL: test_bsl_v2f64:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-  %1 = select <2 x i1> %v1, <2 x double> %v2, <2 x double> %v3
-  ret <2 x double> %1
-}
-
-define <4 x float> @test_bsl_v4f32(<4 x i1> %v1, <4 x float> %v2, <4 x float> %v3) {
-; CHECK-LABEL: test_bsl_v4f32:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-  %1 = select <4 x i1> %v1, <4 x float> %v2, <4 x float> %v3
-  ret <4 x float> %1
-}
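
For readers skimming the removed file: every test above pins the same
instruction, bsl, which is a pure bitwise select -- each result bit comes
from the second source where the corresponding mask bit is set, and from
the third source where it is clear. A minimal sketch of that semantics in
plain IR (function name illustrative; it mirrors what the removed
@llvm.arm.neon.vbsl intrinsic computed, and is the kind of and/xor/or
pattern a backend can fold back into bsl):

define <4 x i32> @bsl_model(<4 x i32> %mask, <4 x i32> %a, <4 x i32> %b) {
  ; (mask & a) | (~mask & b)
  %sel.a = and <4 x i32> %mask, %a
  %inv   = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %sel.b = and <4 x i32> %inv, %b
  %r     = or <4 x i32> %sel.a, %sel.b
  ret <4 x i32> %r
}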

Modified: llvm/trunk/test/CodeGen/AArch64/neon-compare-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-compare-instructions.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-compare-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-compare-instructions.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) {

Removed: llvm/trunk/test/CodeGen/AArch64/neon-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-copy.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-copy.ll (removed)
@@ -1,1402 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-; arm64 has a copied equivalent of this test due to intrinsics.
-
-define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
-  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
-  ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
-  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
-  ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
-  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
-  ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
-  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
-  ret <2 x i64> %tmp3
-}
-
-define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
-  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
-  ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
-  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
-  ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
-  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
-  ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
-  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
-  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
-  ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
-  ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
-  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
-  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
-  ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
-  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
-  ret <2 x i64> %tmp4
-}
-
-define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
-  %tmp3 = extractelement <4 x float> %tmp1, i32 2
-  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
-  ret <4 x float> %tmp4
-}
-
-define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <2 x double> %tmp1, i32 0
-  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
-  ret <2 x double> %tmp4
-}
-
-define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
-  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
-  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
-  ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
-  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
-  ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
-  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
-  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
-  ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
-  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
-  ret <2 x i64> %tmp4
-}
-
-define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
-  %tmp3 = extractelement <2 x float> %tmp1, i32 1
-  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
-  ret <4 x float> %tmp4
-}
-
-define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <1 x double> %tmp1, i32 0
-  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
-  ret <2 x double> %tmp4
-}
-
-define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
-  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
-  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
-  ret <8 x i8> %tmp4
-}
-
-define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
-  ret <4 x i16> %tmp4
-}
-
-define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
-  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
-  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
-  ret <2 x i32> %tmp4
-}
-
-define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
-  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
-  ret <1 x i64> %tmp4
-}
-
-define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
-  %tmp3 = extractelement <4 x float> %tmp1, i32 2
-  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
-  ret <2 x float> %tmp4
-}
-
-define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <2 x double> %tmp1, i32 0
-  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
-  ret <1 x double> %tmp4
-}
-
-define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
-  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
-  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
-  ret <8 x i8> %tmp4
-}
-
-define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
-  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
-  ret <4 x i16> %tmp4
-}
-
-define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
-  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
-  ret <2 x i32> %tmp4
-}
-
-define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
-  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
-  ret <1 x i64> %tmp4
-}
-
-define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-  %tmp3 = extractelement <2 x float> %tmp1, i32 0
-  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
-  ret <2 x float> %tmp4
-}
-
-define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <1 x double> %tmp1, i32 0
-  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
-  ret <1 x double> %tmp4
-}
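
All of the ins tests above reduce to one idiom: an extractelement feeding
an insertelement becomes a single lane-to-lane ins, with no round trip
through a general register. A distilled sketch (function name
illustrative):

define <4 x i32> @lane_copy(<4 x i32> %dst, <4 x i32> %src) {
  ; expected to select a single "ins v0.s[3], v1.s[1]"
  %e = extractelement <4 x i32> %src, i32 1
  %r = insertelement <4 x i32> %dst, i32 %e, i32 3
  ret <4 x i32> %r
}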
-
-define i32 @umovw16b(<16 x i8> %tmp1) {
-;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
-  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
-  %tmp4 = zext i8 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i32 @umovw8h(<8 x i16> %tmp1) {
-;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = zext i16 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i32 @umovw4s(<4 x i32> %tmp1) {
-;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
-  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
-  ret i32 %tmp3
-}
-
-define i64 @umovx2d(<2 x i64> %tmp1) {
-;CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
-  ret i64 %tmp3
-}
-
-define i32 @umovw8b(<8 x i8> %tmp1) {
-;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
-  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
-  %tmp4 = zext i8 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i32 @umovw4h(<4 x i16> %tmp1) {
-;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
-  %tmp4 = zext i16 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i32 @umovw2s(<2 x i32> %tmp1) {
-;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
-  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
-  ret i32 %tmp3
-}
-
-define i64 @umovx1d(<1 x i64> %tmp1) {
-;CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
-  ret i64 %tmp3
-}
-
-define i32 @smovw16b(<16 x i8> %tmp1) {
-;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
-  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
-  %tmp4 = sext i8 %tmp3 to i32
-  %tmp5 = add i32 5, %tmp4
-  ret i32 %tmp5
-}
-
-define i32 @smovw8h(<8 x i16> %tmp1) {
-;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  %tmp5 = add i32 5, %tmp4
-  ret i32 %tmp5
-}
-
-define i32 @smovx16b(<16 x i8> %tmp1) {
-;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
-  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
-  %tmp4 = sext i8 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i32 @smovx8h(<8 x i16> %tmp1) {
-;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i64 @smovx4s(<4 x i32> %tmp1) {
-;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
-  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
-  %tmp4 = sext i32 %tmp3 to i64
-  ret i64 %tmp4
-}
-
-define i32 @smovw8b(<8 x i8> %tmp1) {
-;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
-  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
-  %tmp4 = sext i8 %tmp3 to i32
-  %tmp5 = add i32 5, %tmp4
-  ret i32 %tmp5
-}
-
-define i32 @smovw4h(<4 x i16> %tmp1) {
-;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  %tmp5 = add i32 5, %tmp4
-  ret i32 %tmp5
-}
-
-define i32 @smovx8b(<8 x i8> %tmp1) {
-;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[6]
-  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
-  %tmp4 = sext i8 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i32 @smovx4h(<4 x i16> %tmp1) {
-;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i64 @smovx2s(<2 x i32> %tmp1) {
-;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
-  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
-  %tmp4 = sext i32 %tmp3 to i64
-  ret i64 %tmp4
-}
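
The umov/smov pairing above encodes the extension rule: a zero-extended
lane read selects umov, a sign-extended one selects smov, and reading a
full 64-bit lane 0 degenerates to fmov. A side-by-side sketch (function
names illustrative):

define i32 @lane_zext(<8 x i16> %v) {
  ; zext lane read -> umov {{w[0-9]+}}, {{v[0-9]+}}.h[3]
  %e = extractelement <8 x i16> %v, i32 3
  %r = zext i16 %e to i32
  ret i32 %r
}

define i32 @lane_sext(<8 x i16> %v) {
  ; sext lane read -> smov {{w[0-9]+}}, {{v[0-9]+}}.h[3]
  %e = extractelement <8 x i16> %v, i32 3
  %r = sext i16 %e to i32
  ret i32 %r
}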
-
-define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
-;CHECK: ins  {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
-  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
-  ret <8 x i8> %vset_lane
-}
-
-define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
-;CHECK: ins  {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
-  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
-  ret <16 x i8> %vset_lane
-}
-
-define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
-;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
-  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
-  ret <8 x i8> %vset_lane
-}
-
-define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
-;CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
-  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-  ret <16 x i8> %vset_lane
-}
-
-define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
-  %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
-  %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
-  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
-  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
-  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
-  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
-  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
-  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
-  ret <8 x i8> %vecinit7.i
-}
-
-define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
-  %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
-  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
-  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
-  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
-  ret <4 x i16> %vecinit3.i
-}
-
-define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
-  %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
-  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
-  ret <2 x i32> %vecinit1.i
-}
-
-define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
-;CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-  %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
-  ret <1 x i64> %vecinit.i
-}
-
-define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
-  %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
-  %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
-  %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
-  %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
-  %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
-  %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
-  %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
-  %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
-  %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
-  %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
-  %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
-  %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
-  %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
-  %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
-  %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
-  %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
-  ret <16 x i8> %vecinit15.i
-}
-
-define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
-  %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
-  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
-  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
-  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
-  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
-  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
-  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
-  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
-  ret <8 x i16> %vecinit7.i
-}
-
-define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
-  %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
-  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
-  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
-  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
-  ret <4 x i32> %vecinit3.i
-}
-
-define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
-  %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
-  %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
-  ret <2 x i64> %vecinit1.i
-}
-
-define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
-  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  ret <8 x i8> %shuffle
-}
-
-define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
-  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
-  ret <4 x i16> %shuffle
-}
-
-define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  ret <2 x i32> %shuffle
-}
-
-define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
-  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  ret <16 x i8> %shuffle
-}
-
-define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
-  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-  ret <8 x i16> %shuffle
-}
-
-define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  ret <4 x i32> %shuffle
-}
-
-define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
-  ret <2 x i64> %shuffle
-}
-
-define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
-  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  ret <8 x i8> %shuffle
-}
-
-define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
-  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
-  ret <4 x i16> %shuffle
-}
-
-define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
-  ret <2 x i32> %shuffle
-}
-
-define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
-  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  ret <16 x i8> %shuffle
-}
-
-define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
-  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-  ret <8 x i16> %shuffle
-}
-
-define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  ret <4 x i32> %shuffle
-}
-
-define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
-  ret <2 x i64> %shuffle
-}
-
-define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
-; CHECK-LABEL: test_bitcastv8i8toi64:
-   %res = bitcast <8 x i8> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-   ret i64 %res
-}
-
-define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
-; CHECK-LABEL: test_bitcastv4i16toi64:
-   %res = bitcast <4 x i16> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-   ret i64 %res
-}
-
-define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
-; CHECK-LABEL: test_bitcastv2i32toi64:
-   %res = bitcast <2 x i32> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-   ret i64 %res
-}
-
-define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
-; CHECK-LABEL: test_bitcastv2f32toi64:
-   %res = bitcast <2 x float> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-   ret i64 %res
-}
-
-define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
-; CHECK-LABEL: test_bitcastv1i64toi64:
-   %res = bitcast <1 x i64> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-   ret i64 %res
-}
-
-define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
-; CHECK-LABEL: test_bitcastv1f64toi64:
-   %res = bitcast <1 x double> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-   ret i64 %res
-}
-
-define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov8i8:
-   %res = bitcast i64 %in to <8 x i8>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-   ret <8 x i8> %res
-}
-
-define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov4i16:
-   %res = bitcast i64 %in to <4 x i16>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-   ret <4 x i16> %res
-}
-
-define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov2i32:
-   %res = bitcast i64 %in to <2 x i32>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-   ret <2 x i32> %res
-}
-
-define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov2f32:
-   %res = bitcast i64 %in to <2 x float>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-   ret <2 x float> %res
-}
-
-define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov1i64:
-   %res = bitcast i64 %in to <1 x i64>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-   ret <1 x i64> %res
-}
-
-define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov1f64:
-   %res = bitcast i64 %in to <1 x double>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-   ret <1 x double> %res
-}
-
-define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
-; CHECK-LABEL: test_bitcastv8i8tov1f64:
-; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}
-  %sub.i = sub <8 x i8> zeroinitializer, %a
-  %1 = bitcast <8 x i8> %sub.i to <1 x double>
-  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
-  ret <1 x i64> %vcvt.i
-}
-
-define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
-; CHECK-LABEL: test_bitcastv4i16tov1f64:
-; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}
-  %sub.i = sub <4 x i16> zeroinitializer, %a
-  %1 = bitcast <4 x i16> %sub.i to <1 x double>
-  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
-  ret <1 x i64> %vcvt.i
-}
-
-define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
-; CHECK-LABEL: test_bitcastv2i32tov1f64:
-; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}
-  %sub.i = sub <2 x i32> zeroinitializer, %a
-  %1 = bitcast <2 x i32> %sub.i to <1 x double>
-  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
-  ret <1 x i64> %vcvt.i
-}
-
-define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1i64tov1f64:
-; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}
-  %sub.i = sub <1 x i64> zeroinitializer, %a
-  %1 = bitcast <1 x i64> %sub.i to <1 x double>
-  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
-  ret <1 x i64> %vcvt.i
-}
-
-define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
-; CHECK-LABEL: test_bitcastv2f32tov1f64:
-; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}
-  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
-  %1 = bitcast <2 x float> %sub.i to <1 x double>
-  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
-  ret <1 x i64> %vcvt.i
-}
-
-define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov8i8:
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
-  %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
-  %sub.i = sub <8 x i8> zeroinitializer, %1
-  ret <8 x i8> %sub.i
-}
-
-define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov4i16:
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
-  %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
-  %sub.i = sub <4 x i16> zeroinitializer, %1
-  ret <4 x i16> %sub.i
-}
-
-define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov2i32:
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
-  %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
-  %sub.i = sub <2 x i32> zeroinitializer, %1
-  ret <2 x i32> %sub.i
-}
-
-define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov1i64:
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
-  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
-  %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
-  %sub.i = sub <1 x i64> zeroinitializer, %1
-  ret <1 x i64> %sub.i
-}
-
-define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov2f32:
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
-  %1 = bitcast <1 x double> %vcvt.i to <2 x float>
-  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
-  ret <2 x float> %sub.i
-}
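
The theme of the bitcast block above: a 64-bit bitcast between a scalar
and a short vector is only a register-class move, so each test expects a
single fmov next to the surrounding arithmetic. An isolated round trip
that forces both directions (function name illustrative; the add keeps
the casts from folding away):

define <2 x i32> @gpr_roundtrip(<2 x i32> %v) {
  %s = bitcast <2 x i32> %v to i64      ; expected: fmov x?, d?
  %t = add i64 %s, 1
  %r = bitcast i64 %t to <2 x i32>      ; expected: fmov d?, x?
  ret <2 x i32> %r
}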
-
-; Test inserting an element into an undef vector
-define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
-; CHECK-LABEL: scalar_to_vector.v8i8:
-; CHECK: ins {{v[0-9]+}}.b[0], {{w[0-9]+}}
-  %b = insertelement <8 x i8> undef, i8 %a, i32 0
-  ret <8 x i8> %b
-}
-
-define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
-; CHECK-LABEL: scalar_to_vector.v16i8:
-; CHECK: ins {{v[0-9]+}}.b[0], {{w[0-9]+}}
-  %b = insertelement <16 x i8> undef, i8 %a, i32 0
-  ret <16 x i8> %b
-}
-
-define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
-; CHECK-LABEL: scalar_to_vector.v4i16:
-; CHECK: ins {{v[0-9]+}}.h[0], {{w[0-9]+}}
-  %b = insertelement <4 x i16> undef, i16 %a, i32 0
-  ret <4 x i16> %b
-}
-
-define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
-; CHECK-LABEL: scalar_to_vector.v8i16:
-; CHECK: ins {{v[0-9]+}}.h[0], {{w[0-9]+}}
-  %b = insertelement <8 x i16> undef, i16 %a, i32 0
-  ret <8 x i16> %b
-}
-
-define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
-; CHECK-LABEL: scalar_to_vector.v2i32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{w[0-9]+}}
-  %b = insertelement <2 x i32> undef, i32 %a, i32 0
-  ret <2 x i32> %b
-}
-
-define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
-; CHECK-LABEL: scalar_to_vector.v4i32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{w[0-9]+}}
-  %b = insertelement <4 x i32> undef, i32 %a, i32 0
-  ret <4 x i32> %b
-}
-
-define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
-; CHECK-LABEL: scalar_to_vector.v2i64:
-; CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}}
-  %b = insertelement <2 x i64> undef, i64 %a, i32 0
-  ret <2 x i64> %b
-}
-
-define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
-; CHECK-LABEL: testDUP.v1i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
-  %b = extractelement <1 x i8> %a, i32 0
-  %c = insertelement <8 x i8> undef, i8 %b, i32 0
-  %d = insertelement <8 x i8> %c, i8 %b, i32 1
-  %e = insertelement <8 x i8> %d, i8 %b, i32 2
-  %f = insertelement <8 x i8> %e, i8 %b, i32 3
-  %g = insertelement <8 x i8> %f, i8 %b, i32 4
-  %h = insertelement <8 x i8> %g, i8 %b, i32 5
-  %i = insertelement <8 x i8> %h, i8 %b, i32 6
-  %j = insertelement <8 x i8> %i, i8 %b, i32 7
-  ret <8 x i8> %j
-}
-
-define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
-; CHECK-LABEL: testDUP.v1i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
-  %b = extractelement <1 x i16> %a, i32 0
-  %c = insertelement <8 x i16> undef, i16 %b, i32 0
-  %d = insertelement <8 x i16> %c, i16 %b, i32 1
-  %e = insertelement <8 x i16> %d, i16 %b, i32 2
-  %f = insertelement <8 x i16> %e, i16 %b, i32 3
-  %g = insertelement <8 x i16> %f, i16 %b, i32 4
-  %h = insertelement <8 x i16> %g, i16 %b, i32 5
-  %i = insertelement <8 x i16> %h, i16 %b, i32 6
-  %j = insertelement <8 x i16> %i, i16 %b, i32 7
-  ret <8 x i16> %j
-}
-
-define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
-; CHECK-LABEL: testDUP.v1i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
-  %b = extractelement <1 x i32> %a, i32 0
-  %c = insertelement <4 x i32> undef, i32 %b, i32 0
-  %d = insertelement <4 x i32> %c, i32 %b, i32 1
-  %e = insertelement <4 x i32> %d, i32 %b, i32 2
-  %f = insertelement <4 x i32> %e, i32 %b, i32 3
-  ret <4 x i32> %f
-}
-
-define <8 x i8> @getl(<16 x i8> %x) #0 {
-; CHECK-LABEL: getl:
-; CHECK: ret
-  %vecext = extractelement <16 x i8> %x, i32 0
-  %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
-  %vecext1 = extractelement <16 x i8> %x, i32 1
-  %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
-  %vecext3 = extractelement <16 x i8> %x, i32 2
-  %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
-  %vecext5 = extractelement <16 x i8> %x, i32 3
-  %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
-  %vecext7 = extractelement <16 x i8> %x, i32 4
-  %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
-  %vecext9 = extractelement <16 x i8> %x, i32 5
-  %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
-  %vecext11 = extractelement <16 x i8> %x, i32 6
-  %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
-  %vecext13 = extractelement <16 x i8> %x, i32 7
-  %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
-  ret <8 x i8> %vecinit14
-}
-
-define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
-; CHECK-LABEL: test_dup_v2i32_v4i16:
-; CHECK: dup v0.4h, v0.h[2]
-entry:
-  %x = extractelement <2 x i32> %a, i32 1
-  %vget_lane = trunc i32 %x to i16
-  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
-  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
-  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
-  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
-  ret <4 x i16> %vecinit3.i
-}
-
-define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
-; CHECK-LABEL: test_dup_v4i32_v8i16:
-; CHECK: dup v0.8h, v0.h[6]
-entry:
-  %x = extractelement <4 x i32> %a, i32 3
-  %vget_lane = trunc i32 %x to i16
-  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
-  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
-  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
-  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
-  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
-  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
-  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
-  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
-  ret <8 x i16> %vecinit7.i
-}
-
-define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
-; CHECK-LABEL: test_dup_v1i64_v4i16:
-; CHECK: dup v0.4h, v0.h[0]
-entry:
-  %x = extractelement <1 x i64> %a, i32 0
-  %vget_lane = trunc i64 %x to i16
-  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
-  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
-  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
-  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
-  ret <4 x i16> %vecinit3.i
-}
-
-define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
-; CHECK-LABEL: test_dup_v1i64_v2i32:
-; CHECK: dup v0.2s, v0.s[0]
-entry:
-  %x = extractelement <1 x i64> %a, i32 0
-  %vget_lane = trunc i64 %x to i32
-  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
-  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
-  ret <2 x i32> %vecinit1.i
-}
-
-define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
-; CHECK-LABEL: test_dup_v2i64_v8i16:
-; CHECK: dup v0.8h, v0.h[4]
-entry:
-  %x = extractelement <2 x i64> %a, i32 1
-  %vget_lane = trunc i64 %x to i16
-  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
-  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
-  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
-  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
-  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
-  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
-  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
-  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
-  ret <8 x i16> %vecinit7.i
-}
-
-define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
-; CHECK-LABEL: test_dup_v2i64_v4i32:
-; CHECK: dup v0.4s, v0.s[2]
-entry:
-  %x = extractelement <2 x i64> %a, i32 1
-  %vget_lane = trunc i64 %x to i32
-  %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
-  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
-  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
-  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
-  ret <4 x i32> %vecinit3.i
-}
-
-define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
-; CHECK-LABEL: test_dup_v4i32_v4i16:
-; CHECK: dup v0.4h, v0.h[2]
-entry:
-  %x = extractelement <4 x i32> %a, i32 1
-  %vget_lane = trunc i32 %x to i16
-  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
-  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
-  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
-  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
-  ret <4 x i16> %vecinit3.i
-}
-
-define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
-; CHECK-LABEL: test_dup_v2i64_v4i16:
-; CHECK: dup v0.4h, v0.h[0]
-entry:
-  %x = extractelement <2 x i64> %a, i32 0
-  %vget_lane = trunc i64 %x to i16
-  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
-  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
-  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
-  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
-  ret <4 x i16> %vecinit3.i
-}
-
-define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
-; CHECK-LABEL: test_dup_v2i64_v2i32:
-; CHECK: dup v0.2s, v0.s[0]
-entry:
-  %x = extractelement <2 x i64> %a, i32 0
-  %vget_lane = trunc i64 %x to i32
-  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
-  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
-  ret <2 x i32> %vecinit1.i
-}
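
The trunc-then-splat tests above all hinge on a lane remapping: on this
little-endian target, the low 16 bits of 64-bit lane i are halfword lane
4*i, so the extract/trunc/insert chain collapses to one lane dup. The
same result written as an explicit bitcast plus shuffle (function name
illustrative):

define <4 x i16> @dup_low_halfword(<2 x i64> %a) {
  ; splat h-lane 4, i.e. the low 16 bits of d-lane 1 -> dup v0.4h, v0.h[4]
  %h = bitcast <2 x i64> %a to <8 x i16>
  %r = shufflevector <8 x i16> %h, <8 x i16> undef, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i16> %r
}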
-
-
-define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
-; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
-; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
-; CHECK-NEXT: ret
-entry:
-  %0 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a)
-  %1 = insertelement <1 x float> undef, float %0, i32 0
-  %2 = extractelement <1 x float> %1, i32 0
-  %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
-  ret <2 x float> %vecinit1.i
-}
-
-define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
-; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
-; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
-; CHECK-NEXT: ret
-entry:
-  %0 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a)
-  %1 = insertelement <1 x float> undef, float %0, i32 0
-  %2 = extractelement <1 x float> %1, i32 0
-  %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
-  ret <4 x float> %vecinit1.i
-}
-
-declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>)
-
-define <2 x i32> @test_concat_undef_v1i32(<1 x i32> %a) {
-; CHECK-LABEL: test_concat_undef_v1i32:
-; CHECK: ins v{{[0-9]+}}.s[1], v{{[0-9]+}}.s[0]
-entry:
-  %0 = extractelement <1 x i32> %a, i32 0
-  %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
-  ret <2 x i32> %vecinit1.i
-}
-
-declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>) #4
-
-define <2 x i32> @test_concat_v1i32_undef(<1 x i32> %a) {
-; CHECK-LABEL: test_concat_v1i32_undef:
-; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
-; CHECK-NEXT: ret
-entry:
-  %b = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %a)
-  %0 = extractelement <1 x i32> %b, i32 0
-  %vecinit.i432 = insertelement <2 x i32> undef, i32 %0, i32 0
-  ret <2 x i32> %vecinit.i432
-}
-
-define <2 x i32> @test_concat_same_v1i32_v1i32(<1 x i32> %a) {
-; CHECK-LABEL: test_concat_same_v1i32_v1i32:
-; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
-entry:
-  %0 = extractelement <1 x i32> %a, i32 0
-  %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
-  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
-  ret <2 x i32> %vecinit1.i
-}
-
-define <2 x i32> @test_concat_diff_v1i32_v1i32(<1 x i32> %a, <1 x i32> %b) {
-; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
-; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
-; CHECK-NEXT: sqabs s{{[0-9]+}}, s{{[0-9]+}}
-; CHECK-NEXT: ins v0.s[1], v1.s[0]
-entry:
-  %c = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %a)
-  %d = extractelement <1 x i32> %c, i32 0
-  %e = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %b)
-  %f = extractelement <1 x i32> %e, i32 0
-  %h = shufflevector <1 x i32> %c, <1 x i32> %e, <2 x i32> <i32 0, i32 1>
-  ret <2 x i32> %h
-}
-
-define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
-; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-  ret <16 x i8> %vecinit30
-}
-
-define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
-; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <8 x i8> %x, i32 0
-  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
-  %vecext1 = extractelement <8 x i8> %x, i32 1
-  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
-  %vecext3 = extractelement <8 x i8> %x, i32 2
-  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
-  %vecext5 = extractelement <8 x i8> %x, i32 3
-  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
-  %vecext7 = extractelement <8 x i8> %x, i32 4
-  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
-  %vecext9 = extractelement <8 x i8> %x, i32 5
-  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
-  %vecext11 = extractelement <8 x i8> %x, i32 6
-  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
-  %vecext13 = extractelement <8 x i8> %x, i32 7
-  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
-  %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-  ret <16 x i8> %vecinit30
-}
-
-define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
-; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <16 x i8> %x, i32 0
-  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
-  %vecext1 = extractelement <16 x i8> %x, i32 1
-  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
-  %vecext3 = extractelement <16 x i8> %x, i32 2
-  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
-  %vecext5 = extractelement <16 x i8> %x, i32 3
-  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
-  %vecext7 = extractelement <16 x i8> %x, i32 4
-  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
-  %vecext9 = extractelement <16 x i8> %x, i32 5
-  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
-  %vecext11 = extractelement <16 x i8> %x, i32 6
-  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
-  %vecext13 = extractelement <16 x i8> %x, i32 7
-  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
-  %vecext15 = extractelement <8 x i8> %y, i32 0
-  %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
-  %vecext17 = extractelement <8 x i8> %y, i32 1
-  %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
-  %vecext19 = extractelement <8 x i8> %y, i32 2
-  %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
-  %vecext21 = extractelement <8 x i8> %y, i32 3
-  %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
-  %vecext23 = extractelement <8 x i8> %y, i32 4
-  %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
-  %vecext25 = extractelement <8 x i8> %y, i32 5
-  %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
-  %vecext27 = extractelement <8 x i8> %y, i32 6
-  %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
-  %vecext29 = extractelement <8 x i8> %y, i32 7
-  %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
-  ret <16 x i8> %vecinit30
-}
-
-define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
-; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <8 x i8> %x, i32 0
-  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
-  %vecext1 = extractelement <8 x i8> %x, i32 1
-  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
-  %vecext3 = extractelement <8 x i8> %x, i32 2
-  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
-  %vecext5 = extractelement <8 x i8> %x, i32 3
-  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
-  %vecext7 = extractelement <8 x i8> %x, i32 4
-  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
-  %vecext9 = extractelement <8 x i8> %x, i32 5
-  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
-  %vecext11 = extractelement <8 x i8> %x, i32 6
-  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
-  %vecext13 = extractelement <8 x i8> %x, i32 7
-  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
-  %vecext15 = extractelement <8 x i8> %y, i32 0
-  %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
-  %vecext17 = extractelement <8 x i8> %y, i32 1
-  %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
-  %vecext19 = extractelement <8 x i8> %y, i32 2
-  %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
-  %vecext21 = extractelement <8 x i8> %y, i32 3
-  %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
-  %vecext23 = extractelement <8 x i8> %y, i32 4
-  %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
-  %vecext25 = extractelement <8 x i8> %y, i32 5
-  %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
-  %vecext27 = extractelement <8 x i8> %y, i32 6
-  %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
-  %vecext29 = extractelement <8 x i8> %y, i32 7
-  %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
-  ret <16 x i8> %vecinit30
-}
-
-define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
-; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-  ret <8 x i16> %vecinit14
-}
-
-define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
-; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <4 x i16> %x, i32 0
-  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
-  %vecext1 = extractelement <4 x i16> %x, i32 1
-  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
-  %vecext3 = extractelement <4 x i16> %x, i32 2
-  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
-  %vecext5 = extractelement <4 x i16> %x, i32 3
-  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
-  %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-  ret <8 x i16> %vecinit14
-}
-
-define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
-; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <8 x i16> %x, i32 0
-  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
-  %vecext1 = extractelement <8 x i16> %x, i32 1
-  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
-  %vecext3 = extractelement <8 x i16> %x, i32 2
-  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
-  %vecext5 = extractelement <8 x i16> %x, i32 3
-  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
-  %vecext7 = extractelement <4 x i16> %y, i32 0
-  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
-  %vecext9 = extractelement <4 x i16> %y, i32 1
-  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
-  %vecext11 = extractelement <4 x i16> %y, i32 2
-  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
-  %vecext13 = extractelement <4 x i16> %y, i32 3
-  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
-  ret <8 x i16> %vecinit14
-}
-
-define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
-; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <4 x i16> %x, i32 0
-  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
-  %vecext1 = extractelement <4 x i16> %x, i32 1
-  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
-  %vecext3 = extractelement <4 x i16> %x, i32 2
-  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
-  %vecext5 = extractelement <4 x i16> %x, i32 3
-  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
-  %vecext7 = extractelement <4 x i16> %y, i32 0
-  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
-  %vecext9 = extractelement <4 x i16> %y, i32 1
-  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
-  %vecext11 = extractelement <4 x i16> %y, i32 2
-  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
-  %vecext13 = extractelement <4 x i16> %y, i32 3
-  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
-  ret <8 x i16> %vecinit14
-}
-
-define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  ret <4 x i32> %vecinit6
-}
-
-define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <2 x i32> %x, i32 0
-  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
-  %vecext1 = extractelement <2 x i32> %x, i32 1
-  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
-  %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  ret <4 x i32> %vecinit6
-}
-
-define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <4 x i32> %x, i32 0
-  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
-  %vecext1 = extractelement <4 x i32> %x, i32 1
-  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
-  %vecext3 = extractelement <2 x i32> %y, i32 0
-  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
-  %vecext5 = extractelement <2 x i32> %y, i32 1
-  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
-  ret <4 x i32> %vecinit6
-}
-
-define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <2 x i32> %x, i32 0
-  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
-  %vecext1 = extractelement <2 x i32> %x, i32 1
-  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
-  %vecext3 = extractelement <2 x i32> %y, i32 0
-  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
-  %vecext5 = extractelement <2 x i32> %y, i32 1
-  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
-  ret <4 x i32> %vecinit6
-}
-
-define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
-  ret <2 x i64> %vecinit2
-}
-
-define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <1 x i64> %x, i32 0
-  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
-  %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
-  ret <2 x i64> %vecinit2
-}
-
-define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <2 x i64> %x, i32 0
-  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
-  %vecext1 = extractelement <1 x i64> %y, i32 0
-  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
-  ret <2 x i64> %vecinit2
-}
-
-define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <1 x i64> %x, i32 0
-  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
-  %vecext1 = extractelement <1 x i64> %y, i32 0
-  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
-  ret <2 x i64> %vecinit2
-}
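
Every concat test above, whatever the element type, funnels into one
lowering: keep the low 64-bit half in d[0] and insert the high half with
"ins Vd.d[1], Vn.d[0]". The canonical shufflevector spelling of such a
concat (function name illustrative):

define <4 x i32> @concat_halves(<2 x i32> %lo, <2 x i32> %hi) {
  ; lanes 0-1 from %lo, lanes 2-3 from %hi -> ins v0.d[1], v1.d[0]
  %r = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %r
}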
-
-declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>)
-
-; This case tests the copy of two FPR8 registers, which is implemented as an
-; fmov between the corresponding FPR32 registers.
-define <1 x i8> @test_copy_FPR8_FPR8(<1 x i8> %a, <1 x i8> %b) {
-; CHECK-LABEL: test_copy_FPR8_FPR8:
-; CHECK: usqadd b1, b0
-; CHECK-NEXT: fmov s0, s1
-entry:
- %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %b, <1 x i8> %a)
- ret <1 x i8> %vsqadd2.i
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>)
-
-define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) {
-; CHECK-LABEL: test_copy_FPR16_FPR16:
-; CHECK: usqadd h1, h0
-; CHECK-NEXT: fmov s0, s1
-entry:
-  %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %b, <1 x i16> %a)
-  ret <1 x i16> %vsqadd2.i
-}
-
-define <4 x i16> @concat_vector_v4i16_const() {
-; CHECK-LABEL: concat_vector_v4i16_const:
-; CHECK: dup {{v[0-9]+}}.4h, wzr
- %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
- ret <4 x i16> %r
-}
-
-define <4 x i16> @concat_vector_v4i16_const_one() {
-; CHECK-LABEL: concat_vector_v4i16_const_one:
-; CHECK: movz {{w[0-9]+}}, #1
-; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
- %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
- ret <4 x i16> %r
-}
-
-define <4 x i32> @concat_vector_v4i32_const() {
-; CHECK-LABEL: concat_vector_v4i32_const:
-; CHECK: dup {{v[0-9]+}}.4s, wzr
- %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
- ret <4 x i32> %r
-}
-
-define <8 x i8> @concat_vector_v8i8_const() {
-; CHECK-LABEL: concat_vector_v8i8_const:
-; CHECK: dup {{v[0-9]+}}.8b, wzr
- %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
- ret <8 x i8> %r
-}
-
-define <8 x i16> @concat_vector_v8i16_const() {
-; CHECK-LABEL: concat_vector_v8i16_const:
-; CHECK: dup {{v[0-9]+}}.8h, wzr
- %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %r
-}
-
-define <8 x i16> @concat_vector_v8i16_const_one() {
-; CHECK-LABEL: concat_vector_v8i16_const_one:
-; CHECK: movz {{w[0-9]+}}, #1
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
- %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %r
-}
-
-define <16 x i8> @concat_vector_v16i8_const() {
-; CHECK-LABEL: concat_vector_v16i8_const:
-; CHECK: dup {{v[0-9]+}}.16b, wzr
- %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
- ret <16 x i8> %r
-}
-
-define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
-; CHECK-LABEL: concat_vector_v4i16:
-; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
- %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
- ret <4 x i16> %r
-}
-
-define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
-; CHECK-LABEL: concat_vector_v4i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
- %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
- ret <4 x i32> %r
-}
-
-define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
-; CHECK-LABEL: concat_vector_v8i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[0]
- %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
- ret <8 x i8> %r
-}
-
-define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
-; CHECK-LABEL: concat_vector_v8i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
- %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %r
-}
-
-define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
-; CHECK-LABEL: concat_vector_v16i8:
-; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[0]
- %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
- ret <16 x i8> %r
-}

Removed: llvm/trunk/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll (removed)
@@ -1,48 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 has a separate copy of this test, since it uses different intrinsics.
-
-define <4 x i32> @copyTuple.QPair(i8* %a, i8* %b) {
-; CHECK-LABEL: copyTuple.QPair:
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
-entry:
-  %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, i32 0, i32 4)
-  %extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
-  %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 1, i32 4)
-  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld1, 0
-  ret <4 x i32> %vld1.fca.0.extract
-}
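-
-; The two "mov v.16b" copies checked above come from copying the register
-; tuple: ld2 needs its input pair in consecutive vector registers, so a
-; plausible (illustrative, not checked) sequence is:
-;   mov v2.16b, v0.16b   ; copy tuple {v0, v1} into {v2, v3}
-;   mov v3.16b, v1.16b
-;   ld2 { v2.s, v3.s }[1], [x1]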
-
-define <4 x i32> @copyTuple.QTriple(i8* %a, i8* %b, <4 x i32> %c) {
-; CHECK-LABEL: copyTuple.QTriple:
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
-entry:
-  %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i32 0, i32 4)
-  %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
-  %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, i32 1, i32 4)
-  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
-  ret <4 x i32> %vld1.fca.0.extract
-}
-
-define <4 x i32> @copyTuple.QQuad(i8* %a, i8* %b, <4 x i32> %c) {
-; CHECK-LABEL: copyTuple.QQuad:
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
-entry:
-  %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i32 0, i32 4)
-  %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
-  %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i32 1, i32 4)
-  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
-  ret <4 x i32> %vld1.fca.0.extract
-}
-
-declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)

Removed: llvm/trunk/test/CodeGen/AArch64/neon-crypto.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-crypto.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-crypto.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-crypto.ll (removed)
@@ -1,145 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -mattr=+crypto | FileCheck %s
-; RUN: not llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s
-; arm64 has a separate test for this, covering the same features (crypto.ll). N.b. NO-CRYPTO will need porting.
-
-declare <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32>, <4 x i32>, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32>, <4 x i32>, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha256h(<4 x i32>, <4 x i32>, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32>, <4 x i32>, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha1m(<4 x i32>, i32, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha1p(<4 x i32>, i32, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha1c(<4 x i32>, i32, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32>, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32>, <4 x i32>) #1
-
-declare i32 @llvm.arm.neon.sha1h(i32) #1
-
-declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>) #1
-
-declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>) #1
-
-declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) #1
-
-declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) #1
-
-define <16 x i8> @test_vaeseq_u8(<16 x i8> %data, <16 x i8> %key) {
-; CHECK: test_vaeseq_u8:
-; CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-; CHECK-NO-CRYPTO: Cannot select: intrinsic %llvm.arm.neon.aese
-entry:
-  %aese.i = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key)
-  ret <16 x i8> %aese.i
-}
-
-define <16 x i8> @test_vaesdq_u8(<16 x i8> %data, <16 x i8> %key) {
-; CHECK: test_vaesdq_u8:
-; CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %aesd.i = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key)
-  ret <16 x i8> %aesd.i
-}
-
-define <16 x i8> @test_vaesmcq_u8(<16 x i8> %data) {
-; CHECK: test_vaesmcq_u8:
-; CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %aesmc.i = tail call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %data)
-  ret <16 x i8> %aesmc.i
-}
-
-define <16 x i8> @test_vaesimcq_u8(<16 x i8> %data) {
-; CHECK: test_vaesimcq_u8:
-; CHECK: aesimc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %aesimc.i = tail call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %data)
-  ret <16 x i8> %aesimc.i
-}
-
-define i32 @test_vsha1h_u32(i32 %hash_e) {
-; CHECK: test_vsha1h_u32:
-; CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %sha1h1.i = tail call i32 @llvm.arm.neon.sha1h(i32 %hash_e)
-  ret i32 %sha1h1.i
-}
-
-define <4 x i32> @test_vsha1su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w12_15) {
-; CHECK: test_vsha1su1q_u32:
-; CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %sha1su12.i = tail call <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32> %tw0_3, <4 x i32> %w12_15)
-  ret <4 x i32> %sha1su12.i
-}
-
-define <4 x i32> @test_vsha256su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7) {
-; CHECK: test_vsha256su0q_u32:
-; CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %sha256su02.i = tail call <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7)
-  ret <4 x i32> %sha256su02.i
-}
-
-define <4 x i32> @test_vsha1cq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
-; CHECK: test_vsha1cq_u32:
-; CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %sha1c1.i = tail call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
-  ret <4 x i32> %sha1c1.i
-}
-
-define <4 x i32> @test_vsha1pq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
-; CHECK: test_vsha1pq_u32:
-; CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %sha1p1.i = tail call <4 x i32> @llvm.arm.neon.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
-  ret <4 x i32> %sha1p1.i
-}
-
-define <4 x i32> @test_vsha1mq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
-; CHECK: test_vsha1mq_u32:
-; CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %sha1m1.i = tail call <4 x i32> @llvm.arm.neon.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
-  ret <4 x i32> %sha1m1.i
-}
-
-define <4 x i32> @test_vsha1su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11) {
-; CHECK: test_vsha1su0q_u32:
-; CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %sha1su03.i = tail call <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11)
-  ret <4 x i32> %sha1su03.i
-}
-
-define <4 x i32> @test_vsha256hq_u32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) {
-; CHECK: test_vsha256hq_u32:
-; CHECK: sha256h {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %sha256h3.i = tail call <4 x i32> @llvm.arm.neon.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
-  ret <4 x i32> %sha256h3.i
-}
-
-define <4 x i32> @test_vsha256h2q_u32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) {
-; CHECK: test_vsha256h2q_u32:
-; CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %sha256h23.i = tail call <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
-  ret <4 x i32> %sha256h23.i
-}
-
-define <4 x i32> @test_vsha256su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) {
-; CHECK: test_vsha256su1q_u32:
-; CHECK: sha256su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %sha256su13.i = tail call <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15)
-  ret <4 x i32> %sha256su13.i
-}
-

Modified: llvm/trunk/test/CodeGen/AArch64/neon-diagnostics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-diagnostics.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-diagnostics.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-diagnostics.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {

Modified: llvm/trunk/test/CodeGen/AArch64/neon-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-extract.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-extract.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-extract.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) {

Removed: llvm/trunk/test/CodeGen/AArch64/neon-facge-facgt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-facge-facgt.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-facge-facgt.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-facge-facgt.ll (removed)
@@ -1,57 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; arm64 has duplicates for this functionality in vcmp.ll.
-
-declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>)
-declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>)
-declare <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double>, <2 x double>)
-
-define <2 x i32> @facge_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: facge_from_intr_v2i32:
-  %val = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %A, <2 x float> %B)
-; CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  ret <2 x i32> %val
-}
-define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: facge_from_intr_v4i32:
-  %val = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %A, <4 x float> %B)
-; CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-  ret <4 x i32> %val
-}
-
-define <2 x i64> @facge_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: facge_from_intr_v2i64:
-  %val = call <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double> %A, <2 x double> %B)
-; CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-  ret <2 x i64> %val
-}
-
-declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>)
-declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>)
-declare <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double>, <2 x double>)
-
-define <2 x i32> @facgt_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: facgt_from_intr_v2i32:
-  %val = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %A, <2 x float> %B)
-; CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  ret <2 x i32> %val
-}
-define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: facgt_from_intr_v4i32:
-  %val = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %A, <4 x float> %B)
-; CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-  ret <4 x i32> %val
-}
-
-define <2 x i64> @facgt_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: facgt_from_intr_v2i64:
-  %val = call <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double> %A, <2 x double> %B)
-; CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-  ret <2 x i64> %val
-}
-

Modified: llvm/trunk/test/CodeGen/AArch64/neon-fma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-fma.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-fma.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-fma.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {

Modified: llvm/trunk/test/CodeGen/AArch64/neon-fpround_f128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-fpround_f128.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-fpround_f128.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-fpround_f128.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 define <1 x double> @test_fpround_v1f128(<1 x fp128>* %a) {

Removed: llvm/trunk/test/CodeGen/AArch64/neon-frsqrt-frecp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-frsqrt-frecp.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-frsqrt-frecp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-frsqrt-frecp.ll (removed)
@@ -1,55 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon  | FileCheck %s
-; arm64 has duplicates of all these tests in vsqrt.ll
-
-; Set of tests for when the intrinsic is used.
-
-declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @frsqrts_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: frsqrts v0.2s, v0.2s, v1.2s
-        %val = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-        ret <2 x float> %val
-}
-
-define <4 x float> @frsqrts_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: frsqrts v0.4s, v0.4s, v1.4s
-        %val = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-        ret <4 x float> %val
-}
-
-define <2 x double> @frsqrts_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: frsqrts v0.2d, v0.2d, v1.2d
-        %val = call <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-        ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @frecps_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: frecps v0.2s, v0.2s, v1.2s
-        %val = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-        ret <2 x float> %val
-}
-
-define <4 x float> @frecps_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: frecps v0.4s, v0.4s, v1.4s
-        %val = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-        ret <4 x float> %val
-}
-
-define <2 x double> @frecps_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: frecps v0.2d, v0.2d, v1.2d
-        %val = call <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-        ret <2 x double> %val
-}
-

Removed: llvm/trunk/test/CodeGen/AArch64/neon-halving-add-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-halving-add-sub.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-halving-add-sub.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-halving-add-sub.ll (removed)
@@ -1,208 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; arm64 duplicates these in vhadd.ll and vhsub.ll
-
-declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uhadd_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uhadd v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_shadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_shadd_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: shadd v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uhadd_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uhadd v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_shadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_shadd_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: shadd v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uhadd_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uhadd v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_shadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_shadd_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: shadd v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uhadd_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uhadd v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_shadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_shadd_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: shadd v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uhadd_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uhadd v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_shadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_shadd_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: shadd v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uhadd_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uhadd v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_shadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_shadd_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: shadd v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-
-declare <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uhsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uhsub_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uhsub v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_shsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_shsub_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: shsub v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uhsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uhsub_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uhsub v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_shsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_shsub_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: shsub v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uhsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uhsub_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uhsub v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_shsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_shsub_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: shsub v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uhsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uhsub_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uhsub v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_shsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_shsub_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: shsub v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uhsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uhsub_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uhsub v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_shsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_shsub_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: shsub v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uhsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uhsub_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uhsub v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_shsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_shsub_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: shsub v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-

Modified: llvm/trunk/test/CodeGen/AArch64/neon-idiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-idiv.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-idiv.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-idiv.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s
 ; RUN: llc -mtriple=arm64-none-linux-gnu < %s -mattr=+neon | FileCheck %s
 
 define <4 x i32> @test1(<4 x i32> %a) {

Removed: llvm/trunk/test/CodeGen/AArch64/neon-load-store-v1i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-load-store-v1i32.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-load-store-v1i32.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-load-store-v1i32.ll (removed)
@@ -1,30 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 does not use these pseudo-vectors, and they're not blessed by the PCS. Skipping.
-
-; Test that load/store of the v1i8, v1i16 and v1i32 types can be selected correctly
-define void @load.store.v1i8(<1 x i8>* %ptr, <1 x i8>* %ptr2) {
-; CHECK-LABEL: load.store.v1i8:
-; CHECK: ldr b{{[0-9]+}}, [x{{[0-9]+|sp}}]
-; CHECK: str b{{[0-9]+}}, [x{{[0-9]+|sp}}]
-  %a = load <1 x i8>* %ptr
-  store <1 x i8> %a, <1 x i8>* %ptr2
-  ret void
-}
-
-define void @load.store.v1i16(<1 x i16>* %ptr, <1 x i16>* %ptr2) {
-; CHECK-LABEL: load.store.v1i16:
-; CHECK: ldr h{{[0-9]+}}, [x{{[0-9]+|sp}}]
-; CHECK: str h{{[0-9]+}}, [x{{[0-9]+|sp}}]
-  %a = load <1 x i16>* %ptr
-  store <1 x i16> %a, <1 x i16>* %ptr2
-  ret void
-}
-
-define void @load.store.v1i32(<1 x i32>* %ptr, <1 x i32>* %ptr2) {
-; CHECK-LABEL: load.store.v1i32:
-; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+|sp}}]
-; CHECK: str s{{[0-9]+}}, [x{{[0-9]+|sp}}]
-  %a = load <1 x i32>* %ptr
-  store <1 x i32> %a, <1 x i32>* %ptr2
-  ret void
-}

Removed: llvm/trunk/test/CodeGen/AArch64/neon-max-min-pairwise.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-max-min-pairwise.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-max-min-pairwise.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-max-min-pairwise.ll (removed)
@@ -1,347 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; These tests duplicate the arm64 tests in vmax.ll
-
-declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_smaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: test_smaxp_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: smaxp v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_umaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: umaxp v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_smaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_smaxp_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: smaxp v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_umaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_umaxp_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: umaxp v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_smaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_smaxp_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: smaxp v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_umaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_umaxp_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: umaxp v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-
-declare <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_smaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_smaxp_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: smaxp v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_umaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_umaxp_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: umaxp v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-
-declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_smaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_smaxp_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: smaxp v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_umaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_umaxp_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: umaxp v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_smaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_smaxp_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: smaxp v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_umaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_umaxp_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: umaxp v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_sminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: test_sminp_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sminp v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_uminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uminp v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_sminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sminp_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sminp v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_uminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uminp_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uminp v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_sminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sminp_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sminp v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_uminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uminp_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uminp v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-
-declare <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_sminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sminp_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sminp v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_uminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uminp_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uminp v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-
-declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_sminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sminp_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sminp v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_uminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uminp_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uminp v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_sminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sminp_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sminp v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_uminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uminp_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uminp v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fmaxp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fmaxp_v2f32:
-        %val = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fmaxp v0.2s, v0.2s, v1.2s
-        ret <2 x float> %val
-}
-
-define <4 x float> @test_fmaxp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fmaxp_v4f32:
-        %val = call <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fmaxp v0.4s, v0.4s, v1.4s
-        ret <4 x float> %val
-}
-
-define <2 x double> @test_fmaxp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fmaxp_v2f64:
-        %val = call <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fmaxp v0.2d, v0.2d, v1.2d
-        ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fminp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fminp_v2f32:
-        %val = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fminp v0.2s, v0.2s, v1.2s
-        ret <2 x float> %val
-}
-
-define <4 x float> @test_fminp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fminp_v4f32:
-        %val = call <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fminp v0.4s, v0.4s, v1.4s
-        ret <4 x float> %val
-}
-
-define <2 x double> @test_fminp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fminp_v2f64:
-        %val = call <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fminp v0.2d, v0.2d, v1.2d
-        ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fmaxnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fmaxnmp_v2f32:
-        %val = call <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fmaxnmp v0.2s, v0.2s, v1.2s
-        ret <2 x float> %val
-}
-
-define <4 x float> @test_fmaxnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fmaxnmp_v4f32:
-        %val = call <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fmaxnmp v0.4s, v0.4s, v1.4s
-        ret <4 x float> %val
-}
-
-define <2 x double> @test_fmaxnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fmaxnmp_v2f64:
-        %val = call <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fmaxnmp v0.2d, v0.2d, v1.2d
-        ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fminnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fminnmp_v2f32:
-        %val = call <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fminnmp v0.2s, v0.2s, v1.2s
-        ret <2 x float> %val
-}
-
-define <4 x float> @test_fminnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fminnmp_v4f32:
-        %val = call <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fminnmp v0.4s, v0.4s, v1.4s
-        ret <4 x float> %val
-}
-
-define <2 x double> @test_fminnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fminnmp_v2f64:
-        %val = call <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fminnmp v0.2d, v0.2d, v1.2d
-        ret <2 x double> %val
-}
-
-define i32 @test_vminv_s32(<2 x i32> %a) {
-; CHECK-LABEL: test_vminv_s32
-; CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %1 = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32> %a)
-  %2 = extractelement <1 x i32> %1, i32 0
-  ret i32 %2
-}
-
-define i32 @test_vminv_u32(<2 x i32> %a) {
-; CHECK-LABEL: test_vminv_u32
-; CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %1 = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32> %a)
-  %2 = extractelement <1 x i32> %1, i32 0
-  ret i32 %2
-}
-
-define i32 @test_vmaxv_s32(<2 x i32> %a) {
-; CHECK-LABEL: test_vmaxv_s32
-; CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %1 = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32> %a)
-  %2 = extractelement <1 x i32> %1, i32 0
-  ret i32 %2
-}
-
-define i32 @test_vmaxv_u32(<2 x i32> %a) {
-; CHECK-LABEL: test_vmaxv_u32
-; CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %1 = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32> %a)
-  %2 = extractelement <1 x i32> %1, i32 0
-  ret i32 %2
-}
-
-declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32>)
-declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32>)
-declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32>)
-declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32>)

Removed: llvm/trunk/test/CodeGen/AArch64/neon-max-min.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-max-min.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-max-min.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-max-min.ll (removed)
@@ -1,311 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; These tests duplicate the ones in arm64's vmax.ll
-
-declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_smax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: test_smax_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: smax v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_umax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: umax v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_smax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_smax_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: smax v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_umax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_umax_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: umax v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_smax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_smax_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: smax v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_umax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_umax_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: umax v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-
-declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_smax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_smax_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: smax v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_umax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_umax_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: umax v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-
-declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_smax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_smax_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: smax v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_umax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_umax_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: umax v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_smax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_smax_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: smax v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_umax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_umax_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: umax v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_smin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: test_smin_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: smin v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_umin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: umin v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_smin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_smin_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: smin v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_umin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_umin_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: umin v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_smin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_smin_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: smin v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_umin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_umin_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: umin v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-
-declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_smin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_smin_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: smin v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_umin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_umin_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: umin v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-
-declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_smin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_smin_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: smin v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_umin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_umin_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: umin v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_smin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_smin_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: smin v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_umin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_umin_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: umin v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fmax_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fmax_v2f32:
-        %val = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fmax v0.2s, v0.2s, v1.2s
-        ret <2 x float> %val
-}
-
-define <4 x float> @test_fmax_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fmax_v4f32:
-        %val = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fmax v0.4s, v0.4s, v1.4s
-        ret <4 x float> %val
-}
-
-define <2 x double> @test_fmax_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fmax_v2f64:
-        %val = call <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fmax v0.2d, v0.2d, v1.2d
-        ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fmin_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fmin_v2f32:
-        %val = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fmin v0.2s, v0.2s, v1.2s
-        ret <2 x float> %val
-}
-
-define <4 x float> @test_fmin_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fmin_v4f32:
-        %val = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fmin v0.4s, v0.4s, v1.4s
-        ret <4 x float> %val
-}
-
-define <2 x double> @test_fmin_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fmin_v2f64:
-        %val = call <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fmin v0.2d, v0.2d, v1.2d
-        ret <2 x double> %val
-}
-
-
-declare <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fmaxnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fmaxnm_v2f32:
-        %val = call <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fmaxnm v0.2s, v0.2s, v1.2s
-        ret <2 x float> %val
-}
-
-define <4 x float> @test_fmaxnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fmaxnm_v4f32:
-        %val = call <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fmaxnm v0.4s, v0.4s, v1.4s
-        ret <4 x float> %val
-}
-
-define <2 x double> @test_fmaxnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fmaxnm_v2f64:
-        %val = call <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fmaxnm v0.2d, v0.2d, v1.2d
-        ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fminnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fminnm_v2f32:
-        %val = call <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fminnm v0.2s, v0.2s, v1.2s
-        ret <2 x float> %val
-}
-
-define <4 x float> @test_fminnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fminnm_v4f32:
-        %val = call <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fminnm v0.4s, v0.4s, v1.4s
-        ret <4 x float> %val
-}
-
-define <2 x double> @test_fminnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fminnm_v2f64:
-        %val = call <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fminnm v0.2d, v0.2d, v1.2d
-        ret <2 x double> %val
-}

Removed: llvm/trunk/test/CodeGen/AArch64/neon-misc-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-misc-scalar.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-misc-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-misc-scalar.ll (removed)
@@ -1,61 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 already has copies of these tests (scattered).
-
-declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>)
-
-declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>)
-
-declare <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64>)
-
-declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>)
-
-declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) {
-entry:
-  ; CHECK: test_vuqadd_s64
-  %vuqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
-  ; CHECK: suqadd d{{[0-9]+}}, d{{[0-9]+}}
-  ret <1 x i64> %vuqadd2.i
-}
-
-define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) {
-entry:
-  ; CHECK: test_vsqadd_u64
-  %vsqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
-  ; CHECK: usqadd d{{[0-9]+}}, d{{[0-9]+}}
-  ret <1 x i64> %vsqadd2.i
-}
-
-define <1 x i64> @test_vabs_s64(<1 x i64> %a) {
-  ; CHECK: test_vabs_s64
-entry:
-  %vabs1.i = tail call <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64> %a)
-  ; CHECK: abs d{{[0-9]+}}, d{{[0-9]+}}
-  ret <1 x i64> %vabs1.i
-}
-
-define <1 x i64> @test_vqabs_s64(<1 x i64> %a) {
-  ; CHECK: test_vqabs_s64
-entry:
-  %vqabs1.i = tail call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %a)
-  ; CHECK: sqabs d{{[0-9]+}}, d{{[0-9]+}}
-  ret <1 x i64> %vqabs1.i
-}
-
-define <1 x i64> @test_vqneg_s64(<1 x i64> %a) {
-  ; CHECK: test_vqneg_s64
-entry:
-  %vqneg1.i = tail call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %a)
-  ; CHECK: sqneg d{{[0-9]+}}, d{{[0-9]+}}
-  ret <1 x i64> %vqneg1.i
-}
-
-define <1 x i64> @test_vneg_s64(<1 x i64> %a) {
-  ; CHECK: test_vneg_s64
-entry:
-  %sub.i = sub <1 x i64> zeroinitializer, %a
-  ; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}}
-  ret <1 x i64> %sub.i
-}
-

Removed: llvm/trunk/test/CodeGen/AArch64/neon-misc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-misc.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-misc.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-misc.ll (removed)
@@ -1,2014 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-; arm64 has a separate copy of these in aarch64-neon-misc.ll due to different intrinsics.
-
-define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 {
-; CHECK: rev16 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-  ret <8 x i8> %shuffle.i
-}
-
-define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 {
-; CHECK: rev16 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-  ret <16 x i8> %shuffle.i
-}
-
-define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 {
-; CHECK: rev32 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-  ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 {
-; CHECK: rev32 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-  ret <4 x i16> %shuffle.i
-}
-
-define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 {
-; CHECK: rev32 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-  ret <16 x i8> %shuffle.i
-}
-
-define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 {
-; CHECK: rev32 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-  ret <8 x i16> %shuffle.i
-}
-
-define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-  ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  ret <4 x i16> %shuffle.i
-}
-
-define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-  ret <2 x i32> %shuffle.i
-}
-
-define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %shuffle.i = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-  ret <2 x float> %shuffle.i
-}
-
-define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-  ret <16 x i8> %shuffle.i
-}
-
-define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-  ret <8 x i16> %shuffle.i
-}
-
-define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-  ret <4 x i32> %shuffle.i
-}
-
-define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-  ret <4 x float> %shuffle.i
-}
-
-define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 {
-; CHECK: saddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
-  %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) #4
-  ret <4 x i16> %vpaddl.i
-}
-
-define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 {
-; CHECK: saddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
-  %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a) #4
-  ret <2 x i32> %vpaddl1.i
-}
-
-define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 {
-; CHECK: saddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
-  %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a) #4
-  ret <1 x i64> %vpaddl1.i
-}
-
-define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 {
-; CHECK: uaddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
-  %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) #4
-  ret <4 x i16> %vpaddl.i
-}
-
-define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 {
-; CHECK: uaddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
-  %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a) #4
-  ret <2 x i32> %vpaddl1.i
-}
-
-define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 {
-; CHECK: uaddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
-  %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a) #4
-  ret <1 x i64> %vpaddl1.i
-}
-
-define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 {
-; CHECK: saddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
-  %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) #4
-  ret <8 x i16> %vpaddl.i
-}
-
-define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 {
-; CHECK: saddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
-  %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a) #4
-  ret <4 x i32> %vpaddl1.i
-}
-
-define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 {
-; CHECK: saddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
-  %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a) #4
-  ret <2 x i64> %vpaddl1.i
-}
-
-define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 {
-; CHECK: uaddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
-  %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) #4
-  ret <8 x i16> %vpaddl.i
-}
-
-define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 {
-; CHECK: uaddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
-  %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a) #4
-  ret <4 x i32> %vpaddl1.i
-}
-
-define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 {
-; CHECK: uaddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
-  %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a) #4
-  ret <2 x i64> %vpaddl1.i
-}
-
-define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 {
-; CHECK: sadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
-  %vpadal1.i = tail call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4
-  ret <4 x i16> %vpadal1.i
-}
-
-define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 {
-; CHECK: sadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
-  %vpadal2.i = tail call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4
-  ret <2 x i32> %vpadal2.i
-}
-
-define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 {
-; CHECK: sadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
-  %vpadal2.i = tail call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4
-  ret <1 x i64> %vpadal2.i
-}
-
-define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 {
-; CHECK: uadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
-  %vpadal1.i = tail call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4
-  ret <4 x i16> %vpadal1.i
-}
-
-define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 {
-; CHECK: uadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
-  %vpadal2.i = tail call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4
-  ret <2 x i32> %vpadal2.i
-}
-
-define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 {
-; CHECK: uadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
-  %vpadal2.i = tail call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4
-  ret <1 x i64> %vpadal2.i
-}
-
-define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 {
-; CHECK: sadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
-  %vpadal1.i = tail call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4
-  ret <8 x i16> %vpadal1.i
-}
-
-define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 {
-; CHECK: sadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
-  %vpadal2.i = tail call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4
-  ret <4 x i32> %vpadal2.i
-}
-
-define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 {
-; CHECK: sadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
-  %vpadal2.i = tail call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4
-  ret <2 x i64> %vpadal2.i
-}
-
-define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 {
-; CHECK: uadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
-  %vpadal1.i = tail call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4
-  ret <8 x i16> %vpadal1.i
-}
-
-define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 {
-; CHECK: uadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
-  %vpadal2.i = tail call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4
-  ret <4 x i32> %vpadal2.i
-}
-
-define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 {
-; CHECK: uadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
-  %vpadal2.i = tail call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4
-  ret <2 x i64> %vpadal2.i
-}
-
-define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %vqabs.i = tail call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) #4
-  ret <8 x i8> %vqabs.i
-}
-
-define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %vqabs.i = tail call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) #4
-  ret <16 x i8> %vqabs.i
-}
-
-define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %vqabs1.i = tail call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a) #4
-  ret <4 x i16> %vqabs1.i
-}
-
-define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-  %vqabs1.i = tail call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a) #4
-  ret <8 x i16> %vqabs1.i
-}
-
-define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vqabs1.i = tail call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a) #4
-  ret <2 x i32> %vqabs1.i
-}
-
-define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vqabs1.i = tail call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a) #4
-  ret <4 x i32> %vqabs1.i
-}
-
-define <2 x i64> @test_vqabsq_s64(<2 x i64> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vqabs1.i = tail call <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64> %a) #4
-  ret <2 x i64> %vqabs1.i
-}
-
-define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %vqneg.i = tail call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) #4
-  ret <8 x i8> %vqneg.i
-}
-
-define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %vqneg.i = tail call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) #4
-  ret <16 x i8> %vqneg.i
-}
-
-define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %vqneg1.i = tail call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a) #4
-  ret <4 x i16> %vqneg1.i
-}
-
-define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-  %vqneg1.i = tail call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a) #4
-  ret <8 x i16> %vqneg1.i
-}
-
-define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vqneg1.i = tail call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a) #4
-  ret <2 x i32> %vqneg1.i
-}
-
-define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vqneg1.i = tail call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a) #4
-  ret <4 x i32> %vqneg1.i
-}
-
-define <2 x i64> @test_vqnegq_s64(<2 x i64> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vqneg1.i = tail call <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64> %a) #4
-  ret <2 x i64> %vqneg1.i
-}
-
-define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %sub.i = sub <8 x i8> zeroinitializer, %a
-  ret <8 x i8> %sub.i
-}
-
-define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %sub.i = sub <16 x i8> zeroinitializer, %a
-  ret <16 x i8> %sub.i
-}
-
-define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %sub.i = sub <4 x i16> zeroinitializer, %a
-  ret <4 x i16> %sub.i
-}
-
-define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-  %sub.i = sub <8 x i16> zeroinitializer, %a
-  ret <8 x i16> %sub.i
-}
-
-define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %sub.i = sub <2 x i32> zeroinitializer, %a
-  ret <2 x i32> %sub.i
-}
-
-define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %sub.i = sub <4 x i32> zeroinitializer, %a
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vnegq_s64(<2 x i64> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %sub.i = sub <2 x i64> zeroinitializer, %a
-  ret <2 x i64> %sub.i
-}
-
-define <2 x float> @test_vneg_f32(<2 x float> %a) #0 {
-; CHECK: fneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
-  ret <2 x float> %sub.i
-}
-
-define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 {
-; CHECK: fneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
-  ret <4 x float> %sub.i
-}
-
-define <2 x double> @test_vnegq_f64(<2 x double> %a) #0 {
-; CHECK: fneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
-  ret <2 x double> %sub.i
-}
-
-define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %vabs.i = tail call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) #4
-  ret <8 x i8> %vabs.i
-}
-
-define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %vabs.i = tail call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) #4
-  ret <16 x i8> %vabs.i
-}
-
-define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %vabs1.i = tail call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a) #4
-  ret <4 x i16> %vabs1.i
-}
-
-define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-  %vabs1.i = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a) #4
-  ret <8 x i16> %vabs1.i
-}
-
-define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vabs1.i = tail call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a) #4
-  ret <2 x i32> %vabs1.i
-}
-
-define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vabs1.i = tail call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a) #4
-  ret <4 x i32> %vabs1.i
-}
-
-define <2 x i64> @test_vabsq_s64(<2 x i64> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vabs1.i = tail call <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64> %a) #4
-  ret <2 x i64> %vabs1.i
-}
-
-define <2 x float> @test_vabs_f32(<2 x float> %a) #1 {
-; CHECK: fabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vabs1.i = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #4
-  ret <2 x float> %vabs1.i
-}
-
-define <4 x float> @test_vabsq_f32(<4 x float> %a) #1 {
-; CHECK: fabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vabs1.i = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #4
-  ret <4 x float> %vabs1.i
-}
-
-define <2 x double> @test_vabsq_f64(<2 x double> %a) #1 {
-; CHECK: fabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vabs1.i = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) #4
-  ret <2 x double> %vabs1.i
-}
-
-define <8 x i8> @test_vuqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %vuqadd.i = tail call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
-  ret <8 x i8> %vuqadd.i
-}
-
-define <16 x i8> @test_vuqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %vuqadd.i = tail call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
-  ret <16 x i8> %vuqadd.i
-}
-
-define <4 x i16> @test_vuqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %vuqadd2.i = tail call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4
-  ret <4 x i16> %vuqadd2.i
-}
-
-define <8 x i16> @test_vuqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-  %vuqadd2.i = tail call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4
-  ret <8 x i16> %vuqadd2.i
-}
-
-define <2 x i32> @test_vuqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vuqadd2.i = tail call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4
-  ret <2 x i32> %vuqadd2.i
-}
-
-define <4 x i32> @test_vuqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vuqadd2.i = tail call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4
-  ret <4 x i32> %vuqadd2.i
-}
-
-define <2 x i64> @test_vuqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vuqadd2.i = tail call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4
-  ret <2 x i64> %vuqadd2.i
-}
-
-define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 {
-; CHECK: cls v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %vcls.i = tail call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) #4
-  ret <8 x i8> %vcls.i
-}
-
-define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 {
-; CHECK: cls v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %vcls.i = tail call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) #4
-  ret <16 x i8> %vcls.i
-}
-
-define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 {
-; CHECK: cls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %vcls1.i = tail call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a) #4
-  ret <4 x i16> %vcls1.i
-}
-
-define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 {
-; CHECK: cls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-  %vcls1.i = tail call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a) #4
-  ret <8 x i16> %vcls1.i
-}
-
-define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 {
-; CHECK: cls v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcls1.i = tail call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a) #4
-  ret <2 x i32> %vcls1.i
-}
-
-define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 {
-; CHECK: cls v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcls1.i = tail call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a) #4
-  ret <4 x i32> %vcls1.i
-}
-
-define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 {
-; CHECK: clz v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4
-  ret <8 x i8> %vclz.i
-}
-
-define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 {
-; CHECK: clz v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4
-  ret <16 x i8> %vclz.i
-}
-
-define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 {
-; CHECK: clz v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4
-  ret <4 x i16> %vclz1.i
-}
-
-define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 {
-; CHECK: clz v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-  %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4
-  ret <8 x i16> %vclz1.i
-}
-
-define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 {
-; CHECK: clz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4
-  ret <2 x i32> %vclz1.i
-}
-
-define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 {
-; CHECK: clz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4
-  ret <4 x i32> %vclz1.i
-}
-
-define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 {
-; CHECK: cnt v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %vctpop.i = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
-  ret <8 x i8> %vctpop.i
-}
-
-define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 {
-; CHECK: cnt v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %vctpop.i = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
-  ret <16 x i8> %vctpop.i
-}
-
-define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 {
-; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %neg.i = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-  ret <8 x i8> %neg.i
-}
-
-define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 {
-; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %neg.i = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-  ret <16 x i8> %neg.i
-}
-
-define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 {
-; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %neg.i = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
-  ret <4 x i16> %neg.i
-}
-
-define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 {
-; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %neg.i = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
-  ret <8 x i16> %neg.i
-}
-
-define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 {
-; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %neg.i = xor <2 x i32> %a, <i32 -1, i32 -1>
-  ret <2 x i32> %neg.i
-}
-
-define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 {
-; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %neg.i = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
-  ret <4 x i32> %neg.i
-}
-
-define <8 x i8> @test_vrbit_s8(<8 x i8> %a) #0 {
-; CHECK: rbit v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %vrbit.i = tail call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #4
-  ret <8 x i8> %vrbit.i
-}
-
-define <16 x i8> @test_vrbitq_s8(<16 x i8> %a) #0 {
-; CHECK: rbit v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %vrbit.i = tail call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #4
-  ret <16 x i8> %vrbit.i
-}
-
-define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 {
-; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
-  %vmovn.i = trunc <8 x i16> %a to <8 x i8>
-  ret <8 x i8> %vmovn.i
-}
-
-define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 {
-; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
-  %vmovn.i = trunc <4 x i32> %a to <4 x i16>
-  ret <4 x i16> %vmovn.i
-}
-
-define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 {
-; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
-  %vmovn.i = trunc <2 x i64> %a to <2 x i32>
-  ret <2 x i32> %vmovn.i
-}
-
-define <16 x i8> @test_vmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
-; CHECK: xtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
-  %vmovn.i.i = trunc <8 x i16> %b to <8 x i8>
-  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vmovn.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  ret <16 x i8> %shuffle.i
-}
-
-define <8 x i16> @test_vmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
-; CHECK: xtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
-  %vmovn.i.i = trunc <4 x i32> %b to <4 x i16>
-  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vmovn.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  ret <8 x i16> %shuffle.i
-}
-
-define <4 x i32> @test_vmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
-; CHECK: xtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
-  %vmovn.i.i = trunc <2 x i64> %b to <2 x i32>
-  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vmovn.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  ret <4 x i32> %shuffle.i
-}
-
-define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 {
-; CHECK: sqxtun v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
-  %vqdmull1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a) #4
-  ret <8 x i8> %vqdmull1.i
-}
-
-define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 {
-; CHECK: sqxtun v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
-  %vqdmull1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a) #4
-  ret <4 x i16> %vqdmull1.i
-}
-
-define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 {
-; CHECK: sqxtun v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
-  %vqdmull1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a) #4
-  ret <2 x i32> %vqdmull1.i
-}
-
-define <16 x i8> @test_vqmovun_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
-; CHECK: sqxtun2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
-  %vqdmull1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %b) #4
-  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqdmull1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  ret <16 x i8> %shuffle.i
-}
-
-define <8 x i16> @test_vqmovun_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
-; CHECK: sqxtun2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
-  %vqdmull1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %b) #4
-  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqdmull1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  ret <8 x i16> %shuffle.i
-}
-
-define <4 x i32> @test_vqmovun_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
-; CHECK: sqxtun2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
-  %vqdmull1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %b) #4
-  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqdmull1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  ret <4 x i32> %shuffle.i
-}
-
-define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 {
-; CHECK: sqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
-  %vqmovn1.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a) #4
-  ret <8 x i8> %vqmovn1.i
-}
-
-define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 {
-; CHECK: sqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
-  %vqmovn1.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a) #4
-  ret <4 x i16> %vqmovn1.i
-}
-
-define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 {
-; CHECK: sqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
-  %vqmovn1.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a) #4
-  ret <2 x i32> %vqmovn1.i
-}
-
-define <16 x i8> @test_vqmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
-; CHECK: sqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
-  %vqmovn1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %b) #4
-  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  ret <16 x i8> %shuffle.i
-}
-
-define <8 x i16> @test_vqmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
-; CHECK: sqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
-  %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %b) #4
-  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  ret <8 x i16> %shuffle.i
-}
-
-define <4 x i32> @test_vqmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
-; CHECK: sqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
-  %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %b) #4
-  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  ret <4 x i32> %shuffle.i
-}
-
-define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 {
-; CHECK: uqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
-  %vqmovn1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a) #4
-  ret <8 x i8> %vqmovn1.i
-}
-
-define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 {
-; CHECK: uqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
-  %vqmovn1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a) #4
-  ret <4 x i16> %vqmovn1.i
-}
-
-define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 {
-; CHECK: uqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
-  %vqmovn1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a) #4
-  ret <2 x i32> %vqmovn1.i
-}
-
-define <16 x i8> @test_vqmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 {
-; CHECK: uqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
-  %vqmovn1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %b) #4
-  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  ret <16 x i8> %shuffle.i
-}
-
-define <8 x i16> @test_vqmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 {
-; CHECK: uqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
-  %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %b) #4
-  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  ret <8 x i16> %shuffle.i
-}
-
-define <4 x i32> @test_vqmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 {
-; CHECK: uqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
-  %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %b) #4
-  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  ret <4 x i32> %shuffle.i
-}
-
-define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
-; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8
-  %1 = sext <8 x i8> %a to <8 x i16>
-  %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  ret <8 x i16> %vshll_n
-}
-
-define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
-; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16
-  %1 = sext <4 x i16> %a to <4 x i32>
-  %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
-  ret <4 x i32> %vshll_n
-}
-
-define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
-; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32
-  %1 = sext <2 x i32> %a to <2 x i64>
-  %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
-  ret <2 x i64> %vshll_n
-}
-
-define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
-; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8
-  %1 = zext <8 x i8> %a to <8 x i16>
-  %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  ret <8 x i16> %vshll_n
-}
-
-define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
-; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16
-  %1 = zext <4 x i16> %a to <4 x i32>
-  %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
-  ret <4 x i32> %vshll_n
-}
-
-define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
-; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32
-  %1 = zext <2 x i32> %a to <2 x i64>
-  %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
-  ret <2 x i64> %vshll_n
-}
-
-define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 {
-; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8
-  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %1 = sext <8 x i8> %shuffle.i to <8 x i16>
-  %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  ret <8 x i16> %vshll_n
-}
-
-define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 {
-; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %1 = sext <4 x i16> %shuffle.i to <4 x i32>
-  %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
-  ret <4 x i32> %vshll_n
-}
-
-define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 {
-; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %1 = sext <2 x i32> %shuffle.i to <2 x i64>
-  %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
-  ret <2 x i64> %vshll_n
-}
-
-define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 {
-; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8
-  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %1 = zext <8 x i8> %shuffle.i to <8 x i16>
-  %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  ret <8 x i16> %vshll_n
-}
-
-define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 {
-; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %1 = zext <4 x i16> %shuffle.i to <4 x i32>
-  %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
-  ret <4 x i32> %vshll_n
-}
-
-define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 {
-; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %1 = zext <2 x i32> %shuffle.i to <2 x i64>
-  %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
-  ret <2 x i64> %vshll_n
-}
-
-define <4 x i16> @test_vcvt_f16_f32(<4 x float> %a) #0 {
-; CHECK: fcvtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
-  %vcvt1.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a) #4
-  ret <4 x i16> %vcvt1.i
-}
-
-define <8 x i16> @test_vcvt_high_f16_f32(<4 x i16> %a, <4 x float> %b) #0 {
-; CHECK: fcvtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
-  %vcvt1.i.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %b) #4
-  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vcvt1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  ret <8 x i16> %shuffle.i
-}
-
-define <4 x float> @test_vcvt_f32_f16(<4 x i16> %a) #0 {
-; CHECK: fcvtl v{{[0-9]+}}.4s, v{{[0-9]+}}.4h
-  %vcvt1.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %a) #4
-  ret <4 x float> %vcvt1.i
-}
-
-define <4 x float> @test_vcvt_high_f32_f16(<8 x i16> %a) #0 {
-; CHECK: fcvtl2 v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vcvt1.i.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %shuffle.i.i) #4
-  ret <4 x float> %vcvt1.i.i
-}
-
-define <2 x float> @test_vcvt_f32_f64(<2 x double> %a) #0 {
-; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
-  %vcvt.i = fptrunc <2 x double> %a to <2 x float>
-  ret <2 x float> %vcvt.i
-}
-
-define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
-; CHECK: fcvtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
-  %vcvt.i.i = fptrunc <2 x double> %b to <2 x float>
-  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvt.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  ret <4 x float> %shuffle.i
-}
-
-define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 {
-; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
-  %vcvtx_f32_f641.i = call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %a) #4
-  ret <2 x float> %vcvtx_f32_f641.i
-}
-
-define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
-; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
-  %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %b) #4
-  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  ret <4 x float> %shuffle.i
-}
-
-define <2 x double> @test_vcvt_f64_f32(<2 x float> %a) #0 {
-; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
-  %vcvt.i = fpext <2 x float> %a to <2 x double>
-  ret <2 x double> %vcvt.i
-}
-
-define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %a) #0 {
-; CHECK: fcvtl2 v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
-  %shuffle.i.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
-  %vcvt.i.i = fpext <2 x float> %shuffle.i.i to <2 x double>
-  ret <2 x double> %vcvt.i.i
-}
-
-define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 {
-; CHECK: frintn v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vrndn1.i = tail call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) #4
-  ret <2 x float> %vrndn1.i
-}
-
-define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 {
-; CHECK: frintn v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vrndn1.i = tail call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) #4
-  ret <4 x float> %vrndn1.i
-}
-
-define <2 x double> @test_vrndnq_f64(<2 x double> %a) #0 {
-; CHECK: frintn v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vrndn1.i = tail call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a) #4
-  ret <2 x double> %vrndn1.i
-}
-
-define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 {
-; CHECK: frinta v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vrnda1.i = tail call <2 x float> @llvm.round.v2f32(<2 x float> %a) #4
-  ret <2 x float> %vrnda1.i
-}
-
-define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 {
-; CHECK: frinta v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vrnda1.i = tail call <4 x float> @llvm.round.v4f32(<4 x float> %a) #4
-  ret <4 x float> %vrnda1.i
-}
-
-define <2 x double> @test_vrndaq_f64(<2 x double> %a) #0 {
-; CHECK: frinta v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vrnda1.i = tail call <2 x double> @llvm.round.v2f64(<2 x double> %a) #4
-  ret <2 x double> %vrnda1.i
-}
-
-define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 {
-; CHECK: frintp v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vrndp1.i = tail call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) #4
-  ret <2 x float> %vrndp1.i
-}
-
-define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 {
-; CHECK: frintp v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vrndp1.i = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) #4
-  ret <4 x float> %vrndp1.i
-}
-
-define <2 x double> @test_vrndpq_f64(<2 x double> %a) #0 {
-; CHECK: frintp v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vrndp1.i = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) #4
-  ret <2 x double> %vrndp1.i
-}
-
-define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 {
-; CHECK: frintm v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vrndm1.i = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %a) #4
-  ret <2 x float> %vrndm1.i
-}
-
-define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 {
-; CHECK: frintm v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vrndm1.i = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %a) #4
-  ret <4 x float> %vrndm1.i
-}
-
-define <2 x double> @test_vrndmq_f64(<2 x double> %a) #0 {
-; CHECK: frintm v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vrndm1.i = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %a) #4
-  ret <2 x double> %vrndm1.i
-}
-
-define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 {
-; CHECK: frintx v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vrndx1.i = tail call <2 x float> @llvm.rint.v2f32(<2 x float> %a) #4
-  ret <2 x float> %vrndx1.i
-}
-
-define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 {
-; CHECK: frintx v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vrndx1.i = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %a) #4
-  ret <4 x float> %vrndx1.i
-}
-
-define <2 x double> @test_vrndxq_f64(<2 x double> %a) #0 {
-; CHECK: frintx v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vrndx1.i = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %a) #4
-  ret <2 x double> %vrndx1.i
-}
-
-define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 {
-; CHECK: frintz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vrnd1.i = tail call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) #4
-  ret <2 x float> %vrnd1.i
-}
-
-define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 {
-; CHECK: frintz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vrnd1.i = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) #4
-  ret <4 x float> %vrnd1.i
-}
-
-define <2 x double> @test_vrndq_f64(<2 x double> %a) #0 {
-; CHECK: frintz v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vrnd1.i = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) #4
-  ret <2 x double> %vrnd1.i
-}
-
-define <2 x float> @test_vrndi_f32(<2 x float> %a) #0 {
-; CHECK: frinti v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vrndi1.i = tail call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) #4
-  ret <2 x float> %vrndi1.i
-}
-
-define <4 x float> @test_vrndiq_f32(<4 x float> %a) #0 {
-; CHECK: frinti v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vrndi1.i = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) #4
-  ret <4 x float> %vrndi1.i
-}
-
-define <2 x double> @test_vrndiq_f64(<2 x double> %a) #0 {
-; CHECK: frinti v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vrndi1.i = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) #4
-  ret <2 x double> %vrndi1.i
-}
-
-define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 {
-; CHECK: fcvtzs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvt.i = fptosi <2 x float> %a to <2 x i32>
-  ret <2 x i32> %vcvt.i
-}
-
-define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 {
-; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvt.i = fptosi <4 x float> %a to <4 x i32>
-  ret <4 x i32> %vcvt.i
-}
-
-define <2 x i64> @test_vcvtq_s64_f64(<2 x double> %a) #0 {
-; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvt.i = fptosi <2 x double> %a to <2 x i64>
-  ret <2 x i64> %vcvt.i
-}
-
-define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 {
-; CHECK: fcvtzu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvt.i = fptoui <2 x float> %a to <2 x i32>
-  ret <2 x i32> %vcvt.i
-}
-
-define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 {
-; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvt.i = fptoui <4 x float> %a to <4 x i32>
-  ret <4 x i32> %vcvt.i
-}
-
-define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 {
-; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvt.i = fptoui <2 x double> %a to <2 x i64>
-  ret <2 x i64> %vcvt.i
-}
-
-define <2 x i64> @test_vcvt_s64_f32(<2 x float> %a) #0 {
-; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
-; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvt.i = fptosi <2 x float> %a to <2 x i64>
-  ret <2 x i64> %vcvt.i
-}
-
-define <2 x i64> @test_vcvt_u64_f32(<2 x float> %a) #0 {
-; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
-; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvt.i = fptoui <2 x float> %a to <2 x i64>
-  ret <2 x i64> %vcvt.i
-}
-
-define <4 x i16> @test_vcvt_s16_f32(<4 x float> %a) #0 {
-; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
-  %vcvt.i = fptosi <4 x float> %a to <4 x i16>
-  ret <4 x i16> %vcvt.i
-}
-
-define <4 x i16> @test_vcvt_u16_f32(<4 x float> %a) #0 {
-; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
-  %vcvt.i = fptoui <4 x float> %a to <4 x i16>
-  ret <4 x i16> %vcvt.i
-}
-
-define <2 x i32> @test_vcvt_s32_f64(<2 x double> %a) #0 {
-; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
-  %vcvt.i = fptosi <2 x double> %a to <2 x i32>
-  ret <2 x i32> %vcvt.i
-}
-
-define <2 x i32> @test_vcvt_u32_f64(<2 x double> %a) #0 {
-; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
-  %vcvt.i = fptoui <2 x double> %a to <2 x i32>
-  ret <2 x i32> %vcvt.i
-}
-
-define <1 x i8> @test_vcvt_s8_f64(<1 x double> %a) #0 {
-; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: ins v{{[0-9]+}}.b[0], w{{[0-9]+}}
-  %vcvt.i = fptosi <1 x double> %a to <1 x i8>
-  ret <1 x i8> %vcvt.i
-}
-
-define <1 x i8> @test_vcvt_u8_f64(<1 x double> %a) #0 {
-; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: ins v{{[0-9]+}}.b[0], w{{[0-9]+}}
-  %vcvt.i = fptoui <1 x double> %a to <1 x i8>
-  ret <1 x i8> %vcvt.i
-}
-
-define <1 x i16> @test_vcvt_s16_f64(<1 x double> %a) #0 {
-; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: ins v{{[0-9]+}}.h[0], w{{[0-9]+}}
-  %vcvt.i = fptosi <1 x double> %a to <1 x i16>
-  ret <1 x i16> %vcvt.i
-}
-
-define <1 x i16> @test_vcvt_u16_f64(<1 x double> %a) #0 {
-; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: ins v{{[0-9]+}}.h[0], w{{[0-9]+}}
-  %vcvt.i = fptoui <1 x double> %a to <1 x i16>
-  ret <1 x i16> %vcvt.i
-}
-
-define <1 x i32> @test_vcvt_s32_f64_v1(<1 x double> %a) #0 {
-; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: fmov s{{[0-9]+}}, w{{[0-9]+}}
-  %vcvt.i = fptosi <1 x double> %a to <1 x i32>
-  ret <1 x i32> %vcvt.i
-}
-
-define <1 x i32> @test_vcvt_u32_f64_v1(<1 x double> %a) #0 {
-; CHECK: fcvtzu w{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: fmov s{{[0-9]+}}, w{{[0-9]+}}
-  %vcvt.i = fptoui <1 x double> %a to <1 x i32>
-  ret <1 x i32> %vcvt.i
-}
-
-define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvtn_s32_f32
-; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtns_f321.i = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a)
-  ret <2 x i32> %vcvtns_f321.i
-}
-
-define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtnq_s32_f32
-; CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtns_f321.i = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a)
-  ret <4 x i32> %vcvtns_f321.i
-}
-
-define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtnq_s64_f64
-; CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtns_f641.i = call <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double> %a)
-  ret <2 x i64> %vcvtns_f641.i
-}
-
-define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvtn_u32_f32
-; CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtnu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a)
-  ret <2 x i32> %vcvtnu_f321.i
-}
-
-define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtnq_u32_f32
-; CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtnu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a)
-  ret <4 x i32> %vcvtnu_f321.i
-}
-
-define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtnq_u64_f64
-; CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtnu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double> %a)
-  ret <2 x i64> %vcvtnu_f641.i
-}
-
-define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvtp_s32_f32
-; CHECK: fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtps_f321.i = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a)
-  ret <2 x i32> %vcvtps_f321.i
-}
-
-define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtpq_s32_f32
-; CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtps_f321.i = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a)
-  ret <4 x i32> %vcvtps_f321.i
-}
-
-define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtpq_s64_f64
-; CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtps_f641.i = call <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double> %a)
-  ret <2 x i64> %vcvtps_f641.i
-}
-
-define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvtp_u32_f32
-; CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtpu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a)
-  ret <2 x i32> %vcvtpu_f321.i
-}
-
-define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtpq_u32_f32
-; CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtpu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a)
-  ret <4 x i32> %vcvtpu_f321.i
-}
-
-define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtpq_u64_f64
-; CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtpu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double> %a)
-  ret <2 x i64> %vcvtpu_f641.i
-}
-
-define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvtm_s32_f32
-; CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtms_f321.i = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a)
-  ret <2 x i32> %vcvtms_f321.i
-}
-
-define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtmq_s32_f32
-; CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtms_f321.i = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a)
-  ret <4 x i32> %vcvtms_f321.i
-}
-
-define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtmq_s64_f64
-; CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtms_f641.i = call <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double> %a)
-  ret <2 x i64> %vcvtms_f641.i
-}
-
-define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvtm_u32_f32
-; CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtmu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a)
-  ret <2 x i32> %vcvtmu_f321.i
-}
-
-define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtmq_u32_f32
-; CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtmu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a)
-  ret <4 x i32> %vcvtmu_f321.i
-}
-
-define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtmq_u64_f64
-; CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtmu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double> %a)
-  ret <2 x i64> %vcvtmu_f641.i
-}
-
-define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvta_s32_f32
-; CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtas_f321.i = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a)
-  ret <2 x i32> %vcvtas_f321.i
-}
-
-define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtaq_s32_f32
-; CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtas_f321.i = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a)
-  ret <4 x i32> %vcvtas_f321.i
-}
-
-define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtaq_s64_f64
-; CHECK: fcvtas v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtas_f641.i = call <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double> %a)
-  ret <2 x i64> %vcvtas_f641.i
-}
-
-define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvta_u32_f32
-; CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtau_f321.i = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a)
-  ret <2 x i32> %vcvtau_f321.i
-}
-
-define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtaq_u32_f32
-; CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtau_f321.i = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a)
-  ret <4 x i32> %vcvtau_f321.i
-}
-
-define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtaq_u64_f64
-; CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtau_f641.i = call <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double> %a)
-  ret <2 x i64> %vcvtau_f641.i
-}
-
-define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 {
-; CHECK: frsqrte v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vrsqrte1.i = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a) #4
-  ret <2 x float> %vrsqrte1.i
-}
-
-define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 {
-; CHECK: frsqrte v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vrsqrte1.i = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a) #4
-  ret <4 x float> %vrsqrte1.i
-}
-
-define <2 x double> @test_vrsqrteq_f64(<2 x double> %a) #0 {
-; CHECK: frsqrte v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vrsqrte1.i = tail call <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double> %a) #4
-  ret <2 x double> %vrsqrte1.i
-}
-
-define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 {
-; CHECK: frecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vrecpe1.i = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a) #4
-  ret <2 x float> %vrecpe1.i
-}
-
-define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 {
-; CHECK: frecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vrecpe1.i = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a) #4
-  ret <4 x float> %vrecpe1.i
-}
-
-define <2 x double> @test_vrecpeq_f64(<2 x double> %a) #0 {
-; CHECK: frecpe v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vrecpe1.i = tail call <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double> %a) #4
-  ret <2 x double> %vrecpe1.i
-}
-
-define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 {
-; CHECK: urecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vrecpe1.i = tail call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a) #4
-  ret <2 x i32> %vrecpe1.i
-}
-
-define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 {
-; CHECK: urecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vrecpe1.i = tail call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a) #4
-  ret <4 x i32> %vrecpe1.i
-}
-
-define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 {
-; CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vsqrt1.i = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #4
-  ret <2 x float> %vsqrt1.i
-}
-
-define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 {
-; CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vsqrt1.i = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #4
-  ret <4 x float> %vsqrt1.i
-}
-
-define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 {
-; CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vsqrt1.i = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #4
-  ret <2 x double> %vsqrt1.i
-}
-
-define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 {
-; CHECK: scvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvt.i = sitofp <2 x i32> %a to <2 x float>
-  ret <2 x float> %vcvt.i
-}
-
-define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 {
-; CHECK: ucvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvt.i = uitofp <2 x i32> %a to <2 x float>
-  ret <2 x float> %vcvt.i
-}
-
-define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 {
-; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvt.i = sitofp <4 x i32> %a to <4 x float>
-  ret <4 x float> %vcvt.i
-}
-
-define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 {
-; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvt.i = uitofp <4 x i32> %a to <4 x float>
-  ret <4 x float> %vcvt.i
-}
-
-define <2 x double> @test_vcvtq_f64_s64(<2 x i64> %a) #0 {
-; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvt.i = sitofp <2 x i64> %a to <2 x double>
-  ret <2 x double> %vcvt.i
-}
-
-define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 {
-; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvt.i = uitofp <2 x i64> %a to <2 x double>
-  ret <2 x double> %vcvt.i
-}
-
-define <2 x float> @test_vcvt_f32_s64(<2 x i64> %a) #0 {
-; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
-  %vcvt.i = sitofp <2 x i64> %a to <2 x float>
-  ret <2 x float> %vcvt.i
-}
-
-define <2 x float> @test_vcvt_f32_u64(<2 x i64> %a) #0 {
-; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
-  %vcvt.i = uitofp <2 x i64> %a to <2 x float>
-  ret <2 x float> %vcvt.i
-}
-
-define <4 x float> @test_vcvt_f32_s16(<4 x i16> %a) #0 {
-; CHECK: sshll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0
-; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvt.i = sitofp <4 x i16> %a to <4 x float>
-  ret <4 x float> %vcvt.i
-}
-
-define <4 x float> @test_vcvt_f32_u16(<4 x i16> %a) #0 {
-; CHECK: ushll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0
-; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvt.i = uitofp <4 x i16> %a to <4 x float>
-  ret <4 x float> %vcvt.i
-}
-
-define <2 x double> @test_vcvt_f64_s32(<2 x i32> %a) #0 {
-; CHECK: sshll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0
-; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvt.i = sitofp <2 x i32> %a to <2 x double>
-  ret <2 x double> %vcvt.i
-}
-
-define <2 x double> @test_vcvt_f64_u32(<2 x i32> %a) #0 {
-; CHECK: ushll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0
-; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvt.i = uitofp <2 x i32> %a to <2 x double>
-  ret <2 x double> %vcvt.i
-}
-
-define <1 x double> @test_vcvt_f64_s8(<1 x i8> %a) #0 {
-; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.b[0]
-; CHECK: sxtb w{{[0-9]+}}, w{{[0-9]+}}
-; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}}
-  %vcvt.i = sitofp <1 x i8> %a to <1 x double>
-  ret <1 x double> %vcvt.i
-}
-
-define <1 x double> @test_vcvt_f64_u8(<1 x i8> %a) #0 {
-; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.b[0]
-; CHECK: and w{{[0-9]+}}, w{{[0-9]+}}, #0xff
-; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}}
-  %vcvt.i = uitofp <1 x i8> %a to <1 x double>
-  ret <1 x double> %vcvt.i
-}
-
-define <1 x double> @test_vcvt_f64_s16(<1 x i16> %a) #0 {
-; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.h[0]
-; CHECK: sxth w{{[0-9]+}}, w{{[0-9]+}}
-; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}}
-  %vcvt.i = sitofp <1 x i16> %a to <1 x double>
-  ret <1 x double> %vcvt.i
-}
-
-define <1 x double> @test_vcvt_f64_u16(<1 x i16> %a) #0 {
-; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.h[0]
-; CHECK: and w{{[0-9]+}}, w{{[0-9]+}}, #0xffff
-; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}}
-  %vcvt.i = uitofp <1 x i16> %a to <1 x double>
-  ret <1 x double> %vcvt.i
-}
-
-define <1 x double> @test_vcvt_f64_s32_v1(<1 x i32> %a) #0 {
-; CHECK: fmov w{{[0-9]+}}, s{{[0-9]+}}
-; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}}
-  %vcvt.i = sitofp <1 x i32> %a to <1 x double>
-  ret <1 x double> %vcvt.i
-}
-
-define <1 x double> @test_vcvt_f64_u32_v1(<1 x i32> %a) #0 {
-; CHECK: fmov w{{[0-9]+}}, s{{[0-9]+}}
-; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}}
-  %vcvt.i = uitofp <1 x i32> %a to <1 x double>
-  ret <1 x double> %vcvt.i
-}
-
-declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2
-
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2
-
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #2
-
-declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) #2
-
-declare <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double>) #2
-
-declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) #2
-
-declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) #2
-
-declare <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double>) #2
-
-declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) #2
-
-declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) #2
-
-declare <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float>)
-
-declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #3
-
-declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.trunc.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.trunc.v2f32(<2 x float>) #3
-
-declare <2 x double> @llvm.rint.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.rint.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.rint.v2f32(<2 x float>) #3
-
-declare <2 x double> @llvm.floor.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.floor.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.floor.v2f32(<2 x float>) #3
-
-declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.ceil.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.ceil.v2f32(<2 x float>) #3
-
-declare <2 x double> @llvm.round.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.round.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.round.v2f32(<2 x float>) #3
-
-declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) #2
-
-declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) #2
-
-declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2
-
-declare <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double>) #2
-
-declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2
-
-declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) #2
-
-declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) #2
-
-declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) #2
-
-declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) #2
-
-declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) #2
-
-declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) #2
-
-declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) #2
-
-declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) #2
-
-declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) #2
-
-declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) #2
-
-declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) #2
-
-declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) #2
-
-declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) #2
-
-declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #2
-
-declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) #2
-
-declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) #2
-
-declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) #2
-
-declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) #2
-
-declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) #2
-
-declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) #2
-
-declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) #2
-
-declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) #2
-
-declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) #2
-
-declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) #2
-
-declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) #2
-
-declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) #2
-
-declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) #2
-
-declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) #2
-
-declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) #2
-
-declare <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) #2
-
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #3
-
-declare <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64>) #2
-
-declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) #2
-
-declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) #2
-
-declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) #2
-
-declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) #2
-
-declare <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64>) #2
-
-declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) #2
-
-declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) #2
-
-declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) #2
-
-declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) #2
-
-declare <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64>) #2
-
-declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) #2
-
-declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) #2
-
-declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) #2
-
-declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) #2
-
-declare <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64>, <4 x i32>) #2
-
-declare <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32>, <8 x i16>) #2
-
-declare <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16>, <16 x i8>) #2
-
-declare <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64>, <4 x i32>) #2
-
-declare <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32>, <8 x i16>) #2
-
-declare <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16>, <16 x i8>) #2
-
-declare <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64>, <2 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32>, <4 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16>, <8 x i8>) #2
-
-declare <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64>, <2 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32>, <4 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16>, <8 x i8>) #2
-
-declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) #2
-
-declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) #2
-
-declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) #2
-
-declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) #2
-
-declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) #2
-
-declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) #2
-
-declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) #2
-
-declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) #2
-
-declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) #2
-
-
-define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvt_s64_f64
-; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fptosi <1 x double> %a to <1 x i64>
-  ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvt_u64_f64
-; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fptoui <1 x double> %a to <1 x i64>
-  ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvtn_s64_f64
-; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = call <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double> %a)
-  ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvtn_u64_f64
-; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = call <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double> %a)
-  ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvtp_s64_f64
-; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = call <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double> %a)
-  ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvtp_u64_f64
-; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = call <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double> %a)
-  ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvtm_s64_f64
-; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = call <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double> %a)
-  ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvtm_u64_f64
-; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = call <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double> %a)
-  ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvta_s64_f64
-; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = call <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double> %a)
-  ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvta_u64_f64
-; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = call <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double> %a)
-  ret <1 x i64> %1
-}
-
-define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) {
-; CHECK-LABEL: test_vcvt_f64_s64
-; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = sitofp <1 x i64> %a to <1 x double>
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) {
-; CHECK-LABEL: test_vcvt_f64_u64
-; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = uitofp <1 x i64> %a to <1 x double>
-  ret <1 x double> %1
-}
-
-declare <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double>)
-
-define <1 x double> @test_vrndn_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrndn_f64
-; CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vrnda_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrnda_f64
-; CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.round.v1f64(<1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vrndp_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrndp_f64
-; CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vrndm_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrndm_f64
-; CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vrndx_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrndx_f64
-; CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vrnd_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrnd_f64
-; CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vrndi_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrndi_f64
-; CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
-  ret <1 x double> %1
-}
-
-declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
-declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
-declare <1 x double> @llvm.rint.v1f64(<1 x double>)
-declare <1 x double> @llvm.floor.v1f64(<1 x double>)
-declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
-declare <1 x double> @llvm.round.v1f64(<1 x double>)
-declare <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double>)
-
-define <1 x double> @test_vrsqrte_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrsqrte_f64
-; CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vrecpe_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrecpe_f64
-; CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vsqrt_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vsqrt_f64
-; CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vrecps_f64
-; CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %a, <1 x double> %b)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vrsqrts_f64
-; CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %a, <1 x double> %b)
-  ret <1 x double> %1
-}
-
-declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.sqrt.v1f64(<1 x double>)
-declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>)
-declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
-
-define i64 @test_vaddlv_s32(<2 x i32> %a) {
-; CHECK-LABEL: test_vaddlv_s32
-; CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
-  %1 = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32> %a)
-  %2 = extractelement <1 x i64> %1, i32 0
-  ret i64 %2
-}
-
-define i64 @test_vaddlv_u32(<2 x i32> %a) {
-; CHECK-LABEL: test_vaddlv_u32
-; CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
-  %1 = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32> %a)
-  %2 = extractelement <1 x i64> %1, i32 0
-  ret i64 %2
-}
-
-declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32>)
-declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32>)

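Every file in this batch follows the same pattern: the aarch64-triple RUN lines go away, and the arm64-triple lines, together with the FileCheck expectations both backends shared, stay verbatim. As a minimal sketch, a conversion test in the surviving style (demo_scvtf is a hypothetical name, not part of the commit; the expectation mirrors the scvtf check deleted above) reads:

; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s

define <2 x float> @demo_scvtf(<2 x i32> %a) {
; CHECK-LABEL: demo_scvtf
; CHECK: scvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
  %cvt = sitofp <2 x i32> %a to <2 x float>
  ret <2 x float> %cvt
}

Because the body is plain IR (sitofp, no backend-specific intrinsics), the same file passes under either triple; only the RUN line needed to change.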
Modified: llvm/trunk/test/CodeGen/AArch64/neon-mla-mls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-mla-mls.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-mla-mls.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-mla-mls.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 

Modified: llvm/trunk/test/CodeGen/AArch64/neon-mov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-mov.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-mov.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-mov.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK  --check-prefix=CHECK-AARCH64
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
 
 define <8 x i8> @movi8b() {
@@ -15,21 +14,18 @@ define <16 x i8> @movi16b() {
 
 define <2 x i32> @movi2s_lsl0() {
 ; CHECK-LABEL: movi2s_lsl0:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.2s, #0xff
 ; CHECK-ARM64: movi {{d[0-9]+}}, #0x0000ff000000ff
    ret <2 x i32> < i32 255, i32 255 >
 }
 
 define <2 x i32> @movi2s_lsl8() {
 ; CHECK-LABEL: movi2s_lsl8:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.2s, #0xff, lsl #8
 ; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ff000000ff00
    ret <2 x i32> < i32 65280, i32 65280 >
 }
 
 define <2 x i32> @movi2s_lsl16() {
 ; CHECK-LABEL: movi2s_lsl16:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.2s, #0xff, lsl #16
 ; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff0000
    ret <2 x i32> < i32 16711680, i32 16711680 >
 
@@ -37,28 +33,24 @@ define <2 x i32> @movi2s_lsl16() {
 
 define <2 x i32> @movi2s_lsl24() {
 ; CHECK-LABEL: movi2s_lsl24:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.2s, #0xff, lsl #24
 ; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff000000
    ret <2 x i32> < i32 4278190080, i32 4278190080 >
 }
 
 define <4 x i32> @movi4s_lsl0() {
 ; CHECK-LABEL: movi4s_lsl0:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.4s, #0xff
 ; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x0000ff000000ff
    ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 >
 }
 
 define <4 x i32> @movi4s_lsl8() {
 ; CHECK-LABEL: movi4s_lsl8:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.4s, #0xff, lsl #8
 ; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x00ff000000ff00
    ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 >
 }
 
 define <4 x i32> @movi4s_lsl16() {
 ; CHECK-LABEL: movi4s_lsl16:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.4s, #0xff, lsl #16
 ; CHECK-ARM64:  movi {{v[0-9]+}}.2d, #0xff000000ff0000
    ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 >
 
@@ -66,35 +58,30 @@ define <4 x i32> @movi4s_lsl16() {
 
 define <4 x i32> @movi4s_lsl24() {
 ; CHECK-LABEL: movi4s_lsl24:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.4s, #0xff, lsl #24
 ; CHECK-ARM64:  movi {{v[0-9]+}}.2d, #0xff000000ff000000
    ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 >
 }
 
 define <4 x i16> @movi4h_lsl0() {
 ; CHECK-LABEL: movi4h_lsl0:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.4h, #0xff
 ; CHECK-ARM64:  movi {{d[0-9]+}}, #0xff00ff00ff00ff
    ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 >
 }
 
 define <4 x i16> @movi4h_lsl8() {
 ; CHECK-LABEL: movi4h_lsl8:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8
 ; CHECK-ARM64: movi d0, #0xff00ff00ff00ff00
    ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 >
 }
 
 define <8 x i16> @movi8h_lsl0() {
 ; CHECK-LABEL: movi8h_lsl0:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.8h, #{{0xff|255}}
 ; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff
    ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 >
 }
 
 define <8 x i16> @movi8h_lsl8() {
 ; CHECK-LABEL: movi8h_lsl8:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8
 ; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff00
    ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
 }
@@ -177,14 +164,12 @@ define <8 x i16> @mvni8h_lsl8() {
 
 define <2 x i32> @movi2s_msl8(<2 x i32> %a) {
 ; CHECK-LABEL: movi2s_msl8:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.2s, #0xff, msl #8
 ; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ffff0000ffff
 	ret <2 x i32> < i32 65535, i32 65535 >
 }
 
 define <2 x i32> @movi2s_msl16() {
 ; CHECK-LABEL: movi2s_msl16:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.2s, #0xff, msl #16
 ; CHECK-ARM64:  movi d0, #0xffffff00ffffff
    ret <2 x i32> < i32 16777215, i32 16777215 >
 }
@@ -192,14 +177,12 @@ define <2 x i32> @movi2s_msl16() {
 
 define <4 x i32> @movi4s_msl8() {
 ; CHECK-LABEL: movi4s_msl8:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.4s, #0xff, msl #8
 ; CHECK-ARM64:  movi v0.2d, #0x00ffff0000ffff
    ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 >
 }
 
 define <4 x i32> @movi4s_msl16() {
 ; CHECK-LABEL: movi4s_msl16:
-; CHECK-AARCH64:  movi {{v[0-9]+}}.4s, #0xff, msl #16
 ; CHECK-ARM64:  movi v0.2d, #0xffffff00ffffff
    ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 >
 }

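neon-mov.ll shows the other half of the cleanup: where one RUN line drove two check prefixes, the CHECK-AARCH64 expectations are deleted and only the shared CHECK plus CHECK-ARM64 lines survive. A minimal sketch of the multi-prefix mechanism these tests rely on (demo_movi2s is a hypothetical name; the expectation is copied from movi2s_lsl0 above):

; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64

define <2 x i32> @demo_movi2s() {
; CHECK-LABEL: demo_movi2s:
; CHECK-ARM64: movi {{d[0-9]+}}, #0x0000ff000000ff
  ret <2 x i32> <i32 255, i32 255>
}

Passing both prefixes makes FileCheck match the shared CHECK-LABEL lines and the backend-specific CHECK-ARM64 lines from a single run, which is why dropping the CHECK-AARCH64 lines leaves a still-complete test.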
Removed: llvm/trunk/test/CodeGen/AArch64/neon-mul-div.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-mul-div.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-mul-div.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-mul-div.ll (removed)
@@ -1,754 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 has its own copy of this because of the intrinsics
-
-define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-	%tmp3 = mul <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-	%tmp3 = mul <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-	%tmp3 = mul <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-	%tmp3 = mul <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = mul <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = mul <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK-LABEL: mul1xi64:
-;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
-  %tmp3 = mul <1 x i64> %A, %B;
-  ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK-LABEL: mul2xi64:
-;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
-;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
-  %tmp3 = mul <2 x i64> %A, %B;
-  ret <2 x i64> %tmp3
-}
-
- define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = fmul <2 x float> %A, %B;
-	ret <2 x float> %tmp3
-}
-
-define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = fmul <4 x float> %A, %B;
-	ret <4 x float> %tmp3
-}
-define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-	%tmp3 = fmul <2 x double> %A, %B;
-	ret <2 x double> %tmp3
-}
-
-
- define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = fdiv <2 x float> %A, %B;
-	ret <2 x float> %tmp3
-}
-
-define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = fdiv <4 x float> %A, %B;
-	ret <4 x float> %tmp3
-}
-define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-	%tmp3 = fdiv <2 x double> %A, %B;
-	ret <2 x double> %tmp3
-}
-
-define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <1 x i8> %A, %B;
-	ret <1 x i8> %tmp3
-}
-
-define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <1 x i16> %A, %B;
-	ret <1 x i16> %tmp3
-}
-
-define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <1 x i32> %A, %B;
-	ret <1 x i32> %tmp3
-}
-
-define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = sdiv <1 x i64> %A, %B;
-	ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = sdiv <2 x i64> %A, %B;
-	ret <2 x i64> %tmp3
-}
-
-define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <1 x i8> %A, %B;
-	ret <1 x i8> %tmp3
-}
-
-define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <1 x i16> %A, %B;
-	ret <1 x i16> %tmp3
-}
-
-define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <1 x i32> %A, %B;
-	ret <1 x i32> %tmp3
-}
-
-define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = udiv <1 x i64> %A, %B;
-	ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = udiv <2 x i64> %A, %B;
-	ret <2 x i64> %tmp3
-}
-
-define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <1 x i8> %A, %B;
-	ret <1 x i8> %tmp3
-}
-
-define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <1 x i16> %A, %B;
-	ret <1 x i16> %tmp3
-}
-
-define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <1 x i32> %A, %B;
-	ret <1 x i32> %tmp3
-}
-
-define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = srem <1 x i64> %A, %B;
-	ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = srem <2 x i64> %A, %B;
-	ret <2 x i64> %tmp3
-}
-
-define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <1 x i8> %A, %B;
-	ret <1 x i8> %tmp3
-}
-
-define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <1 x i16> %A, %B;
-	ret <1 x i16> %tmp3
-}
-
-define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <1 x i32> %A, %B;
-	ret <1 x i32> %tmp3
-}
-
-define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = urem <1 x i64> %A, %B;
-	ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = urem <2 x i64> %A, %B;
-	ret <2 x i64> %tmp3
-}
-
-define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) {
-; CHECK: bl fmodf
-; CHECK: bl fmodf
-	%tmp3 = frem <2 x float> %A, %B;
-	ret <2 x float> %tmp3
-}
-
-define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
-; CHECK: bl fmodf
-; CHECK: bl fmodf
-; CHECK: bl fmodf
-; CHECK: bl fmodf
-	%tmp3 = frem <4 x float> %A, %B;
-	ret <4 x float> %tmp3
-}
-
-define <1 x double> @frem1d64(<1 x double> %A, <1 x double> %B) {
-; CHECK: bl fmod
-	%tmp3 = frem <1 x double> %A, %B;
-	ret <1 x double> %tmp3
-}
-
-define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) {
-; CHECK: bl fmod
-; CHECK: bl fmod
-	%tmp3 = frem <2 x double> %A, %B;
-	ret <2 x double> %tmp3
-}
-
-declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>)
-declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>)
-
-define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: poly_mulv8i8:
-   %prod = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: pmul v0.8b, v0.8b, v1.8b
-   ret <8 x i8> %prod
-}
-
-define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: poly_mulv16i8:
-   %prod = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: pmul v0.16b, v0.16b, v1.16b
-   ret <16 x i8> %prod
-}
-
-declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>)
-declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>)
-declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>)
-declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sqdmulh_v4i16:
-   %prod = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqdmulh v0.4h, v0.4h, v1.4h
-   ret <4 x i16> %prod
-}
-
-define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sqdmulh_v8i16:
-   %prod = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqdmulh v0.8h, v0.8h, v1.8h
-   ret <8 x i16> %prod
-}
-
-define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sqdmulh_v2i32:
-   %prod = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqdmulh v0.2s, v0.2s, v1.2s
-   ret <2 x i32> %prod
-}
-
-define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sqdmulh_v4i32:
-   %prod = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqdmulh v0.4s, v0.4s, v1.4s
-   ret <4 x i32> %prod
-}
-
-declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
-declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
-declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
-declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sqrdmulh_v4i16:
-   %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h
-   ret <4 x i16> %prod
-}
-
-define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sqrdmulh_v8i16:
-   %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h
-   ret <8 x i16> %prod
-}
-
-define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sqrdmulh_v2i32:
-   %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s
-   ret <2 x i32> %prod
-}
-
-define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sqrdmulh_v4i32:
-   %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s
-   ret <4 x i32> %prod
-}
-
-declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: fmulx v0.2s, v0.2s, v1.2s
-        %val = call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-        ret <2 x float> %val
-}
-
-define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: fmulx v0.4s, v0.4s, v1.4s
-        %val = call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-        ret <4 x float> %val
-}
-
-define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: fmulx v0.2d, v0.2d, v1.2d
-        %val = call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-        ret <2 x double> %val
-}
-
-define <1 x i8> @test_mul_v1i8(<1 x i8> %a, <1 x i8> %b) {
-;CHECK-LABEL: test_mul_v1i8:
-;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %c = mul <1 x i8> %a, %b
-  ret <1 x i8> %c
-}
-
-define <1 x i16> @test_mul_v1i16(<1 x i16> %a, <1 x i16> %b) {
-;CHECK-LABEL: test_mul_v1i16:
-;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %c = mul <1 x i16> %a, %b
-  ret <1 x i16> %c
-}
-
-define <1 x i32> @test_mul_v1i32(<1 x i32> %a, <1 x i32> %b) {
-;CHECK-LABEL: test_mul_v1i32:
-;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %c = mul <1 x i32> %a, %b
-  ret <1 x i32> %c
-}

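neon-mul-div.ll is removed outright rather than re-triple'd because, as its own comment notes, arm64 carries a separate copy owing to intrinsic differences. The plain mul/div/rem portions need no intrinsics, though: NEON has no integer divide, so each lane round-trips through a general-purpose register, and srem additionally needs an msub per lane. A minimal sketch of that pattern (demo_srem2x32 is a hypothetical name, assuming the arm64 copy checks the same scalarized lowering as srem2x32 above):

; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

define <2 x i32> @demo_srem2x32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: demo_srem2x32:
; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
  %r = srem <2 x i32> %A, %B
  ret <2 x i32> %r
}

One sdiv/msub pair per lane is exactly what the deleted checks above enumerate, scaled up to eight or sixteen lanes for the wider vector types.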
Modified: llvm/trunk/test/CodeGen/AArch64/neon-or-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-or-combine.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-or-combine.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-or-combine.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 ; Check that the DAGCombiner does not crash with an assertion failure

Modified: llvm/trunk/test/CodeGen/AArch64/neon-perm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-perm.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-perm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-perm.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
 
 %struct.int8x8x2_t = type { [2 x <8 x i8>] }
@@ -54,7 +53,6 @@ entry:
 
 define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vuzp1_s32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
@@ -71,7 +69,6 @@ entry:
 
 define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vuzp1q_s64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
@@ -112,7 +109,6 @@ entry:
 
 define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vuzp1_u32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
@@ -129,7 +125,6 @@ entry:
 
 define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vuzp1q_u64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
@@ -138,7 +133,6 @@ entry:
 
 define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vuzp1_f32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
@@ -155,7 +149,6 @@ entry:
 
 define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vuzp1q_f64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
@@ -228,7 +221,6 @@ entry:
 
 define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vuzp2_s32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -245,8 +237,6 @@ entry:
 
 define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vuzp2q_s64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
-; CHECK-AARCH64-NEXT: mov {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
@@ -287,7 +277,6 @@ entry:
 
 define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vuzp2_u32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -304,8 +293,6 @@ entry:
 
 define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vuzp2q_u64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
-; CHECK-AARCH64-NEXT: mov {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
@@ -314,7 +301,6 @@ entry:
 
 define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vuzp2_f32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
@@ -331,8 +317,6 @@ entry:
 
 define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vuzp2q_f64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
-; CHECK-AARCH64-NEXT: mov {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
@@ -405,7 +389,6 @@ entry:
 
 define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vzip1_s32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
@@ -422,7 +405,6 @@ entry:
 
 define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vzip1q_s64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
@@ -463,7 +445,6 @@ entry:
 
 define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vzip1_u32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
@@ -480,7 +461,6 @@ entry:
 
 define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vzip1q_u64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
@@ -489,7 +469,6 @@ entry:
 
 define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vzip1_f32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
@@ -506,7 +485,6 @@ entry:
 
 define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vzip1q_f64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
@@ -579,7 +557,6 @@ entry:
 
 define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vzip2_s32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -596,7 +573,6 @@ entry:
 
 define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vzip2q_s64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
@@ -637,7 +613,6 @@ entry:
 
 define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vzip2_u32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -654,7 +629,6 @@ entry:
 
 define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vzip2q_u64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
@@ -663,7 +637,6 @@ entry:
 
 define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vzip2_f32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
@@ -680,7 +653,6 @@ entry:
 
 define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vzip2q_f64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
@@ -753,7 +725,6 @@ entry:
 
 define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vtrn1_s32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
@@ -770,7 +741,6 @@ entry:
 
 define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vtrn1q_s64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
@@ -811,7 +781,6 @@ entry:
 
 define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vtrn1_u32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
@@ -828,7 +797,6 @@ entry:
 
 define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vtrn1q_u64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
@@ -837,7 +805,6 @@ entry:
 
 define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vtrn1_f32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
@@ -854,7 +821,6 @@ entry:
 
 define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vtrn1q_f64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
@@ -927,7 +893,6 @@ entry:
 
 define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vtrn2_s32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -944,7 +909,6 @@ entry:
 
 define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vtrn2q_s64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
@@ -985,7 +949,6 @@ entry:
 
 define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vtrn2_u32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -1002,7 +965,6 @@ entry:
 
 define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vtrn2q_u64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
@@ -1011,7 +973,6 @@ entry:
 
 define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vtrn2_f32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
@@ -1028,7 +989,6 @@ entry:
 
 define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vtrn2q_f64:
-; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
@@ -2534,8 +2494,6 @@ entry:
 
 define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vuzp_s32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
@@ -2572,8 +2530,6 @@ entry:
 
 define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vuzp_u32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
@@ -2586,8 +2542,6 @@ entry:
 
 define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vuzp_f32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
@@ -2756,8 +2710,6 @@ entry:
 
 define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vzip_s32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
@@ -2794,8 +2746,6 @@ entry:
 
 define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vzip_u32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
@@ -2808,8 +2758,6 @@ entry:
 
 define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vzip_f32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
@@ -2978,8 +2926,6 @@ entry:
 
 define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vtrn_s32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
@@ -3016,8 +2962,6 @@ entry:
 
 define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vtrn_u32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
@@ -3030,8 +2974,6 @@ entry:
 
 define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vtrn_f32:
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
@@ -3183,7 +3125,4 @@ define %struct.uint8x8x2_t @test_uzp(<16
   %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
   ret %struct.uint8x8x2_t %.fca.0.1.insert
 
-; CHECK-AARCH64: dup	{{d[0-9]+}}, {{v[0-9]+}}.d[1]
-; CHECK-AARCH64-NEXT: uzp1	{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-; CHECK-AARCH64-NEXT: uzp2	{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
 }

Removed: llvm/trunk/test/CodeGen/AArch64/neon-rounding-halving-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-rounding-halving-add.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-rounding-halving-add.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-rounding-halving-add.ll (removed)
@@ -1,106 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; Just intrinsic calls: arm64 has similar tests in vhadd.ll
-
-declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_urhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_urhadd_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: urhadd v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_srhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_srhadd_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: srhadd v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_urhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_urhadd_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: urhadd v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_srhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_srhadd_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: srhadd v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_urhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_urhadd_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: urhadd v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_srhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_srhadd_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: srhadd v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_urhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_urhadd_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: urhadd v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_srhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_srhadd_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: srhadd v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_urhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_urhadd_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: urhadd v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_srhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_srhadd_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: srhadd v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_urhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_urhadd_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: urhadd v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_srhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_srhadd_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: srhadd v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-

Removed: llvm/trunk/test/CodeGen/AArch64/neon-rounding-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-rounding-shift.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-rounding-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-rounding-shift.ll (removed)
@@ -1,122 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; Just intrinsic calls: arm64 has similar tests in vshift.ll
-
-declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_urshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_urshl_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: urshl v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_srshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_srshl_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: srshl v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_urshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_urshl_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: urshl v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_srshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_srshl_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: srshl v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_urshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_urshl_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: urshl v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_srshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_srshl_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: srshl v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_urshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_urshl_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: urshl v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_srshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_srshl_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: srshl v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_urshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_urshl_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: urshl v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_srshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_srshl_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: srshl v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_urshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_urshl_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: urshl v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_srshl_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: srshl v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>)
-declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_urshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_urshl_v2i64:
-  %tmp1 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: urshl v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %tmp1
-}
-
-define <2 x i64> @test_srshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_srshl_v2i64:
-  %tmp1 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: srshl v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %tmp1
-}
-

Removed: llvm/trunk/test/CodeGen/AArch64/neon-saturating-add-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-saturating-add-sub.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-saturating-add-sub.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-saturating-add-sub.ll (removed)
@@ -1,241 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; Just intrinsic calls: arm64 has similar tests in vqadd.ll
-declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uqadd_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uqadd v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_sqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sqadd_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sqadd v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uqadd_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uqadd v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_sqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sqadd_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sqadd v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uqadd_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uqadd v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_sqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sqadd_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqadd v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uqadd_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uqadd v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_sqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sqadd_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqadd v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uqadd_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uqadd v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_sqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sqadd_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqadd v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uqadd_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uqadd v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sqadd_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqadd v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-
-
-declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>)
-declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_uqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_uqadd_v2i64:
-  %tmp1 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: uqadd v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %tmp1
-}
-
-define <2 x i64> @test_sqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_sqadd_v2i64:
-  %tmp1 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: sqadd v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %tmp1
-}
-
-declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uqsub_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uqsub v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_sqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sqsub_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sqsub v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uqsub_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uqsub v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_sqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sqsub_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sqsub v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uqsub_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uqsub v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_sqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sqsub_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqsub v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uqsub_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uqsub v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_sqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sqsub_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqsub v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uqsub_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uqsub v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_sqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sqsub_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqsub v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uqsub_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uqsub v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_sqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sqsub_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqsub v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>)
-declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_uqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_uqsub_v2i64:
-  %tmp1 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: uqsub v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %tmp1
-}
-
-define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_sqsub_v2i64:
-  %tmp1 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: sqsub v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %tmp1
-}

Removed: llvm/trunk/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll (removed)
@@ -1,122 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; Just intrinsic calls: arm64 has similar tests in vshift.ll
-
-declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uqrshl_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uqrshl v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_sqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sqrshl_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sqrshl v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uqrshl_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uqrshl v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_sqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sqrshl_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sqrshl v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uqrshl_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uqrshl v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_sqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sqrshl_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqrshl v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uqrshl_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uqrshl v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_sqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sqrshl_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqrshl v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uqrshl_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uqrshl v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_sqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sqrshl_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqrshl v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uqrshl_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uqrshl v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sqrshl_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqrshl v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>)
-declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_uqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_uqrshl_v2i64:
-  %tmp1 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: uqrshl v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %tmp1
-}
-
-define <2 x i64> @test_sqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_sqrshl_v2i64:
-  %tmp1 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: sqrshl v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %tmp1
-}
-

Removed: llvm/trunk/test/CodeGen/AArch64/neon-saturating-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-saturating-shift.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-saturating-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-saturating-shift.ll (removed)
@@ -1,122 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; Just intrinsic calls: arm64 has similar tests in vshift.ll
-
-declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uqshl_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uqshl v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_sqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sqshl_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sqshl v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uqshl_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uqshl v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_sqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sqshl_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sqshl v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uqshl_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uqshl v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_sqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sqshl_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqshl v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uqshl_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uqshl v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_sqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sqshl_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqshl v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uqshl_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uqshl v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_sqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sqshl_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqshl v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uqshl_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uqshl v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sqshl_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqshl v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
-declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_uqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_uqshl_v2i64:
-  %tmp1 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: uqshl v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %tmp1
-}
-
-define <2 x i64> @test_sqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_sqshl_v2i64:
-  %tmp1 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: sqshl v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %tmp1
-}
-

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-abs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-abs.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-abs.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-abs.ll (removed)
@@ -1,62 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 has tests for the i64 versions and uses a different approach for the others.
-
-define i64 @test_vabsd_s64(i64 %a) {
-; CHECK: test_vabsd_s64
-; CHECK: abs {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %vabs.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vabs1.i = tail call <1 x i64> @llvm.aarch64.neon.vabs(<1 x i64> %vabs.i)
-  %0 = extractelement <1 x i64> %vabs1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vabs(<1 x i64>)
-
-define i8 @test_vqabsb_s8(i8 %a) {
-; CHECK: test_vqabsb_s8
-; CHECK: sqabs {{b[0-9]+}}, {{b[0-9]+}}
-entry:
-  %vqabs.i = insertelement <1 x i8> undef, i8 %a, i32 0
-  %vqabs1.i = call <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8> %vqabs.i)
-  %0 = extractelement <1 x i8> %vqabs1.i, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8>)
-
-define i16 @test_vqabsh_s16(i16 %a) {
-; CHECK: test_vqabsh_s16
-; CHECK: sqabs {{h[0-9]+}}, {{h[0-9]+}}
-entry:
-  %vqabs.i = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vqabs1.i = call <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16> %vqabs.i)
-  %0 = extractelement <1 x i16> %vqabs1.i, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16>)
-
-define i32 @test_vqabss_s32(i32 %a) {
-; CHECK: test_vqabss_s32
-; CHECK: sqabs {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %vqabs.i = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vqabs1.i = call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %vqabs.i)
-  %0 = extractelement <1 x i32> %vqabs1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>)
-
-define i64 @test_vqabsd_s64(i64 %a) {
-; CHECK: test_vqabsd_s64
-; CHECK: sqabs {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %vqabs.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vqabs1.i = call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %vqabs.i)
-  %0 = extractelement <1 x i64> %vqabs1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>)

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-add-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-add-sub.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-add-sub.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-add-sub.ll (removed)
@@ -1,51 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 has a copy of the key parts in AdvSIMD-Scalar.ll
-
-define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-	%tmp3 = add <1 x i64> %A, %B;
-	ret <1 x i64> %tmp3
-}
-
-define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-	%tmp3 = sub <1 x i64> %A, %B;
-	ret <1 x i64> %tmp3
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_add_v1i64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uadd_v1i64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sub_v1i64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_usub_v1i64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-
-

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 declare float @llvm.fma.f32(float, float, float)

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll (removed)
@@ -1,124 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-; arm64 has a separate copy due to intrinsics (aarch64-neon-scalar-by-elem-mul.ll)
-define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) {
-  ; CHECK: test_fmul_lane_ss2S
-  ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
-  %tmp1 = extractelement <2 x float> %v, i32 1
-  %tmp2 = fmul float %a, %tmp1;
-  ret float %tmp2;
-}
-
-define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) {
-  ; CHECK: test_fmul_lane_ss2S_swap
-  ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
-  %tmp1 = extractelement <2 x float> %v, i32 1
-  %tmp2 = fmul float %tmp1, %a;
-  ret float %tmp2;
-}
-
-
-define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) {
-  ; CHECK: test_fmul_lane_ss4S
-  ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-  %tmp1 = extractelement <4 x float> %v, i32 3
-  %tmp2 = fmul float %a, %tmp1;
-  ret float %tmp2;
-}
-
-define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) {
-  ; CHECK: test_fmul_lane_ss4S_swap
-  ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-  %tmp1 = extractelement <4 x float> %v, i32 3
-  %tmp2 = fmul float %tmp1, %a;
-  ret float %tmp2;
-}
-
-
-define double @test_fmul_lane_ddD(double %a, <1 x double> %v) {
-  ; CHECK: test_fmul_lane_ddD
-  ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-  %tmp1 = extractelement <1 x double> %v, i32 0
-  %tmp2 = fmul double %a, %tmp1;
-  ret double %tmp2;
-}
-
-
-
-define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) {
-  ; CHECK: test_fmul_lane_dd2D
-  ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-  %tmp1 = extractelement <2 x double> %v, i32 1
-  %tmp2 = fmul double %a, %tmp1;
-  ret double %tmp2;
-}
-
-
-define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) {
-  ; CHECK: test_fmul_lane_dd2D_swap
-  ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-  %tmp1 = extractelement <2 x double> %v, i32 1
-  %tmp2 = fmul double %tmp1, %a;
-  ret double %tmp2;
-}
-
-declare float @llvm.aarch64.neon.vmulx.f32(float, float)
-
-define float @test_fmulx_lane_f32(float %a, <2 x float> %v) {
-  ; CHECK: test_fmulx_lane_f32
-  ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
-  %tmp1 = extractelement <2 x float> %v, i32 1
-  %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1)
-  ret float %tmp2;
-}
-
-define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) {
-  ; CHECK: test_fmulx_laneq_f32
-  ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-  %tmp1 = extractelement <4 x float> %v, i32 3
-  %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1)
-  ret float %tmp2;
-}
-
-define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) {
-  ; CHECK: test_fmulx_laneq_f32_swap
-  ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-  %tmp1 = extractelement <4 x float> %v, i32 3
-  %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %tmp1, float %a)
-  ret float %tmp2;
-}
-
-declare double @llvm.aarch64.neon.vmulx.f64(double, double)
-
-define double @test_fmulx_lane_f64(double %a, <1 x double> %v) {
-  ; CHECK: test_fmulx_lane_f64
-  ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-  %tmp1 = extractelement <1 x double> %v, i32 0
-  %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1)
-  ret double %tmp2;
-}
-
-define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) {
-  ; CHECK: test_fmulx_laneq_f64_0
-  ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-  %tmp1 = extractelement <2 x double> %v, i32 0
-  %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1)
-  ret double %tmp2;
-}
-
-
-define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) {
-  ; CHECK: test_fmulx_laneq_f64_1
-  ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-  %tmp1 = extractelement <2 x double> %v, i32 1
-  %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1)
-  ret double %tmp2;
-}
-
-define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) {
-  ; CHECK: test_fmulx_laneq_f64_1_swap
-  ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-  %tmp1 = extractelement <2 x double> %v, i32 1
-  %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %tmp1, double %a)
-  ret double %tmp2;
-}
-

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-compare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-compare.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-compare.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-compare.ll (removed)
@@ -1,344 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; arm64 has (the non-trivial parts of) this test covered by vcmp.ll
-
-;; Scalar Integer Compare
-
-define i64 @test_vceqd(i64 %a, i64 %b) {
-; CHECK: test_vceqd
-; CHECK: cmeq {{d[0-9]+}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vceq.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vceq1.i = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vceq2.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64> %vceq.i, <1 x i64> %vceq1.i)
-  %0 = extractelement <1 x i64> %vceq2.i, i32 0
-  ret i64 %0
-}
-
-define i64 @test_vceqzd(i64 %a) {
-; CHECK: test_vceqzd
-; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0
-entry:
-  %vceqz.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vceqz1.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64> %vceqz.i, <1 x i64> zeroinitializer)
-  %0 = extractelement <1 x i64> %vceqz1.i, i32 0
-  ret i64 %0
-}
-
-define i64 @test_vcged(i64 %a, i64 %b) {
-; CHECK: test_vcged
-; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcge.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vcge1.i = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcge.i, <1 x i64> %vcge1.i)
-  %0 = extractelement <1 x i64> %vcge2.i, i32 0
-  ret i64 %0
-}
-
-define i64 @test_vcgezd(i64 %a) {
-; CHECK: test_vcgezd
-; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0
-entry:
-  %vcgez.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vcgez1.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcgez.i, <1 x i64> zeroinitializer)
-  %0 = extractelement <1 x i64> %vcgez1.i, i32 0
-  ret i64 %0
-}
-
-define i64 @test_vcgtd(i64 %a, i64 %b) {
-; CHECK: test_vcgtd
-; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcgt.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vcgt1.i = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgt.i, <1 x i64> %vcgt1.i)
-  %0 = extractelement <1 x i64> %vcgt2.i, i32 0
-  ret i64 %0
-}
-
-define i64 @test_vcgtzd(i64 %a) {
-; CHECK: test_vcgtzd
-; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0
-entry:
-  %vcgtz.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vcgtz1.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgtz.i, <1 x i64> zeroinitializer)
-  %0 = extractelement <1 x i64> %vcgtz1.i, i32 0
-  ret i64 %0
-}
-
-define i64 @test_vcled(i64 %a, i64 %b) {
-; CHECK: test_vcled
-; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcgt.i = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vcgt1.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgt.i, <1 x i64> %vcgt1.i)
-  %0 = extractelement <1 x i64> %vcgt2.i, i32 0
-  ret i64 %0
-}
-
-define i64 @test_vclezd(i64 %a) {
-; CHECK: test_vclezd
-; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0
-entry:
-  %vclez.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vclez1.i = call <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1i64.v1i64(<1 x i64> %vclez.i, <1 x i64> zeroinitializer)
-  %0 = extractelement <1 x i64> %vclez1.i, i32 0
-  ret i64 %0
-}
-
-define i64 @test_vcltd(i64 %a, i64 %b) {
-; CHECK: test_vcltd
-; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcge.i = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vcge1.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcge.i, <1 x i64> %vcge1.i)
-  %0 = extractelement <1 x i64> %vcge2.i, i32 0
-  ret i64 %0
-}
-
-define i64 @test_vcltzd(i64 %a) {
-; CHECK: test_vcltzd
-; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0x0
-entry:
-  %vcltz.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vcltz1.i = call <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64> %vcltz.i, <1 x i64> zeroinitializer)
-  %0 = extractelement <1 x i64> %vcltz1.i, i32 0
-  ret i64 %0
-}
-
-define i64 @test_vtstd(i64 %a, i64 %b) {
-; CHECK: test_vtstd
-; CHECK: cmtst {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vtst.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vtst1.i = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vtst2.i = call <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64> %vtst.i, <1 x i64> %vtst1.i)
-  %0 = extractelement <1 x i64> %vtst2.i, i32 0
-  ret i64 %0
-}
-
-
-define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcage_f64
-; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2
-  ret <1 x i64> %vcage2.i
-}
-
-define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcagt_f64
-; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2
-  ret <1 x i64> %vcagt2.i
-}
-
-define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcale_f64
-; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2
-  ret <1 x i64> %vcage2.i
-}
-
-define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcalt_f64
-; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2
-  ret <1 x i64> %vcagt2.i
-}
-
-define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vceq_s64
-; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = icmp eq <1 x i64> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vceq_u64
-; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = icmp eq <1 x i64> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vceq_f64
-; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = fcmp oeq <1 x double> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vcge_s64
-; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = icmp sge <1 x i64> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vcge_u64
-; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = icmp uge <1 x i64> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcge_f64
-; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = fcmp oge <1 x double> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vcle_s64
-; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = icmp sle <1 x i64> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vcle_u64
-; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = icmp ule <1 x i64> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcle_f64
-; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = fcmp ole <1 x double> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vcgt_s64
-; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = icmp sgt <1 x i64> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vcgt_u64
-; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = icmp ugt <1 x i64> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcgt_f64
-; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = fcmp ogt <1 x double> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vclt_s64
-; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = icmp slt <1 x i64> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vclt_u64
-; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = icmp ult <1 x i64> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vclt_f64
-; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %cmp.i = fcmp olt <1 x double> %a, %b
-  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
-  ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vceqz_s64(<1 x i64> %a) #0 {
-; CHECK: test_vceqz_s64
-; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0
-  %1 = icmp eq <1 x i64> %a, zeroinitializer
-  %vceqz.i = sext <1 x i1> %1 to <1 x i64>
-  ret <1 x i64> %vceqz.i
-}
-
-define <1 x i64> @test_vceqz_u64(<1 x i64> %a) #0 {
-; CHECK: test_vceqz_u64
-; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0
-  %1 = icmp eq <1 x i64> %a, zeroinitializer
-  %vceqz.i = sext <1 x i1> %1 to <1 x i64>
-  ret <1 x i64> %vceqz.i
-}
-
-define <1 x i64> @test_vceqz_p64(<1 x i64> %a) #0 {
-; CHECK: test_vceqz_p64
-; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0
-  %1 = icmp eq <1 x i64> %a, zeroinitializer
-  %vceqz.i = sext <1 x i1> %1 to <1 x i64>
-  ret <1 x i64> %vceqz.i
-}
-
-define <2 x i64> @test_vceqzq_p64(<2 x i64> %a) #0 {
-; CHECK: test_vceqzq_p64
-; CHECK: cmeq  {{v[0-9]}}.2d, {{v[0-9]}}.2d, #0
-  %1 = icmp eq <2 x i64> %a, zeroinitializer
-  %vceqz.i = sext <2 x i1> %1 to <2 x i64>
-  ret <2 x i64> %vceqz.i
-}
-
-define <1 x i64> @test_vcgez_s64(<1 x i64> %a) #0 {
-; CHECK: test_vcgez_s64
-; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0
-  %1 = icmp sge <1 x i64> %a, zeroinitializer
-  %vcgez.i = sext <1 x i1> %1 to <1 x i64>
-  ret <1 x i64> %vcgez.i
-}
-
-define <1 x i64> @test_vclez_s64(<1 x i64> %a) #0 {
-; CHECK: test_vclez_s64
-; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0
-  %1 = icmp sle <1 x i64> %a, zeroinitializer
-  %vclez.i = sext <1 x i1> %1 to <1 x i64>
-  ret <1 x i64> %vclez.i
-}
-
-define <1 x i64> @test_vcgtz_s64(<1 x i64> %a) #0 {
-; CHECK: test_vcgtz_s64
-; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0
-  %1 = icmp sgt <1 x i64> %a, zeroinitializer
-  %vcgtz.i = sext <1 x i1> %1 to <1 x i64>
-  ret <1 x i64> %vcgtz.i
-}
-
-define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 {
-; CHECK: test_vcltz_s64
-; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0
-  %1 = icmp slt <1 x i64> %a, zeroinitializer
-  %vcltz.i = sext <1 x i1> %1 to <1 x i64>
-  ret <1 x i64> %vcltz.i
-}
-
-declare <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double>, <1 x double>)
-declare <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double>, <1 x double>)
-declare <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vchs.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vchi.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)

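For context: the surviving arm64 coverage in vcmp.ll expresses these scalar
compares without target intrinsics. A minimal sketch of that form, assuming
the same icmp-plus-sext pattern used by test_vceq_s64 above (function name
hypothetical, not part of this commit):

define <1 x i64> @sketch_cmeq_v1i64(<1 x i64> %a, <1 x i64> %b) {
; Compare-then-sign-extend is matched directly to a d-register cmeq.
; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
  %cmp = icmp eq <1 x i64> %a, %b
  %sext = sext <1 x i1> %cmp to <1 x i64>
  ret <1 x i64> %sext
}
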
Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll Sat May 24 07:42:26 2014
@@ -1,10 +1,8 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
 ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
 
 
 define float @test_dup_sv2S(<2 x float> %v) {
  ; CHECK-LABEL: test_dup_sv2S
- ; CHECK-AARCH64: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
  ; CHECK-ARM64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
  %tmp1 = extractelement <2 x float> %v, i32 1
  ret float  %tmp1
@@ -39,7 +37,6 @@ define double @test_dup_dvD(<1 x double>
 
 define double @test_dup_dv2D(<2 x double> %v) {
  ; CHECK-LABEL: test_dup_dv2D
- ; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
  %tmp1 = extractelement <2 x double> %v, i32 1
  ret double  %tmp1
@@ -47,7 +44,6 @@ define double @test_dup_dv2D(<2 x double
 
 define double @test_dup_dv2D_0(<2 x double> %v) {
  ; CHECK-LABEL: test_dup_dv2D_0
- ; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
  ; CHECK: ret
  %tmp1 = extractelement <2 x double> %v, i32 1
@@ -56,49 +52,42 @@ define double @test_dup_dv2D_0(<2 x doub
 
 define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) {
  ; CHECK-LABEL: test_vector_dup_bv16B
- ; CHECK-AARCH64: dup {{b[0-9]+}}, {{v[0-9]+}}.b[14]
  %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> <i32 14> 
  ret <1 x i8> %shuffle.i
 }
 
 define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) {
  ; CHECK-LABEL: test_vector_dup_bv8B
- ; CHECK-AARCH64: dup {{b[0-9]+}}, {{v[0-9]+}}.b[7]
  %shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> <i32 7> 
  ret <1 x i8> %shuffle.i
 }
 
 define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) {
  ; CHECK-LABEL: test_vector_dup_hv8H
- ; CHECK-AARCH64: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
  %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> <i32 7> 
  ret <1 x i16> %shuffle.i
 }
 
 define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) {
  ; CHECK-LABEL: test_vector_dup_hv4H
- ; CHECK-AARCH64: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
  %shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> <i32 3> 
  ret <1 x i16> %shuffle.i
 }
 
 define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) {
  ; CHECK-LABEL: test_vector_dup_sv4S
- ; CHECK-AARCH64: dup {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> <i32 3> 
  ret <1 x i32> %shuffle
 }
 
 define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) {
  ; CHECK-LABEL: test_vector_dup_sv2S
- ; CHECK-AARCH64: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <1 x i32> <i32 1> 
  ret <1 x i32> %shuffle
 }
 
 define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) {
  ; CHECK-LABEL: test_vector_dup_dv2D
- ; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  ; CHECK-ARM64: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8
  %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> <i32 1> 
  ret <1 x i64> %shuffle.i

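After this change the file exercises only the arm64 pipeline, which copies
the lane with ins rather than a scalar dup. A minimal sketch of the pattern
that remains, mirroring test_dup_sv2S above (function name hypothetical):

define float @sketch_lane_extract(<2 x float> %v) {
 ; CHECK-LABEL: sketch_lane_extract
 ; CHECK-ARM64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 %e = extractelement <2 x float> %v, i32 1
 ret float %e
}
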
Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-cvt.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-cvt.ll (removed)
@@ -1,134 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; arm64 has a different approach to scalars. Discarding.
-
-define float @test_vcvts_f32_s32(i32 %a) {
-; CHECK: test_vcvts_f32_s32
-; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0
-  %0 = call float @llvm.aarch64.neon.vcvtint2fps.f32.v1i32(<1 x i32> %vcvtf.i)
-  ret float %0
-}
-
-declare float @llvm.aarch64.neon.vcvtint2fps.f32.v1i32(<1 x i32>)
-
-define double @test_vcvtd_f64_s64(i64 %a) {
-; CHECK: test_vcvtd_f64_s64
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %0 = call double @llvm.aarch64.neon.vcvtint2fps.f64.v1i64(<1 x i64> %vcvtf.i)
-  ret double %0
-}
-
-declare double @llvm.aarch64.neon.vcvtint2fps.f64.v1i64(<1 x i64>)
-
-define float @test_vcvts_f32_u32(i32 %a) {
-; CHECK: test_vcvts_f32_u32
-; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0
-  %0 = call float @llvm.aarch64.neon.vcvtint2fpu.f32.v1i32(<1 x i32> %vcvtf.i)
-  ret float %0
-}
-
-declare float @llvm.aarch64.neon.vcvtint2fpu.f32.v1i32(<1 x i32>)
-
-define double @test_vcvtd_f64_u64(i64 %a) {
-; CHECK: test_vcvtd_f64_u64
-; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %0 = call double @llvm.aarch64.neon.vcvtint2fpu.f64.v1i64(<1 x i64> %vcvtf.i)
-  ret double %0
-}
-
-declare double @llvm.aarch64.neon.vcvtint2fpu.f64.v1i64(<1 x i64>)
-
-define float @test_vcvts_n_f32_s32(i32 %a) {
-; CHECK: test_vcvts_n_f32_s32
-; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}}, #1
-entry:
-  %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0
-  %0 = call float @llvm.aarch64.neon.vcvtfxs2fp.n.f32.v1i32(<1 x i32> %vcvtf, i32 1)
-  ret float %0
-}
-
-declare float @llvm.aarch64.neon.vcvtfxs2fp.n.f32.v1i32(<1 x i32>, i32)
-
-define double @test_vcvtd_n_f64_s64(i64 %a) {
-; CHECK: test_vcvtd_n_f64_s64
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}, #1
-entry:
-  %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0
-  %0 = call double @llvm.aarch64.neon.vcvtfxs2fp.n.f64.v1i64(<1 x i64> %vcvtf, i32 1)
-  ret double %0
-}
-
-declare double @llvm.aarch64.neon.vcvtfxs2fp.n.f64.v1i64(<1 x i64>, i32)
-
-define float @test_vcvts_n_f32_u32(i32 %a) {
-; CHECK: test_vcvts_n_f32_u32
-; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}}, #1
-entry:
-  %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0
-  %0 = call float @llvm.aarch64.neon.vcvtfxu2fp.n.f32.v1i32(<1 x i32> %vcvtf, i32 1)
-  ret float %0
-}
-
-declare float @llvm.aarch64.neon.vcvtfxu2fp.n.f32.v1i32(<1 x i32>, i32)
-
-define double @test_vcvtd_n_f64_u64(i64 %a) {
-; CHECK: test_vcvtd_n_f64_u64
-; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}}, #1
-entry:
-  %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0
-  %0 = call double @llvm.aarch64.neon.vcvtfxu2fp.n.f64.v1i64(<1 x i64> %vcvtf, i32 1)
-  ret double %0
-}
-
-declare double @llvm.aarch64.neon.vcvtfxu2fp.n.f64.v1i64(<1 x i64>, i32)
-
-define i32 @test_vcvts_n_s32_f32(float %a) {
-; CHECK: test_vcvts_n_s32_f32
-; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #1
-entry:
-  %fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i32.f32(float %a, i32 1)
-  %0 = extractelement <1 x i32> %fcvtzs1, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i32.f32(float, i32)
-
-define i64 @test_vcvtd_n_s64_f64(double %a) {
-; CHECK: test_vcvtd_n_s64_f64
-; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #1
-entry:
-  %fcvtzs1 = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i64.f64(double %a, i32 1)
-  %0 = extractelement <1 x i64> %fcvtzs1, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i64.f64(double, i32)
-
-define i32 @test_vcvts_n_u32_f32(float %a) {
-; CHECK: test_vcvts_n_u32_f32
-; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #32
-entry:
-  %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i32.f32(float %a, i32 32)
-  %0 = extractelement <1 x i32> %fcvtzu1, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i32.f32(float, i32)
-
-define i64 @test_vcvtd_n_u64_f64(double %a) {
-; CHECK: test_vcvtd_n_u64_f64
-; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #64
-entry:
-  %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i64.f64(double %a, i32 64)
-  %0 = extractelement <1 x i64> %fcvtzu1, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i64.f64(double, i32)

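The arm64 equivalents avoid the <1 x iN> insertelement detour: a plain
sitofp (or uitofp) is enough for instruction selection. A hedged sketch,
where the function name and the expected output are assumptions rather than
anything taken from this commit:

define float @sketch_scvtf(i32 %a) {
; A straight IR conversion; arm64 is expected to select a GPR-source
; scvtf (e.g. scvtf s0, w0) for this.
  %r = sitofp i32 %a to float
  ret float %r
}
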
Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-ext.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-ext.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-ext.ll (removed)
@@ -1,114 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; arm64 doesn't use <1 x iN> types, for N < 64.
-
-define <1 x i64> @test_zext_v1i32_v1i64(<2 x i32> %v) nounwind readnone {
-; CHECK-LABEL: test_zext_v1i32_v1i64:
-; CHECK: ushll	v0.2d, v0.2s, #0
-  %1 = extractelement <2 x i32> %v, i32 0
-  %2 = insertelement <1 x i32> undef, i32 %1, i32 0
-  %3 = zext <1 x i32> %2 to <1 x i64>
-  ret <1 x i64> %3
-}
-
-define <1 x i32> @test_zext_v1i16_v1i32(<4 x i16> %v) nounwind readnone {
-; CHECK-LABEL: test_zext_v1i16_v1i32:
-; CHECK: ushll	v0.4s, v0.4h, #0
-  %1 = extractelement <4 x i16> %v, i32 0
-  %2 = insertelement <1 x i16> undef, i16 %1, i32 0
-  %3 = zext <1 x i16> %2 to <1 x i32>
-  ret <1 x i32> %3
-}
-
-define <1 x i16> @test_zext_v1i8_v1i16(<8 x i8> %v) nounwind readnone {
-; CHECK-LABEL: test_zext_v1i8_v1i16:
-; CHECK: ushll	v0.8h, v0.8b, #0
-  %1 = extractelement <8 x i8> %v, i32 0
-  %2 = insertelement <1 x i8> undef, i8 %1, i32 0
-  %3 = zext <1 x i8> %2 to <1 x i16>
-  ret <1 x i16> %3
-}
-
-define <1 x i32> @test_zext_v1i8_v1i32(<8 x i8> %v) nounwind readnone {
-; CHECK-LABEL: test_zext_v1i8_v1i32:
-; CHECK: dup     b0, v0.b[0]
-  %1 = extractelement <8 x i8> %v, i32 0
-  %2 = insertelement <1 x i8> undef, i8 %1, i32 0
-  %3 = zext <1 x i8> %2 to <1 x i32>
-  ret <1 x i32> %3
-}
-
-define <1 x i64> @test_zext_v1i16_v1i64(<4 x i16> %v) nounwind readnone {
-; CHECK-LABEL: test_zext_v1i16_v1i64:
-; CHECK: dup    h0, v0.h[0]
-  %1 = extractelement <4 x i16> %v, i32 0
-  %2 = insertelement <1 x i16> undef, i16 %1, i32 0
-  %3 = zext <1 x i16> %2 to <1 x i64>
-  ret <1 x i64> %3
-}
-
-define <1 x i64> @test_zext_v1i8_v1i64(<8 x i8> %v) nounwind readnone {
-; CHECK-LABEL: test_zext_v1i8_v1i64:
-; CHECK: dup	b0, v0.b[0]
-  %1 = extractelement <8 x i8> %v, i32 0
-  %2 = insertelement <1 x i8> undef, i8 %1, i32 0
-  %3 = zext <1 x i8> %2 to <1 x i64>
-  ret <1 x i64> %3
-}
-
-define <1 x i64> @test_sext_v1i32_v1i64(<2 x i32> %v) nounwind readnone {
-; CHECK-LABEL: test_sext_v1i32_v1i64:
-; CHECK: sshll	v0.2d, v0.2s, #0
-  %1 = extractelement <2 x i32> %v, i32 0
-  %2 = insertelement <1 x i32> undef, i32 %1, i32 0
-  %3 = sext <1 x i32> %2 to <1 x i64>
-  ret <1 x i64> %3
-}
-
-define <1 x i32> @test_sext_v1i16_v1i32(<4 x i16> %v) nounwind readnone {
-; CHECK-LABEL: test_sext_v1i16_v1i32:
-; CHECK: sshll	v0.4s, v0.4h, #0
-  %1 = extractelement <4 x i16> %v, i32 0
-  %2 = insertelement <1 x i16> undef, i16 %1, i32 0
-  %3 = sext <1 x i16> %2 to <1 x i32>
-  ret <1 x i32> %3
-}
-
-define <1 x i16> @test_sext_v1i8_v1i16(<8 x i8> %v) nounwind readnone {
-; CHECK-LABEL: test_sext_v1i8_v1i16:
-; CHECK: sshll	v0.8h, v0.8b, #0
-  %1 = extractelement <8 x i8> %v, i32 0
-  %2 = insertelement <1 x i8> undef, i8 %1, i32 0
-  %3 = sext <1 x i8> %2 to <1 x i16>
-  ret <1 x i16> %3
-}
-
-define <1 x i32> @test_sext_v1i8_v1i32(<8 x i8> %v) nounwind readnone {
-; CHECK-LABEL: test_sext_v1i8_v1i32:
-; CHECK: sshll	v0.8h, v0.8b, #0
-; CHECK: sshll	v0.4s, v0.4h, #0
-  %1 = extractelement <8 x i8> %v, i32 0
-  %2 = insertelement <1 x i8> undef, i8 %1, i32 0
-  %3 = sext <1 x i8> %2 to <1 x i32>
-  ret <1 x i32> %3
-}
-
-define <1 x i64> @test_sext_v1i16_v1i64(<4 x i16> %v) nounwind readnone {
-; CHECK-LABEL: test_sext_v1i16_v1i64:
-; CHECK: sshll	v0.4s, v0.4h, #0
-; CHECK: sshll	v0.2d, v0.2s, #0
-  %1 = extractelement <4 x i16> %v, i32 0
-  %2 = insertelement <1 x i16> undef, i16 %1, i32 0
-  %3 = sext <1 x i16> %2 to <1 x i64>
-  ret <1 x i64> %3
-}
-
-define <1 x i64> @test_sext_v1i8_v1i64(<8 x i8> %v) nounwind readnone {
-; CHECK-LABEL: test_sext_v1i8_v1i64:
-; CHECK: sshll	v0.8h, v0.8b, #0
-; CHECK: sshll	v0.4s, v0.4h, #0
-; CHECK: sshll	v0.2d, v0.2s, #0
-  %1 = extractelement <8 x i8> %v, i32 0
-  %2 = insertelement <1 x i8> undef, i8 %1, i32 0
-  %3 = sext <1 x i8> %2 to <1 x i64>
-  ret <1 x i64> %3
-}

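Because arm64 avoids <1 x iN> types for N < 64, the equivalent coverage uses
ordinary scalar extensions. A minimal sketch under that assumption
(hypothetical function, not from this commit):

define i64 @sketch_zext_i32_i64(<2 x i32> %v) {
; Extract the element first, then widen it as a plain integer zext;
; no <1 x i32> or <1 x i64> values are involved.
  %e = extractelement <2 x i32> %v, i32 0
  %z = zext i32 %e to i64
  ret i64 %z
}
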
Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll (removed)
@@ -1,105 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; intrinsic wrangling that arm64 does differently.
-
-define i8 @test_vqmovunh_s16(i16 %a) {
-; CHECK: test_vqmovunh_s16
-; CHECK: sqxtun {{b[0-9]+}}, {{h[0-9]+}}
-entry:
-  %vqmovun.i = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vqmovun1.i = call <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16> %vqmovun.i)
-  %0 = extractelement <1 x i8> %vqmovun1.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vqmovuns_s32(i32 %a) {
-; CHECK: test_vqmovuns_s32
-; CHECK: sqxtun {{h[0-9]+}}, {{s[0-9]+}}
-entry:
-  %vqmovun.i = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vqmovun1.i = call <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32> %vqmovun.i)
-  %0 = extractelement <1 x i16> %vqmovun1.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vqmovund_s64(i64 %a) {
-; CHECK: test_vqmovund_s64
-; CHECK: sqxtun {{s[0-9]+}}, {{d[0-9]+}}
-entry:
-  %vqmovun.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vqmovun1.i = call <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64> %vqmovun.i)
-  %0 = extractelement <1 x i32> %vqmovun1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16>)
-declare <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32>)
-declare <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64>)
-
-define i8 @test_vqmovnh_s16(i16 %a) {
-; CHECK: test_vqmovnh_s16
-; CHECK: sqxtn {{b[0-9]+}}, {{h[0-9]+}}
-entry:
-  %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16> %vqmovn.i)
-  %0 = extractelement <1 x i8> %vqmovn1.i, i32 0
-  ret i8 %0
-}
-
-define i16 @test_vqmovns_s32(i32 %a) {
-; CHECK: test_vqmovns_s32
-; CHECK: sqxtn {{h[0-9]+}}, {{s[0-9]+}}
-entry:
-  %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32> %vqmovn.i)
-  %0 = extractelement <1 x i16> %vqmovn1.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vqmovnd_s64(i64 %a) {
-; CHECK: test_vqmovnd_s64
-; CHECK: sqxtn {{s[0-9]+}}, {{d[0-9]+}}
-entry:
-  %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64> %vqmovn.i)
-  %0 = extractelement <1 x i32> %vqmovn1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16>)
-declare <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32>)
-declare <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64>)
-
-define i8 @test_vqmovnh_u16(i16 %a) {
-; CHECK: test_vqmovnh_u16
-; CHECK: uqxtn {{b[0-9]+}}, {{h[0-9]+}}
-entry:
-  %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16> %vqmovn.i)
-  %0 = extractelement <1 x i8> %vqmovn1.i, i32 0
-  ret i8 %0
-}
-
-
-define i16 @test_vqmovns_u32(i32 %a) {
-; CHECK: test_vqmovns_u32
-; CHECK: uqxtn {{h[0-9]+}}, {{s[0-9]+}}
-entry:
-  %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32> %vqmovn.i)
-  %0 = extractelement <1 x i16> %vqmovn1.i, i32 0
-  ret i16 %0
-}
-
-define i32 @test_vqmovnd_u64(i64 %a) {
-; CHECK: test_vqmovnd_u64
-; CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}}
-entry:
-  %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64> %vqmovn.i)
-  %0 = extractelement <1 x i32> %vqmovn1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16>)
-declare <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32>)
-declare <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64>)

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-fabd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-fabd.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-fabd.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-fabd.ll (removed)
@@ -1,21 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 has these two tests in vabs.ll
-
-define float @test_vabds_f32(float %a, float %b) {
-; CHECK-LABEL: test_vabds_f32
-; CHECK: fabd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %0 = call float @llvm.aarch64.neon.vabd.f32(float %a, float %b)
-  ret float %0
-}
-
-define double @test_vabdd_f64(double %a, double %b) {
-; CHECK-LABEL: test_vabdd_f64
-; CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %0 = call double @llvm.aarch64.neon.vabd.f64(double %a, double %b)
-  ret double %0
-}
-
-declare double @llvm.aarch64.neon.vabd.f64(double, double)
-declare float @llvm.aarch64.neon.vabd.f32(float, float)

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-fcvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-fcvt.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-fcvt.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-fcvt.ll (removed)
@@ -1,234 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; arm64 duplicates these tests in cvt.ll
-
-;; Scalar Floating-point Convert
-
-define float @test_vcvtxn(double %a) {
-; CHECK: test_vcvtxn
-; CHECK: fcvtxn {{s[0-9]}}, {{d[0-9]}}
-entry:
-  %vcvtf = call float @llvm.aarch64.neon.fcvtxn(double %a)
-  ret float %vcvtf
-}
-
-declare float @llvm.aarch64.neon.fcvtxn(double)
-
-define i32 @test_vcvtass(float %a) {
-; CHECK: test_vcvtass
-; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %vcvtas1.i = call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float %a)
-  %0 = extractelement <1 x i32> %vcvtas1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float)
-
-define i64 @test_vcvtasd(double %a) {
-; CHECK: test_vcvtasd
-; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcvtas1.i = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double %a)
-  %0 = extractelement <1 x i64> %vcvtas1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double)
-
-define i32 @test_vcvtaus(float %a) {
-; CHECK: test_vcvtaus
-; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %vcvtau1.i = call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float %a)
-  %0 = extractelement <1 x i32> %vcvtau1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float)
-
-define i64 @test_vcvtaud(double %a) {
-; CHECK: test_vcvtaud
-; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcvtau1.i = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double %a)
-  %0 = extractelement <1 x i64> %vcvtau1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double) 
-
-define i32 @test_vcvtmss(float %a) {
-; CHECK: test_vcvtmss
-; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %vcvtms1.i = call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float %a)
-  %0 = extractelement <1 x i32> %vcvtms1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float)
-
-define i64 @test_vcvtmd_s64_f64(double %a) {
-; CHECK: test_vcvtmd_s64_f64
-; CHECK: fcvtms {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcvtms1.i = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double %a)
-  %0 = extractelement <1 x i64> %vcvtms1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double)
-
-define i32 @test_vcvtmus(float %a) {
-; CHECK: test_vcvtmus
-; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %vcvtmu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float %a)
-  %0 = extractelement <1 x i32> %vcvtmu1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float)
-
-define i64 @test_vcvtmud(double %a) {
-; CHECK: test_vcvtmud
-; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcvtmu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double %a)
-  %0 = extractelement <1 x i64> %vcvtmu1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double)
-
-define i32 @test_vcvtnss(float %a) {
-; CHECK: test_vcvtnss
-; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %vcvtns1.i = call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float %a)
-  %0 = extractelement <1 x i32> %vcvtns1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float)
-
-define i64 @test_vcvtnd_s64_f64(double %a) {
-; CHECK: test_vcvtnd_s64_f64
-; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcvtns1.i = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double %a)
-  %0 = extractelement <1 x i64> %vcvtns1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double)
-
-define i32 @test_vcvtnus(float %a) {
-; CHECK: test_vcvtnus
-; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %vcvtnu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float %a)
-  %0 = extractelement <1 x i32> %vcvtnu1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float)
-
-define i64 @test_vcvtnud(double %a) {
-; CHECK: test_vcvtnud
-; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcvtnu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double %a)
-  %0 = extractelement <1 x i64> %vcvtnu1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double)
-
-define i32 @test_vcvtpss(float %a) {
-; CHECK: test_vcvtpss
-; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %vcvtps1.i = call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float %a)
-  %0 = extractelement <1 x i32> %vcvtps1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float)
-
-define i64 @test_vcvtpd_s64_f64(double %a) {
-; CHECK: test_vcvtpd_s64_f64
-; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcvtps1.i = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double %a)
-  %0 = extractelement <1 x i64> %vcvtps1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double)
-
-define i32 @test_vcvtpus(float %a) {
-; CHECK: test_vcvtpus
-; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %vcvtpu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float %a)
-  %0 = extractelement <1 x i32> %vcvtpu1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float)
-
-define i64 @test_vcvtpud(double %a) {
-; CHECK: test_vcvtpud
-; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcvtpu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double %a)
-  %0 = extractelement <1 x i64> %vcvtpu1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double)
-
-define i32 @test_vcvtss(float %a) {
-; CHECK: test_vcvtss
-; CHECK: fcvtzs {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %vcvtzs1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float %a)
-  %0 = extractelement <1 x i32> %vcvtzs1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float)
-
-define i64 @test_vcvtd_s64_f64(double %a) {
-; CHECK: test_vcvtd_s64_f64
-; CHECK: fcvtzs {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcvtzs1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double %a)
-  %0 = extractelement <1 x i64> %vcvtzs1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double)
-
-define i32 @test_vcvtus(float %a) {
-; CHECK: test_vcvtus
-; CHECK: fcvtzu {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %vcvtzu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float %a)
-  %0 = extractelement <1 x i32> %vcvtzu1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float)
-
-define i64 @test_vcvtud(double %a) {
-; CHECK: test_vcvtud
-; CHECK: fcvtzu {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %vcvtzu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double %a)
-  %0 = extractelement <1 x i64> %vcvtzu1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double)

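The duplicate arm64 tests in cvt.ll reach the same fcvt* instructions
through plain IR conversions instead of the v1iN intrinsics removed here.
A sketch assuming the standard fptosi lowering (name and CHECK line are
illustrative, not from this commit):

define i32 @sketch_fcvtzs(float %a) {
; fptosi to a plain i32 lowers to a GPR-destination fcvtzs.
; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}
  %r = fptosi float %a to i32
  ret i32 %r
}
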
Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-fp-compare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-fp-compare.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-fp-compare.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-fp-compare.ll (removed)
@@ -1,283 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; arm64 does not use intrinsics for comparisons.
-
-;; Scalar Floating-point Compare
-
-define i32 @test_vceqs_f32(float %a, float %b) {
-; CHECK-LABEL: test_vceqs_f32
-; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %fceq2.i = call <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float %a, float %b)
-  %0 = extractelement <1 x i32> %fceq2.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vceqd_f64(double %a, double %b) {
-; CHECK-LABEL: test_vceqd_f64
-; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %fceq2.i = call <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f64(double %a, double %b)
-  %0 = extractelement <1 x i64> %fceq2.i, i32 0
-  ret i64 %0
-}
-
-define <1 x i64> @test_vceqz_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vceqz_f64
-; CHECK: fcmeq  {{d[0-9]+}}, {{d[0-9]+}}, #0.0
-entry:
-  %0 = fcmp oeq <1 x double> %a, zeroinitializer
-  %vceqz.i = sext <1 x i1> %0 to <1 x i64>
-  ret <1 x i64> %vceqz.i
-}
-
-define i32 @test_vceqzs_f32(float %a) {
-; CHECK-LABEL: test_vceqzs_f32
-; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, #0.0
-entry:
-  %fceq1.i = call <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float %a, float 0.0)
-  %0 = extractelement <1 x i32> %fceq1.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vceqzd_f64(double %a) {
-; CHECK-LABEL: test_vceqzd_f64
-; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, #0.0
-entry:
-  %fceq1.i = call <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f32(double %a, float 0.0)
-  %0 = extractelement <1 x i64> %fceq1.i, i32 0
-  ret i64 %0
-}
-
-define i32 @test_vcges_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcges_f32
-; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %fcge2.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float %b)
-  %0 = extractelement <1 x i32> %fcge2.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vcged_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcged_f64
-; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %fcge2.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double %a, double %b)
-  %0 = extractelement <1 x i64> %fcge2.i, i32 0
-  ret i64 %0
-}
-
-define i32 @test_vcgezs_f32(float %a) {
-; CHECK-LABEL: test_vcgezs_f32
-; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, #0.0
-entry:
-  %fcge1.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float 0.0)
-  %0 = extractelement <1 x i32> %fcge1.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vcgezd_f64(double %a) {
-; CHECK-LABEL: test_vcgezd_f64
-; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, #0.0
-entry:
-  %fcge1.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f32(double %a, float 0.0)
-  %0 = extractelement <1 x i64> %fcge1.i, i32 0
-  ret i64 %0
-}
-
-define i32 @test_vcgts_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcgts_f32
-; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %fcgt2.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float %b)
-  %0 = extractelement <1 x i32> %fcgt2.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vcgtd_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcgtd_f64
-; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %fcgt2.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double %a, double %b)
-  %0 = extractelement <1 x i64> %fcgt2.i, i32 0
-  ret i64 %0
-}
-
-define i32 @test_vcgtzs_f32(float %a) {
-; CHECK-LABEL: test_vcgtzs_f32
-; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, #0.0
-entry:
-  %fcgt1.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float 0.0)
-  %0 = extractelement <1 x i32> %fcgt1.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vcgtzd_f64(double %a) {
-; CHECK-LABEL: test_vcgtzd_f64
-; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, #0.0
-entry:
-  %fcgt1.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f32(double %a, float 0.0)
-  %0 = extractelement <1 x i64> %fcgt1.i, i32 0
-  ret i64 %0
-}
-
-define i32 @test_vcles_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcles_f32
-; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %fcge2.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float %b)
-  %0 = extractelement <1 x i32> %fcge2.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vcled_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcled_f64
-; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %fcge2.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double %a, double %b)
-  %0 = extractelement <1 x i64> %fcge2.i, i32 0
-  ret i64 %0
-}
-
-define i32 @test_vclezs_f32(float %a) {
-; CHECK-LABEL: test_vclezs_f32
-; CHECK: fcmle {{s[0-9]}}, {{s[0-9]}}, #0.0
-entry:
-  %fcle1.i = call <1 x i32> @llvm.aarch64.neon.fclez.v1i32.f32.f32(float %a, float 0.0)
-  %0 = extractelement <1 x i32> %fcle1.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vclezd_f64(double %a) {
-; CHECK-LABEL: test_vclezd_f64
-; CHECK: fcmle {{d[0-9]}}, {{d[0-9]}}, #0.0
-entry:
-  %fcle1.i = call <1 x i64> @llvm.aarch64.neon.fclez.v1i64.f64.f32(double %a, float 0.0)
-  %0 = extractelement <1 x i64> %fcle1.i, i32 0
-  ret i64 %0
-}
-
-define i32 @test_vclts_f32(float %a, float %b) {
-; CHECK-LABEL: test_vclts_f32
-; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %fcgt2.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float %b)
-  %0 = extractelement <1 x i32> %fcgt2.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vcltd_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcltd_f64
-; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %fcgt2.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double %a, double %b)
-  %0 = extractelement <1 x i64> %fcgt2.i, i32 0
-  ret i64 %0
-}
-
-define i32 @test_vcltzs_f32(float %a) {
-; CHECK-LABEL: test_vcltzs_f32
-; CHECK: fcmlt {{s[0-9]}}, {{s[0-9]}}, #0.0
-entry:
-  %fclt1.i = call <1 x i32> @llvm.aarch64.neon.fcltz.v1i32.f32.f32(float %a, float 0.0)
-  %0 = extractelement <1 x i32> %fclt1.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vcltzd_f64(double %a) {
-; CHECK-LABEL: test_vcltzd_f64
-; CHECK: fcmlt {{d[0-9]}}, {{d[0-9]}}, #0.0
-entry:
-  %fclt1.i = call <1 x i64> @llvm.aarch64.neon.fcltz.v1i64.f64.f32(double %a, float 0.0)
-  %0 = extractelement <1 x i64> %fclt1.i, i32 0
-  ret i64 %0
-}
-
-define i32 @test_vcages_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcages_f32
-; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %fcage2.i = call <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float %a, float %b)
-  %0 = extractelement <1 x i32> %fcage2.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vcaged_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcaged_f64
-; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %fcage2.i = call <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double %a, double %b)
-  %0 = extractelement <1 x i64> %fcage2.i, i32 0
-  ret i64 %0
-}
-
-define i32 @test_vcagts_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcagts_f32
-; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %fcagt2.i = call <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float %a, float %b)
-  %0 = extractelement <1 x i32> %fcagt2.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vcagtd_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcagtd_f64
-; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %fcagt2.i = call <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double %a, double %b)
-  %0 = extractelement <1 x i64> %fcagt2.i, i32 0
-  ret i64 %0
-}
-
-define i32 @test_vcales_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcales_f32
-; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %fcage2.i = call <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float %a, float %b)
-  %0 = extractelement <1 x i32> %fcage2.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vcaled_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcaled_f64
-; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %fcage2.i = call <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double %a, double %b)
-  %0 = extractelement <1 x i64> %fcage2.i, i32 0
-  ret i64 %0
-}
-
-define i32 @test_vcalts_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcalts_f32
-; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
-  %fcalt2.i = call <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float %a, float %b)
-  %0 = extractelement <1 x i32> %fcalt2.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vcaltd_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcaltd_f64
-; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
-  %fcalt2.i = call <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double %a, double %b)
-  %0 = extractelement <1 x i64> %fcalt2.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f32(double, float)
-declare <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f64(double, double)
-declare <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f32(double, float)
-declare <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double, double)
-declare <1 x i32> @llvm.aarch64.neon.fclez.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fclez.v1i64.f64.f32(double, float)
-declare <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f32(double, float)
-declare <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double, double)
-declare <1 x i32> @llvm.aarch64.neon.fcltz.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fcltz.v1i64.f64.f32(double, float)
-declare <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double, double)
-declare <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double, double)

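Since arm64 writes these comparisons without intrinsics, the equivalent
checks use fcmp plus sign-extension, exactly as the <1 x double> tests in
neon-scalar-compare.ll above already do. A minimal sketch of that form
(function name hypothetical):

define <1 x i64> @sketch_fcmeq(<1 x double> %a, <1 x double> %b) {
; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
  %cmp = fcmp oeq <1 x double> %a, %b
  %sext = sext <1 x i1> %cmp to <1 x i64>
  ret <1 x i64> %sext
}
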
Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-mul.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-mul.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-mul.ll (removed)
@@ -1,144 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; Just intrinsic wrangling, and arm64 does scalar differently anyway.
-
-define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) {
-; CHECK: test_vqdmulhh_s16
-; CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-  %1 = insertelement <1 x i16> undef, i16 %a, i32 0
-  %2 = insertelement <1 x i16> undef, i16 %b, i32 0
-  %3 = call <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16> %1, <1 x i16> %2)
-  %4 = extractelement <1 x i16> %3, i32 0
-  ret i16 %4
-}
-
-define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) {
-; CHECK: test_vqdmulhs_s32
-; CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  %1 = insertelement <1 x i32> undef, i32 %a, i32 0
-  %2 = insertelement <1 x i32> undef, i32 %b, i32 0
-  %3 = call <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32> %1, <1 x i32> %2)
-  %4 = extractelement <1 x i32> %3, i32 0
-  ret i32 %4
-}
-
-declare <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32>, <1 x i32>)
-
-define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) {
-; CHECK: test_vqrdmulhh_s16
-; CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-  %1 = insertelement <1 x i16> undef, i16 %a, i32 0
-  %2 = insertelement <1 x i16> undef, i16 %b, i32 0
-  %3 = call <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16> %1, <1 x i16> %2)
-  %4 = extractelement <1 x i16> %3, i32 0
-  ret i16 %4
-}
-
-define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) {
-; CHECK: test_vqrdmulhs_s32
-; CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  %1 = insertelement <1 x i32> undef, i32 %a, i32 0
-  %2 = insertelement <1 x i32> undef, i32 %b, i32 0
-  %3 = call <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32> %1, <1 x i32> %2)
-  %4 = extractelement <1 x i32> %3, i32 0
-  ret i32 %4
-}
-
-declare <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32>, <1 x i32>)
-
-define float @test_vmulxs_f32(float %a, float %b) {
-; CHECK: test_vmulxs_f32
-; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  %1 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %b)
-  ret float %1
-}
-
-define double @test_vmulxd_f64(double %a, double %b) {
-; CHECK: test_vmulxd_f64
-; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  %1 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %b)
-  ret double %1
-}
-
-declare float @llvm.aarch64.neon.vmulx.f32(float, float)
-declare double @llvm.aarch64.neon.vmulx.f64(double, double)
-
-define i32 @test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) {
-; CHECK: test_vqdmlalh_s16
-; CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-entry:
-  %vqdmlal.i = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vqdmlal1.i = insertelement <1 x i16> undef, i16 %b, i32 0
-  %vqdmlal2.i = insertelement <1 x i16> undef, i16 %c, i32 0
-  %vqdmlal3.i = call <1 x i32> @llvm.aarch64.neon.vqdmlal.v1i32(<1 x i32> %vqdmlal.i, <1 x i16> %vqdmlal1.i, <1 x i16> %vqdmlal2.i)
-  %0 = extractelement <1 x i32> %vqdmlal3.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vqdmlals_s32(i64 %a, i32 %b, i32 %c) {
-; CHECK: test_vqdmlals_s32
-; CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %vqdmlal.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vqdmlal1.i = insertelement <1 x i32> undef, i32 %b, i32 0
-  %vqdmlal2.i = insertelement <1 x i32> undef, i32 %c, i32 0
-  %vqdmlal3.i = call <1 x i64> @llvm.aarch64.neon.vqdmlal.v1i64(<1 x i64> %vqdmlal.i, <1 x i32> %vqdmlal1.i, <1 x i32> %vqdmlal2.i)
-  %0 = extractelement <1 x i64> %vqdmlal3.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vqdmlal.v1i32(<1 x i32>, <1 x i16>, <1 x i16>)
-declare <1 x i64> @llvm.aarch64.neon.vqdmlal.v1i64(<1 x i64>, <1 x i32>, <1 x i32>)
-
-define i32 @test_vqdmlslh_s16(i32 %a, i16 %b, i16 %c) {
-; CHECK: test_vqdmlslh_s16
-; CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-entry:
-  %vqdmlsl.i = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vqdmlsl1.i = insertelement <1 x i16> undef, i16 %b, i32 0
-  %vqdmlsl2.i = insertelement <1 x i16> undef, i16 %c, i32 0
-  %vqdmlsl3.i = call <1 x i32> @llvm.aarch64.neon.vqdmlsl.v1i32(<1 x i32> %vqdmlsl.i, <1 x i16> %vqdmlsl1.i, <1 x i16> %vqdmlsl2.i)
-  %0 = extractelement <1 x i32> %vqdmlsl3.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vqdmlsls_s32(i64 %a, i32 %b, i32 %c) {
-; CHECK: test_vqdmlsls_s32
-; CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %vqdmlsl.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vqdmlsl1.i = insertelement <1 x i32> undef, i32 %b, i32 0
-  %vqdmlsl2.i = insertelement <1 x i32> undef, i32 %c, i32 0
-  %vqdmlsl3.i = call <1 x i64> @llvm.aarch64.neon.vqdmlsl.v1i64(<1 x i64> %vqdmlsl.i, <1 x i32> %vqdmlsl1.i, <1 x i32> %vqdmlsl2.i)
-  %0 = extractelement <1 x i64> %vqdmlsl3.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vqdmlsl.v1i32(<1 x i32>, <1 x i16>, <1 x i16>)
-declare <1 x i64> @llvm.aarch64.neon.vqdmlsl.v1i64(<1 x i64>, <1 x i32>, <1 x i32>)
-
-define i32 @test_vqdmullh_s16(i16 %a, i16 %b) {
-; CHECK: test_vqdmullh_s16
-; CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-entry:
-  %vqdmull.i = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vqdmull1.i = insertelement <1 x i16> undef, i16 %b, i32 0
-  %vqdmull2.i = call <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16> %vqdmull.i, <1 x i16> %vqdmull1.i)
-  %0 = extractelement <1 x i32> %vqdmull2.i, i32 0
-  ret i32 %0
-}
-
-define i64 @test_vqdmulls_s32(i32 %a, i32 %b) {
-; CHECK: test_vqdmulls_s32
-; CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %vqdmull.i = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vqdmull1.i = insertelement <1 x i32> undef, i32 %b, i32 0
-  %vqdmull2.i = call <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32> %vqdmull.i, <1 x i32> %vqdmull1.i)
-  %0 = extractelement <1 x i64> %vqdmull2.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16>, <1 x i16>)
-declare <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32>, <1 x i32>)

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-neg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-neg.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-neg.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-neg.ll (removed)
@@ -1,62 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; Intrinsic wrangling. arm64 does it differently.
-
-define i64 @test_vnegd_s64(i64 %a) {
-; CHECK: test_vnegd_s64
-; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %vneg.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vneg1.i = tail call <1 x i64> @llvm.aarch64.neon.vneg(<1 x i64> %vneg.i)
-  %0 = extractelement <1 x i64> %vneg1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vneg(<1 x i64>)
-
-define i8 @test_vqnegb_s8(i8 %a) {
-; CHECK: test_vqnegb_s8
-; CHECK: sqneg {{b[0-9]+}}, {{b[0-9]+}}
-entry:
-  %vqneg.i = insertelement <1 x i8> undef, i8 %a, i32 0
-  %vqneg1.i = call <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8> %vqneg.i)
-  %0 = extractelement <1 x i8> %vqneg1.i, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8>)
-
-define i16 @test_vqnegh_s16(i16 %a) {
-; CHECK: test_vqnegh_s16
-; CHECK: sqneg {{h[0-9]+}}, {{h[0-9]+}}
-entry:
-  %vqneg.i = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vqneg1.i = call <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16> %vqneg.i)
-  %0 = extractelement <1 x i16> %vqneg1.i, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16>)
-
-define i32 @test_vqnegs_s32(i32 %a) {
-; CHECK: test_vqnegs_s32
-; CHECK: sqneg {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %vqneg.i = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vqneg1.i = call <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32> %vqneg.i)
-  %0 = extractelement <1 x i32> %vqneg1.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32>)
-
-define i64 @test_vqnegd_s64(i64 %a) {
-; CHECK: test_vqnegd_s64
-; CHECK: sqneg {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %vqneg.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vqneg1.i = call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %vqneg.i)
-  %0 = extractelement <1 x i64> %vqneg1.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>)

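The plain neg case needs no intrinsic in the arm64 scheme either:
subtracting from zero on a <1 x i64> value is sufficient. A hedged sketch
(hypothetical name; the saturating sqneg cases still need an intrinsic and
are not shown):

define <1 x i64> @sketch_neg(<1 x i64> %a) {
; 0 - a on a v1i64 value is expected to select to a d-register neg
; (assumed lowering, not taken from this commit).
  %r = sub <1 x i64> zeroinitializer, %a
  ret <1 x i64> %r
}
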
Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll (removed)
@@ -1,93 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; duplicates arm64 tests in vsqrt.ll
-
-define float @test_vrecpss_f32(float %a, float %b) {
-; CHECK: test_vrecpss_f32
-; CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  %1 = call float @llvm.aarch64.neon.vrecps.f32(float %a, float %b)
-  ret float %1
-}
-
-define double @test_vrecpsd_f64(double %a, double %b) {
-; CHECK: test_vrecpsd_f64
-; CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  %1 = call double @llvm.aarch64.neon.vrecps.f64(double %a, double %b)
-  ret double %1
-}
-
-declare float @llvm.aarch64.neon.vrecps.f32(float, float)
-declare double @llvm.aarch64.neon.vrecps.f64(double, double)
-
-define float @test_vrsqrtss_f32(float %a, float %b) {
-; CHECK: test_vrsqrtss_f32
-; CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  %1 = call float @llvm.aarch64.neon.vrsqrts.f32(float %a, float %b)
-  ret float %1
-}
-
-define double @test_vrsqrtsd_f64(double %a, double %b) {
-; CHECK: test_vrsqrtsd_f64
-; CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  %1 = call double @llvm.aarch64.neon.vrsqrts.f64(double %a, double %b)
-  ret double %1
-}
-
-declare float @llvm.aarch64.neon.vrsqrts.f32(float, float)
-declare double @llvm.aarch64.neon.vrsqrts.f64(double, double)
-
-define float @test_vrecpes_f32(float %a) {
-; CHECK: test_vrecpes_f32
-; CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %0 = call float @llvm.aarch64.neon.vrecpe.f32(float %a)
-  ret float %0
-}
-
-define double @test_vrecped_f64(double %a) {
-; CHECK: test_vrecped_f64
-; CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %0 = call double @llvm.aarch64.neon.vrecpe.f64(double %a)
-  ret double %0
-}
-
-declare float @llvm.aarch64.neon.vrecpe.f32(float)
-declare double @llvm.aarch64.neon.vrecpe.f64(double)
-
-define float @test_vrecpxs_f32(float %a) {
-; CHECK: test_vrecpxs_f32
-; CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %0 = call float @llvm.aarch64.neon.vrecpx.f32(float %a)
-  ret float %0
-}
-
-define double @test_vrecpxd_f64(double %a) {
-; CHECK: test_vrecpxd_f64
-; CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %0 = call double @llvm.aarch64.neon.vrecpx.f64(double %a)
-  ret double %0
-}
-
-declare float @llvm.aarch64.neon.vrecpx.f32(float)
-declare double @llvm.aarch64.neon.vrecpx.f64(double)
-
-define float @test_vrsqrtes_f32(float %a) {
-; CHECK: test_vrsqrtes_f32
-; CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %0 = call float @llvm.aarch64.neon.vrsqrte.f32(float %a)
-  ret float %0
-}
-
-define double @test_vrsqrted_f64(double %a) {
-; CHECK: test_vrsqrted_f64
-; CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %0 = call double @llvm.aarch64.neon.vrsqrte.f64(double %a)
-  ret double %0
-}
-
-declare float @llvm.aarch64.neon.vrsqrte.f32(float)
-declare double @llvm.aarch64.neon.vrsqrte.f64(double)

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll (removed)
@@ -1,216 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; Intrinsic wrangling. Duplicates various arm64 tests.
-
-declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>)
-
-define <1 x i64> @test_addp_v1i64(<2 x i64> %a) {
-; CHECK: test_addp_v1i64:
-; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a)
-  ret <1 x i64> %val
-}
-
-declare float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float>)
-
-define float @test_faddp_f32(<2 x float> %a) {
-; CHECK: test_faddp_f32:
-; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
-  %val = call float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float> %a)
-  ret float %val
-}
-
-declare double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double>)
-
-define double @test_faddp_f64(<2 x double> %a) {
-; CHECK: test_faddp_f64:
-; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %val = call double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double> %a)
-  ret double %val
-}
-
-
-declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>)
-
-define float @test_fmaxp_f32(<2 x float> %a) {
-; CHECK: test_fmaxp_f32:
-; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s
-  %val = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a)
-  ret float %val
-}
-
-declare double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double>)
-
-define double @test_fmaxp_f64(<2 x double> %a) {
-; CHECK: test_fmaxp_f64:
-; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %val = call double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double> %a)
-  ret double %val
-}
-
-declare float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float>)
-
-define float @test_fminp_f32(<2 x float> %a) {
-; CHECK: test_fminp_f32:
-; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s
-  %val = call float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float> %a)
-  ret float %val
-}
-
-declare double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double>)
-
-define double @test_fminp_f64(<2 x double> %a) {
-; CHECK: test_fminp_f64:
-; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %val = call double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double> %a)
-  ret double %val
-}
-
-declare float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float>)
-
-define float @test_fmaxnmp_f32(<2 x float> %a) {
-; CHECK: test_fmaxnmp_f32:
-; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
-  %val = call float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float> %a)
-  ret float %val
-}
-
-declare double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double>)
-
-define double @test_fmaxnmp_f64(<2 x double> %a) {
-; CHECK: test_fmaxnmp_f64:
-; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %val = call double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double> %a)
-  ret double %val
-}
-
-declare float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float>)
-
-define float @test_fminnmp_f32(<2 x float> %a) {
-; CHECK: test_fminnmp_f32:
-; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
-  %val = call float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float> %a)
-  ret float %val
-}
-
-declare double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double>)
-
-define double @test_fminnmp_f64(<2 x double> %a) {
-; CHECK: test_fminnmp_f64:
-; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %val = call double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double> %a)
-  ret double %val
-}
-
-define float @test_vaddv_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vaddv_f32
-; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
-  %1 = call float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float> %a)
-  ret float %1
-}
-
-define float @test_vaddvq_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vaddvq_f32
-; CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
-  %1 = call float @llvm.aarch64.neon.vpfadd.f32.v4f32(<4 x float> %a)
-  ret float %1
-}
-
-define double @test_vaddvq_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vaddvq_f64
-; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %1 = call double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double> %a)
-  ret double %1
-}
-
-define float @test_vmaxv_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vmaxv_f32
-; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s
-  %1 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a)
-  ret float %1
-}
-
-define double @test_vmaxvq_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vmaxvq_f64
-; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %1 = call double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double> %a)
-  ret double %1
-}
-
-define float @test_vminv_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vminv_f32
-; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s
-  %1 = call float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float> %a)
-  ret float %1
-}
-
-define double @test_vminvq_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vminvq_f64
-; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %1 = call double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double> %a)
-  ret double %1
-}
-
-define double @test_vmaxnmvq_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vmaxnmvq_f64
-; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %1 = call double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double> %a)
-  ret double %1
-}
-
-define float @test_vmaxnmv_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vmaxnmv_f32
-; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
-  %1 = call float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float> %a)
-  ret float %1
-}
-
-define double @test_vminnmvq_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vminnmvq_f64
-; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %1 = call double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double> %a)
-  ret double %1
-}
-
-define float @test_vminnmv_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vminnmv_f32
-; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
-  %1 = call float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float> %a)
-  ret float %1
-}
-
-define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vpaddq_s64
-; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-  %1 = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vpaddq_u64
-; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-  %1 = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i64> %1
-}
-
-define i64 @test_vaddvq_s64(<2 x i64> %a) {
-; CHECK-LABEL: test_vaddvq_s64
-; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %1 = call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a)
-  %2 = extractelement <1 x i64> %1, i32 0
-  ret i64 %2
-}
-
-define i64 @test_vaddvq_u64(<2 x i64> %a) {
-; CHECK-LABEL: test_vaddvq_u64
-; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
-  %1 = call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a)
-  %2 = extractelement <1 x i64> %1, i32 0
-  ret i64 %2
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64>)
-
-declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>)
-
-declare float @llvm.aarch64.neon.vpfadd.f32.v4f32(<4 x float>)

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll (removed)
@@ -1,39 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; Duplicates arm64'd vshift.ll
-
-declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_urshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_srshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_urshl_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_srshl_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-
-

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll (removed)
@@ -1,243 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; Intrinsic wrangling and arm64 does it differently.
-
-declare <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
-declare <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8>, <1 x i8>)
-
-define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_uqadd_v1i8_aarch64:
-  %tmp1 = call <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-;CHECK: uqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
-  ret <1 x i8> %tmp1
-}
-
-define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_sqadd_v1i8_aarch64:
-  %tmp1 = call <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-;CHECK: sqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
-  ret <1 x i8> %tmp1
-}
-
-declare <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>)
-declare <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>)
-
-define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_uqsub_v1i8_aarch64:
-  %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-;CHECK: uqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
-  ret <1 x i8> %tmp1
-}
-
-define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_sqsub_v1i8_aarch64:
-  %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-;CHECK: sqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
-  ret <1 x i8> %tmp1
-}
-
-declare <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16>, <1 x i16>)
-
-define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_uqadd_v1i16_aarch64:
-  %tmp1 = call <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-;CHECK: uqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-  ret <1 x i16> %tmp1
-}
-
-define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_sqadd_v1i16_aarch64:
-  %tmp1 = call <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-;CHECK: sqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-  ret <1 x i16> %tmp1
-}
-
-declare <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>)
-
-define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_uqsub_v1i16_aarch64:
-  %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-;CHECK: uqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-  ret <1 x i16> %tmp1
-}
-
-define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_sqsub_v1i16_aarch64:
-  %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-;CHECK: sqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-  ret <1 x i16> %tmp1
-}
-
-declare <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>)
-declare <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32>, <1 x i32>)
-
-define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_uqadd_v1i32_aarch64:
-  %tmp1 = call <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-;CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  ret <1 x i32> %tmp1
-}
-
-define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_sqadd_v1i32_aarch64:
-  %tmp1 = call <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-;CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  ret <1 x i32> %tmp1
-}
-
-declare <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>)
-declare <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>)
-
-define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_uqsub_v1i32_aarch64:
-  %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-;CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  ret <1 x i32> %tmp1
-}
-
-
-define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_sqsub_v1i32_aarch64:
-  %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-;CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  ret <1 x i32> %tmp1
-}
-
-declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqadd_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqadd_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqsub_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqsub_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-define i8 @test_vuqaddb_s8(i8 %a, i8 %b) {
-; CHECK: test_vuqaddb_s8
-; CHECK: suqadd {{b[0-9]+}}, {{b[0-9]+}}
-entry:
-  %vuqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0
-  %vuqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0
-  %vuqadd2.i = call <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8> %vuqadd.i, <1 x i8> %vuqadd1.i)
-  %0 = extractelement <1 x i8> %vuqadd2.i, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>)
-
-define i16 @test_vuqaddh_s16(i16 %a, i16 %b) {
-; CHECK: test_vuqaddh_s16
-; CHECK: suqadd {{h[0-9]+}}, {{h[0-9]+}}
-entry:
-  %vuqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vuqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0
-  %vuqadd2.i = call <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16> %vuqadd.i, <1 x i16> %vuqadd1.i)
-  %0 = extractelement <1 x i16> %vuqadd2.i, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>)
-
-define i32 @test_vuqadds_s32(i32 %a, i32 %b) {
-; CHECK: test_vuqadds_s32
-; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %vuqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vuqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0
-  %vuqadd2.i = call <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32> %vuqadd.i, <1 x i32> %vuqadd1.i)
-  %0 = extractelement <1 x i32> %vuqadd2.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32>, <1 x i32>)
-
-define i64 @test_vuqaddd_s64(i64 %a, i64 %b) {
-; CHECK: test_vuqaddd_s64
-; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %vuqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vuqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vuqadd2.i = call <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64> %vuqadd.i, <1 x i64> %vuqadd1.i)
-  %0 = extractelement <1 x i64> %vuqadd2.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64>, <1 x i64>)
-
-define i8 @test_vsqaddb_u8(i8 %a, i8 %b) {
-; CHECK: test_vsqaddb_u8
-; CHECK: usqadd {{b[0-9]+}}, {{b[0-9]+}}
-entry:
-  %vsqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0
-  %vsqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0
-  %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %vsqadd.i, <1 x i8> %vsqadd1.i)
-  %0 = extractelement <1 x i8> %vsqadd2.i, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8>, <1 x i8>)
-
-define i16 @test_vsqaddh_u16(i16 %a, i16 %b) {
-; CHECK: test_vsqaddh_u16
-; CHECK: usqadd {{h[0-9]+}}, {{h[0-9]+}}
-entry:
-  %vsqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vsqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0
-  %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %vsqadd.i, <1 x i16> %vsqadd1.i)
-  %0 = extractelement <1 x i16> %vsqadd2.i, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16>, <1 x i16>)
-
-define i32 @test_vsqadds_u32(i32 %a, i32 %b) {
-; CHECK: test_vsqadds_u32
-; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
-entry:
-  %vsqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vsqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0
-  %vsqadd2.i = call <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32> %vsqadd.i, <1 x i32> %vsqadd1.i)
-  %0 = extractelement <1 x i32> %vsqadd2.i, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32>, <1 x i32>)
-
-define i64 @test_vsqaddd_u64(i64 %a, i64 %b) {
-; CHECK: test_vsqaddd_u64
-; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %vsqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vsqadd2.i = call <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64> %vsqadd.i, <1 x i64> %vsqadd1.i)
-  %0 = extractelement <1 x i64> %vsqadd2.i, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64>, <1 x i64>)

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll (removed)
@@ -1,95 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; Intrinsic wrangling and arm64 does it differently.
-
-declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqrshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqrshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8>, <1 x i8>)
-declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>)
-
-define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_uqrshl_v1i8_aarch64:
-  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-;CHECK: uqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
-
-  ret <1 x i8> %tmp1
-}
-
-define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_sqrshl_v1i8_aarch64:
-  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-;CHECK: sqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
-  ret <1 x i8> %tmp1
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>)
-
-define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_uqrshl_v1i16_aarch64:
-  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-;CHECK: uqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-
-  ret <1 x i16> %tmp1
-}
-
-define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_sqrshl_v1i16_aarch64:
-  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-;CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-  ret <1 x i16> %tmp1
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32>, <1 x i32>)
-declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>)
-
-define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_uqrshl_v1i32_aarch64:
-  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-;CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-
-  ret <1 x i32> %tmp1
-}
-
-define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_sqrshl_v1i32_aarch64:
-  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-;CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  ret <1 x i32> %tmp1
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqrshl_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqrshl_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-
-

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll (removed)
@@ -1,89 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; Intrinsic wrangling and arm64 does it differently.
-
-declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8>, <1 x i8>)
-declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>)
-
-define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_uqshl_v1i8_aarch64:
-  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-;CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
-  ret <1 x i8> %tmp1
-}
-
-define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_sqshl_v1i8_aarch64:
-  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-;CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
-  ret <1 x i8> %tmp1
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>)
-
-define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_uqshl_v1i16_aarch64:
-  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-;CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-  ret <1 x i16> %tmp1
-}
-
-define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_sqshl_v1i16_aarch64:
-  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-;CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-  ret <1 x i16> %tmp1
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32>, <1 x i32>)
-declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>)
-
-define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_uqshl_v1i32_aarch64:
-  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-;CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  ret <1 x i32> %tmp1
-}
-
-define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_sqshl_v1i32_aarch64:
-  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-;CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  ret <1 x i32> %tmp1
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqshl_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqshl_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift-imm.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift-imm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift-imm.ll (removed)
@@ -1,532 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; Intrinsic wrangling & arm64 does it differently.
-
-define i64 @test_vshrd_n_s64(i64 %a) {
-; CHECK: test_vshrd_n_s64
-; CHECK: sshr {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vsshr = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsshr1 = call <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64> %vsshr, i32 63)
-  %0 = extractelement <1 x i64> %vsshr1, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64>, i32)
-
-define i64 @test_vshrd_n_u64(i64 %a) {
-; CHECK: test_vshrd_n_u64
-; CHECK: ushr {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vushr = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vushr1 = call <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64> %vushr, i32 63)
-  %0 = extractelement <1 x i64> %vushr1, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64>, i32)
-
-define i64 @test_vrshrd_n_s64(i64 %a) {
-; CHECK: test_vrshrd_n_s64
-; CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vsrshr = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsrshr1 = call <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64> %vsrshr, i32 63)
-  %0 = extractelement <1 x i64> %vsrshr1, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64>, i32)
-
-define i64 @test_vrshrd_n_u64(i64 %a) {
-; CHECK: test_vrshrd_n_u64
-; CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vurshr = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vurshr1 = call <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64> %vurshr, i32 63)
-  %0 = extractelement <1 x i64> %vurshr1, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64>, i32)
-
-define i64 @test_vsrad_n_s64(i64 %a, i64 %b) {
-; CHECK: test_vsrad_n_s64
-; CHECK: ssra {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vssra = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vssra1 = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vssra2 = call <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64> %vssra, <1 x i64> %vssra1, i32 63)
-  %0 = extractelement <1 x i64> %vssra2, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64>, <1 x i64>, i32)
-
-define i64 @test_vsrad_n_u64(i64 %a, i64 %b) {
-; CHECK: test_vsrad_n_u64
-; CHECK: usra {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vusra = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vusra1 = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vusra2 = call <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64> %vusra, <1 x i64> %vusra1, i32 63)
-  %0 = extractelement <1 x i64> %vusra2, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64>, <1 x i64>, i32)
-
-define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) {
-; CHECK: test_vrsrad_n_s64
-; CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vsrsra = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsrsra1 = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vsrsra2 = call <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64> %vsrsra, <1 x i64> %vsrsra1, i32 63)
-  %0 = extractelement <1 x i64> %vsrsra2, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64>, <1 x i64>, i32)
-
-define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) {
-; CHECK: test_vrsrad_n_u64
-; CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vursra = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vursra1 = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vursra2 = call <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64> %vursra, <1 x i64> %vursra1, i32 63)
-  %0 = extractelement <1 x i64> %vursra2, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64>, <1 x i64>, i32)
-
-define i64 @test_vshld_n_s64(i64 %a) {
-; CHECK: test_vshld_n_s64
-; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vshl = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63)
-  %0 = extractelement <1 x i64> %vshl1, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64>, i32)
-
-define i64 @test_vshld_n_u64(i64 %a) {
-; CHECK: test_vshld_n_u64
-; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vshl = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63)
-  %0 = extractelement <1 x i64> %vshl1, i32 0
-  ret i64 %0
-}
-
-define i8 @test_vqshlb_n_s8(i8 %a) {
-; CHECK: test_vqshlb_n_s8
-; CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, #7
-entry:
-  %vsqshl = insertelement <1 x i8> undef, i8 %a, i32 0
-  %vsqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8> %vsqshl, i32 7)
-  %0 = extractelement <1 x i8> %vsqshl1, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8>, i32)
-
-define i16 @test_vqshlh_n_s16(i16 %a) {
-; CHECK: test_vqshlh_n_s16
-; CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, #15
-entry:
-  %vsqshl = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vsqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16> %vsqshl, i32 15)
-  %0 = extractelement <1 x i16> %vsqshl1, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16>, i32)
-
-define i32 @test_vqshls_n_s32(i32 %a) {
-; CHECK: test_vqshls_n_s32
-; CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, #31
-entry:
-  %vsqshl = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vsqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32> %vsqshl, i32 31)
-  %0 = extractelement <1 x i32> %vsqshl1, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32>, i32)
-
-define i64 @test_vqshld_n_s64(i64 %a) {
-; CHECK: test_vqshld_n_s64
-; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vsqshl = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64> %vsqshl, i32 63)
-  %0 = extractelement <1 x i64> %vsqshl1, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64>, i32)
-
-define i8 @test_vqshlb_n_u8(i8 %a) {
-; CHECK: test_vqshlb_n_u8
-; CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, #7
-entry:
-  %vuqshl = insertelement <1 x i8> undef, i8 %a, i32 0
-  %vuqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8> %vuqshl, i32 7)
-  %0 = extractelement <1 x i8> %vuqshl1, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8>, i32)
-
-define i16 @test_vqshlh_n_u16(i16 %a) {
-; CHECK: test_vqshlh_n_u16
-; CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, #15
-entry:
-  %vuqshl = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vuqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16> %vuqshl, i32 15)
-  %0 = extractelement <1 x i16> %vuqshl1, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16>, i32)
-
-define i32 @test_vqshls_n_u32(i32 %a) {
-; CHECK: test_vqshls_n_u32
-; CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, #31
-entry:
-  %vuqshl = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vuqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32> %vuqshl, i32 31)
-  %0 = extractelement <1 x i32> %vuqshl1, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32>, i32)
-
-define i64 @test_vqshld_n_u64(i64 %a) {
-; CHECK: test_vqshld_n_u64
-; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vuqshl = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vuqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64> %vuqshl, i32 63)
-  %0 = extractelement <1 x i64> %vuqshl1, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64>, i32)
-
-define i8 @test_vqshlub_n_s8(i8 %a) {
-; CHECK: test_vqshlub_n_s8
-; CHECK: sqshlu {{b[0-9]+}}, {{b[0-9]+}}, #7
-entry:
-  %vsqshlu = insertelement <1 x i8> undef, i8 %a, i32 0
-  %vsqshlu1 = call <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8> %vsqshlu, i32 7)
-  %0 = extractelement <1 x i8> %vsqshlu1, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8>, i32)
-
-define i16 @test_vqshluh_n_s16(i16 %a) {
-; CHECK: test_vqshluh_n_s16
-; CHECK: sqshlu {{h[0-9]+}}, {{h[0-9]+}}, #15
-entry:
-  %vsqshlu = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vsqshlu1 = call <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16> %vsqshlu, i32 15)
-  %0 = extractelement <1 x i16> %vsqshlu1, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16>, i32)
-
-define i32 @test_vqshlus_n_s32(i32 %a) {
-; CHECK: test_vqshlus_n_s32
-; CHECK: sqshlu {{s[0-9]+}}, {{s[0-9]+}}, #31
-entry:
-  %vsqshlu = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vsqshlu1 = call <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32> %vsqshlu, i32 31)
-  %0 = extractelement <1 x i32> %vsqshlu1, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32>, i32)
-
-define i64 @test_vqshlud_n_s64(i64 %a) {
-; CHECK: test_vqshlud_n_s64
-; CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vsqshlu = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsqshlu1 = call <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64> %vsqshlu, i32 63)
-  %0 = extractelement <1 x i64> %vsqshlu1, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64>, i32)
-
-define i64 @test_vsrid_n_s64(i64 %a, i64 %b) {
-; CHECK: test_vsrid_n_s64
-; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vsri = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63)
-  %0 = extractelement <1 x i64> %vsri2, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64>, <1 x i64>, i32)
-
-define i64 @test_vsrid_n_u64(i64 %a, i64 %b) {
-; CHECK: test_vsrid_n_u64
-; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vsri = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63)
-  %0 = extractelement <1 x i64> %vsri2, i32 0
-  ret i64 %0
-}
-
-define i64 @test_vslid_n_s64(i64 %a, i64 %b) {
-; CHECK: test_vslid_n_s64
-; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vsli = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63)
-  %0 = extractelement <1 x i64> %vsli2, i32 0
-  ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32)
-
-define i64 @test_vslid_n_u64(i64 %a, i64 %b) {
-; CHECK: test_vslid_n_u64
-; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
-  %vsli = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63)
-  %0 = extractelement <1 x i64> %vsli2, i32 0
-  ret i64 %0
-}
-
-define i8 @test_vqshrnh_n_s16(i16 %a) {
-; CHECK: test_vqshrnh_n_s16
-; CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
-  %vsqshrn = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vsqshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16> %vsqshrn, i32 8)
-  %0 = extractelement <1 x i8> %vsqshrn1, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqshrns_n_s32(i32 %a) {
-; CHECK: test_vqshrns_n_s32
-; CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
-  %vsqshrn = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vsqshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32> %vsqshrn, i32 16)
-  %0 = extractelement <1 x i16> %vsqshrn1, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqshrnd_n_s64(i64 %a) {
-; CHECK: test_vqshrnd_n_s64
-; CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
-  %vsqshrn = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsqshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64> %vsqshrn, i32 32)
-  %0 = extractelement <1 x i32> %vsqshrn1, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64>, i32)
-
-define i8 @test_vqshrnh_n_u16(i16 %a) {
-; CHECK: test_vqshrnh_n_u16
-; CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
-  %vuqshrn = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vuqshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16> %vuqshrn, i32 8)
-  %0 = extractelement <1 x i8> %vuqshrn1, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqshrns_n_u32(i32 %a) {
-; CHECK: test_vqshrns_n_u32
-; CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
-  %vuqshrn = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vuqshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32> %vuqshrn, i32 16)
-  %0 = extractelement <1 x i16> %vuqshrn1, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqshrnd_n_u64(i64 %a) {
-; CHECK: test_vqshrnd_n_u64
-; CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
-  %vuqshrn = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vuqshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64> %vuqshrn, i32 32)
-  %0 = extractelement <1 x i32> %vuqshrn1, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64>, i32)
-
-define i8 @test_vqrshrnh_n_s16(i16 %a) {
-; CHECK: test_vqrshrnh_n_s16
-; CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
-  %vsqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vsqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16> %vsqrshrn, i32 8)
-  %0 = extractelement <1 x i8> %vsqrshrn1, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqrshrns_n_s32(i32 %a) {
-; CHECK: test_vqrshrns_n_s32
-; CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
-  %vsqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vsqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32> %vsqrshrn, i32 16)
-  %0 = extractelement <1 x i16> %vsqrshrn1, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqrshrnd_n_s64(i64 %a) {
-; CHECK: test_vqrshrnd_n_s64
-; CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
-  %vsqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64> %vsqrshrn, i32 32)
-  %0 = extractelement <1 x i32> %vsqrshrn1, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64>, i32)
-
-define i8 @test_vqrshrnh_n_u16(i16 %a) {
-; CHECK: test_vqrshrnh_n_u16
-; CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
-  %vuqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vuqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16> %vuqrshrn, i32 8)
-  %0 = extractelement <1 x i8> %vuqrshrn1, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqrshrns_n_u32(i32 %a) {
-; CHECK: test_vqrshrns_n_u32
-; CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
-  %vuqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vuqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %vuqrshrn, i32 16)
-  %0 = extractelement <1 x i16> %vuqrshrn1, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqrshrnd_n_u64(i64 %a) {
-; CHECK: test_vqrshrnd_n_u64
-; CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
-  %vuqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vuqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64> %vuqrshrn, i32 32)
-  %0 = extractelement <1 x i32> %vuqrshrn1, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64>, i32)
-
-define i8 @test_vqshrunh_n_s16(i16 %a) {
-; CHECK: test_vqshrunh_n_s16
-; CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
-  %vsqshrun = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vsqshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16> %vsqshrun, i32 8)
-  %0 = extractelement <1 x i8> %vsqshrun1, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqshruns_n_s32(i32 %a) {
-; CHECK: test_vqshruns_n_s32
-; CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
-  %vsqshrun = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vsqshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32> %vsqshrun, i32 16)
-  %0 = extractelement <1 x i16> %vsqshrun1, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqshrund_n_s64(i64 %a) {
-; CHECK: test_vqshrund_n_s64
-; CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
-  %vsqshrun = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsqshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64> %vsqshrun, i32 32)
-  %0 = extractelement <1 x i32> %vsqshrun1, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64>, i32)
-
-define i8 @test_vqrshrunh_n_s16(i16 %a) {
-; CHECK: test_vqrshrunh_n_s16
-; CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
-  %vsqrshrun = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vsqrshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16> %vsqrshrun, i32 8)
-  %0 = extractelement <1 x i8> %vsqrshrun1, i32 0
-  ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqrshruns_n_s32(i32 %a) {
-; CHECK: test_vqrshruns_n_s32
-; CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
-  %vsqrshrun = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vsqrshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32> %vsqrshrun, i32 16)
-  %0 = extractelement <1 x i16> %vsqrshrun1, i32 0
-  ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqrshrund_n_s64(i64 %a) {
-; CHECK: test_vqrshrund_n_s64
-; CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
-  %vsqrshrun = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsqrshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64> %vsqrshrun, i32 32)
-  %0 = extractelement <1 x i32> %vsqrshrun1, i32 0
-  ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64>, i32)

Removed: llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift.ll (removed)
@@ -1,237 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; Duplicates existing arm64 tests in vshift.ll and vcmp.ll
-
-declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_ushl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_ushl_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sshl_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vtst_s64
-; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %0 = and <1 x i64> %a, %b
-  %1 = icmp ne <1 x i64> %0, zeroinitializer
-  %vtst.i = sext <1 x i1> %1 to <1 x i64>
-  ret <1 x i64> %vtst.i
-}
-
-define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vtst_u64
-; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-entry:
-  %0 = and <1 x i64> %a, %b
-  %1 = icmp ne <1 x i64> %0, zeroinitializer
-  %vtst.i = sext <1 x i1> %1 to <1 x i64>
-  ret <1 x i64> %vtst.i
-}
-
-define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vsli_n_p64
-; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #0
-entry:
-  %vsli_n2 = tail call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %a, <1 x i64> %b, i32 0)
-  ret <1 x i64> %vsli_n2
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32)
-
-define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsliq_n_p64
-; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
-entry:
-  %vsli_n2 = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 0)
-  ret <2 x i64> %vsli_n2
-}
-
-declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32)
-
-define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) {
-; CHECK-LABEL: test_vrsqrte_u32
-; CHECK: ursqrte {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vrsqrte1.i = tail call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a)
-  ret <2 x i32> %vrsqrte1.i
-}
-
-define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) {
-; CHECK-LABEL: test_vrsqrteq_u32
-; CHECK: ursqrte {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vrsqrte1.i = tail call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a)
-  ret <4 x i32> %vrsqrte1.i
-}
-
-define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) {
-; CHECK-LABEL: test_vqshl_n_s8
-; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
-entry:
-  %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
-  ret <8 x i8> %vqshl_n
-}
-
-declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)
-
-define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) {
-; CHECK-LABEL: test_vqshlq_n_s8
-; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
-entry:
-  %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
-  ret <16 x i8> %vqshl_n
-}
-
-declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>)
-
-define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) {
-; CHECK-LABEL: test_vqshl_n_s16
-; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
-entry:
-  %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer)
-  ret <4 x i16> %vqshl_n1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>)
-
-define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) {
-; CHECK-LABEL: test_vqshlq_n_s16
-; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
-entry:
-  %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> zeroinitializer)
-  ret <8 x i16> %vqshl_n1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>)
-
-define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) {
-; CHECK-LABEL: test_vqshl_n_s32
-; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
-entry:
-  %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> zeroinitializer)
-  ret <2 x i32> %vqshl_n1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>)
-
-define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) {
-; CHECK-LABEL: test_vqshlq_n_s32
-; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
-entry:
-  %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer)
-  ret <4 x i32> %vqshl_n1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>)
-
-define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) {
-; CHECK-LABEL: test_vqshlq_n_s64
-; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
-entry:
-  %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> zeroinitializer)
-  ret <2 x i64> %vqshl_n1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
-
-define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) {
-; CHECK-LABEL: test_vqshl_n_u8
-; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
-entry:
-  %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
-  ret <8 x i8> %vqshl_n
-}
-
-declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)
-
-define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) {
-; CHECK-LABEL: test_vqshlq_n_u8
-; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
-entry:
-  %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
-  ret <16 x i8> %vqshl_n
-}
-
-declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>)
-
-define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) {
-; CHECK-LABEL: test_vqshl_n_u16
-; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
-entry:
-  %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer)
-  ret <4 x i16> %vqshl_n1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>)
-
-define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) {
-; CHECK-LABEL: test_vqshlq_n_u16
-; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
-entry:
-  %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> zeroinitializer)
-  ret <8 x i16> %vqshl_n1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>)
-
-define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) {
-; CHECK-LABEL: test_vqshl_n_u32
-; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
-entry:
-  %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> zeroinitializer)
-  ret <2 x i32> %vqshl_n1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>)
-
-define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) {
-; CHECK-LABEL: test_vqshlq_n_u32
-; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
-entry:
-  %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer)
-  ret <4 x i32> %vqshl_n1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>)
-
-define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) {
-; CHECK-LABEL: test_vqshlq_n_u64
-; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
-entry:
-  %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> zeroinitializer)
-  ret <2 x i64> %vqshl_n1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>)
-
-declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>)
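
For reference, the vqshl_n_* checks above pinned down that the
llvm.arm.neon.vqshifts/vqshiftu intrinsics select the immediate form of
sqshl/uqshl when the shift amount is a constant splat. A distilled,
standalone sketch of one case follows; the RUN line is an assumption modelled
on sibling tests in this directory (this file's own RUN line is above the
quoted range), and the retained arm64 copies use that backend's own intrinsic
names rather than llvm.arm.neon.*:

; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @sqshl_zero_sketch(<8 x i8> %a) {
; The all-zero shift vector is folded into the #0 immediate instead of being
; materialised in a register.
; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
  %r = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
  ret <8 x i8> %r
}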

Removed: llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll (removed)
@@ -1,202 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-; arm64 has a separate copy of this test due to different codegen.
-define <8 x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8 x i8> %c, <8 x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v8i8_i8:
-; CHECK: and	w0, w0, #0xff
-; CHECK-NEXT: cmp	w0, w1, uxtb
-; CHECK-NEXT: csetm	w0, eq
-; CHECK-NEXT: dup	v{{[0-9]+}}.8b, w0
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v0.8b, v1.8b
-  %cmp31 = icmp eq i8 %a, %b
-  %e = select i1 %cmp31, <8 x i8> %c, <8 x i8> %d
-  ret <8 x i8> %e
-}
-
-define <8 x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8 x i8> %c, <8 x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v8i8_f32:
-; CHECK: fcmeq	v{{[0-9]+}}.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup	v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v2.8b, v3.8b
-  %cmp31 = fcmp oeq float %a, %b
-  %e = select i1 %cmp31, <8 x i8> %c, <8 x i8> %d
-  ret <8 x i8> %e
-}
-
-define <8 x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8 x i8> %c, <8 x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v8i8_f64:
-; CHECK: fcmeq	v{{[0-9]+}}.2d, v0.2d, v1.2d
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v2.8b, v3.8b
-  %cmp31 = fcmp oeq double %a, %b
-  %e = select i1 %cmp31, <8 x i8> %c, <8 x i8> %d
-  ret <8 x i8> %e
-}
-
-define <16 x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16 x i8> %c, <16 x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v16i8_i8:
-; CHECK: and	w0, w0, #0xff
-; CHECK-NEXT: cmp	w0, w1, uxtb
-; CHECK-NEXT: csetm	w0, eq
-; CHECK-NEXT: dup	v{{[0-9]+}}.16b, w0
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v0.16b, v1.16b
-  %cmp31 = icmp eq i8 %a, %b
-  %e = select i1 %cmp31, <16 x i8> %c, <16 x i8> %d
-  ret <16 x i8> %e
-}
-
-define <16 x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16 x i8> %c, <16 x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v16i8_f32:
-; CHECK: fcmeq	v{{[0-9]+}}.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup	v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0]
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v2.16b, v3.16b
-  %cmp31 = fcmp oeq float %a, %b
-  %e = select i1 %cmp31, <16 x i8> %c, <16 x i8> %d
-  ret <16 x i8> %e
-}
-
-define <16 x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16 x i8> %c, <16 x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v16i8_f64:
-; CHECK: fcmeq	v{{[0-9]+}}.2d, v0.2d, v1.2d
-; CHECK-NEXT: dup	v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0]
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v2.16b, v3.16b
-  %cmp31 = fcmp oeq double %a, %b
-  %e = select i1 %cmp31, <16 x i8> %c, <16 x i8> %d
-  ret <16 x i8> %e
-}
-
-define <4 x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4 x i16> %c, <4 x i16> %d ) {
-; CHECK-LABEL: test_select_cc_v4i16:
-; CHECK: and	w0, w0, #0xffff
-; CHECK-NEXT: cmp	w0, w1, uxth
-; CHECK-NEXT: csetm	w0, eq
-; CHECK-NEXT: dup	v{{[0-9]+}}.4h, w0
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v0.8b, v1.8b
-  %cmp31 = icmp eq i16 %a, %b
-  %e = select i1 %cmp31, <4 x i16> %c, <4 x i16> %d
-  ret <4 x i16> %e
-}
-
-define <8 x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8 x i16> %c, <8 x i16> %d ) {
-; CHECK-LABEL: test_select_cc_v8i16:
-; CHECK: and	w0, w0, #0xffff
-; CHECK-NEXT: cmp	w0, w1, uxth
-; CHECK-NEXT: csetm	w0, eq
-; CHECK-NEXT: dup	v{{[0-9]+}}.8h, w0
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v0.16b, v1.16b
-  %cmp31 = icmp eq i16 %a, %b
-  %e = select i1 %cmp31, <8 x i16> %c, <8 x i16> %d
-  ret <8 x i16> %e
-}
-
-define <2 x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2 x i32> %c, <2 x i32> %d ) {
-; CHECK-LABEL: test_select_cc_v2i32:
-; CHECK: cmp	w0, w1, uxtw
-; CHECK-NEXT: csetm	w0, eq
-; CHECK-NEXT: dup	v{{[0-9]+}}.2s, w0
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v0.8b, v1.8b
-  %cmp31 = icmp eq i32 %a, %b
-  %e = select i1 %cmp31, <2 x i32> %c, <2 x i32> %d
-  ret <2 x i32> %e
-}
-
-define <4 x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4 x i32> %c, <4 x i32> %d ) {
-; CHECK-LABEL: test_select_cc_v4i32:
-; CHECK: cmp	w0, w1, uxtw
-; CHECK-NEXT: csetm	w0, eq
-; CHECK-NEXT: dup	v{{[0-9]+}}.4s, w0
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v0.16b, v1.16b
-  %cmp31 = icmp eq i32 %a, %b
-  %e = select i1 %cmp31, <4 x i32> %c, <4 x i32> %d
-  ret <4 x i32> %e
-}
-
-define <1 x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1 x i64> %c, <1 x i64> %d ) {
-; CHECK-LABEL: test_select_cc_v1i64:
-; CHECK: cmp	x0, x1
-; CHECK-NEXT: csetm	x0, eq
-; CHECK-NEXT: fmov	d{{[0-9]+}}, x0
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v0.8b, v1.8b
-  %cmp31 = icmp eq i64 %a, %b
-  %e = select i1 %cmp31, <1 x i64> %c, <1 x i64> %d
-  ret <1 x i64> %e
-}
-
-define <2 x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2 x i64> %c, <2 x i64> %d ) {
-; CHECK-LABEL: test_select_cc_v2i64:
-; CHECK: cmp	x0, x1
-; CHECK-NEXT: csetm	x0, eq
-; CHECK-NEXT: dup	v{{[0-9]+}}.2d, x0
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v0.16b, v1.16b
-  %cmp31 = icmp eq i64 %a, %b
-  %e = select i1 %cmp31, <2 x i64> %c, <2 x i64> %d
-  ret <2 x i64> %e
-}
-
-define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
-; CHECK-LABEL: test_select_cc_v1f32:
-; CHECK: fcmp	s0, s1
-; CHECK-NEXT: fcsel	s0, s2, s3, eq
-  %cmp31 = fcmp oeq float %a, %b
-  %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
-  ret <1 x float> %e
-}
-
-define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) {
-; CHECK-LABEL: test_select_cc_v2f32:
-; CHECK: fcmeq	v{{[0-9]+}}.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup	v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v2.8b, v3.8b
-  %cmp31 = fcmp oeq float %a, %b
-  %e = select i1 %cmp31, <2 x float> %c, <2 x float> %d
-  ret <2 x float> %e
-}
-
-define <4 x float> @test_select_cc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d ) {
-; CHECK-LABEL: test_select_cc_v4f32:
-; CHECK: fcmeq	v{{[0-9]+}}.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup	v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0]
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v2.16b, v3.16b
-  %cmp31 = fcmp oeq float %a, %b
-  %e = select i1 %cmp31, <4 x float> %c, <4 x float> %d
-  ret <4 x float> %e
-}
-
-define <4 x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4 x float> %c, <4 x float> %d ) {
-; CHECK-LABEL: test_select_cc_v4f32_icmp:
-; CHECK: cmp	w0, w1, uxtw
-; CHECK: csetm	w0, eq
-; CHECK-NEXT: dup	v{{[0-9]+}}.4s, w0
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v0.16b, v1.16b
-  %cmp31 = icmp eq i32 %a, %b
-  %e = select i1 %cmp31, <4 x float> %c, <4 x float> %d
-  ret <4 x float> %e
-}
-
-define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) {
-; CHECK-LABEL: test_select_cc_v1f64:
-; CHECK: fcmeq	v{{[0-9]+}}.2d, v0.2d, v1.2d
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v2.8b, v3.8b
-  %cmp31 = fcmp oeq double %a, %b
-  %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
-  ret <1 x double> %e
-}
-
-define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d ) {
-; CHECK-LABEL: test_select_cc_v1f64_icmp:
-; CHECK: cmp	 x0, x1
-; CHECK-NEXT: csetm	x0, eq
-; CHECK-NEXT: fmov	d{{[0-9]+}}, x0
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v0.8b, v1.8b
-  %cmp31 = icmp eq i64 %a, %b
-  %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
-  ret <1 x double> %e
-}
-
-define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) {
-; CHECK-LABEL: test_select_cc_v2f64:
-; CHECK: fcmeq	v{{[0-9]+}}.2d, v0.2d, v1.2d
-; CHECK-NEXT: dup	v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0]
-; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v2.16b, v3.16b
-  %cmp31 = fcmp oeq double %a, %b
-  %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
-  ret <2 x double> %e
-}
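
The "different codegen" note at the top of this removed file refers to the
pattern these checks pinned down: a vector select on a scalar condition is
lowered by materialising the i1 as an all-ones/all-zeros mask (cmp + csetm),
splatting it across lanes (dup), then blending with bsl. A distilled sketch
of one case, with explanatory comments added; the RUN line is copied from the
removed file itself:

; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
define <4 x i32> @select_cc_sketch(i32 %a, i32 %b, <4 x i32> %c, <4 x i32> %d) {
; csetm yields 0 or -1, so after the dup every lane of the mask is all-zeros
; or all-ones and bsl picks whole lanes from %c or %d.
; CHECK: cmp w0, w1
; CHECK: csetm w0, eq
; CHECK: dup v{{[0-9]+}}.4s, w0
; CHECK: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
  %cmp = icmp eq i32 %a, %b
  %e = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
  ret <4 x i32> %e
}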

Modified: llvm/trunk/test/CodeGen/AArch64/neon-shift-left-long.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-shift-left-long.ll?rev=209576&r1=209575&r2=209576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-shift-left-long.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-shift-left-long.ll Sat May 24 07:42:26 2014
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) {

Removed: llvm/trunk/test/CodeGen/AArch64/neon-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-shift.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-shift.ll (removed)
@@ -1,172 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 already has these tests: pure intrinsics & trivial shifts.
-
-declare <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uqshl_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: ushl v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_sqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sqshl_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sshl v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_ushl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_ushl_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: ushl v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_sshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sshl_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sshl v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_ushl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_ushl_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: ushl v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_sshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sshl_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sshl v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_ushl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_ushl_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: ushl v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_sshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sshl_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sshl v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_ushl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_ushl_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: ushl v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_sshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sshl_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sshl v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_ushl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_ushl_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: ushl v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sshl_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sshl v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>)
-declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_ushl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_ushl_v2i64:
-  %tmp1 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: ushl v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %tmp1
-}
-
-define <2 x i64> @test_sshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_sshl_v2i64:
-  %tmp1 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: sshl v0.2d, v0.2d, v1.2d
-  ret <2 x i64> %tmp1
-}
-
-
-define <8 x i8> @test_shl_v8i8(<8 x i8> %a) {
-; CHECK: test_shl_v8i8:
-; CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
-  %tmp = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-  ret <8 x i8> %tmp
-}
-
-define <4 x i16> @test_shl_v4i16(<4 x i16> %a) {
-; CHECK: test_shl_v4i16:
-; CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
-  %tmp = shl <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
-  ret <4 x i16> %tmp
-}
-
-define <2 x i32> @test_shl_v2i32(<2 x i32> %a) {
-; CHECK: test_shl_v2i32:
-; CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
-  %tmp = shl <2 x i32> %a, <i32 3, i32 3>
-  ret <2 x i32> %tmp
-}
-
-define <16 x i8> @test_shl_v16i8(<16 x i8> %a) {
-; CHECK: test_shl_v16i8:
-; CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
-  %tmp = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-  ret <16 x i8> %tmp
-}
-
-define <8 x i16> @test_shl_v8i16(<8 x i16> %a) {
-; CHECK: test_shl_v8i16:
-; CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
-  %tmp = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-  ret <8 x i16> %tmp
-}
-
-define <4 x i32> @test_shl_v4i32(<4 x i32> %a) {
-; CHECK: test_shl_v4i32:
-; CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
-  %tmp = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
-  ret <4 x i32> %tmp
-}
-
-define <2 x i64> @test_shl_v2i64(<2 x i64> %a) {
-; CHECK: test_shl_v2i64:
-; CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #63
-  %tmp = shl <2 x i64> %a, <i64 63, i64 63>
-  ret <2 x i64> %tmp
-}
-

Removed: llvm/trunk/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll (removed)
@@ -1,334 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 already has all of these tests except the ones involving v1iN types.
-
-define <8 x i8> @shl.v8i8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: shl.v8i8:
-; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %c = shl <8 x i8> %a, %b
-  ret <8 x i8> %c
-}
-
-define <4 x i16> @shl.v4i16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: shl.v4i16:
-; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %c = shl <4 x i16> %a, %b
-  ret <4 x i16> %c
-}
-
-define <2 x i32> @shl.v2i32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: shl.v2i32:
-; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %c = shl <2 x i32> %a, %b
-  ret <2 x i32> %c
-}
-
-define <1 x i64> @shl.v1i64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: shl.v1i64:
-; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %c = shl <1 x i64> %a, %b
-  ret <1 x i64> %c
-}
-
-define <16 x i8> @shl.v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: shl.v16i8:
-; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %c = shl <16 x i8> %a, %b
-  ret <16 x i8> %c
-}
-
-define <8 x i16> @shl.v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: shl.v8i16:
-; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-  %c = shl <8 x i16> %a, %b
-  ret <8 x i16> %c
-}
-
-define <4 x i32> @shl.v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: shl.v4i32:
-; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %c = shl <4 x i32> %a, %b
-  ret <4 x i32> %c
-}
-
-define <2 x i64> @shl.v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: shl.v2i64:
-; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %c = shl <2 x i64> %a, %b
-  ret <2 x i64> %c
-}
-
-define <8 x i8> @lshr.v8i8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: lshr.v8i8:
-; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %c = lshr <8 x i8> %a, %b
-  ret <8 x i8> %c
-}
-
-define <4 x i16> @lshr.v4i16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: lshr.v4i16:
-; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %c = lshr <4 x i16> %a, %b
-  ret <4 x i16> %c
-}
-
-define <2 x i32> @lshr.v2i32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: lshr.v2i32:
-; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %c = lshr <2 x i32> %a, %b
-  ret <2 x i32> %c
-}
-
-define <1 x i64> @lshr.v1i64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: lshr.v1i64:
-; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %c = lshr <1 x i64> %a, %b
-  ret <1 x i64> %c
-}
-
-define <16 x i8> @lshr.v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: lshr.v16i8:
-; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %c = lshr <16 x i8> %a, %b
-  ret <16 x i8> %c
-}
-
-define <8 x i16> @lshr.v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: lshr.v8i16:
-; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-  %c = lshr <8 x i16> %a, %b
-  ret <8 x i16> %c
-}
-
-define <4 x i32> @lshr.v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: lshr.v4i32:
-; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %c = lshr <4 x i32> %a, %b
-  ret <4 x i32> %c
-}
-
-define <2 x i64> @lshr.v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: lshr.v2i64:
-; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %c = lshr <2 x i64> %a, %b
-  ret <2 x i64> %c
-}
-
-define <8 x i8> @ashr.v8i8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: ashr.v8i8:
-; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %c = ashr <8 x i8> %a, %b
-  ret <8 x i8> %c
-}
-
-define <4 x i16> @ashr.v4i16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: ashr.v4i16:
-; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %c = ashr <4 x i16> %a, %b
-  ret <4 x i16> %c
-}
-
-define <2 x i32> @ashr.v2i32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: ashr.v2i32:
-; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %c = ashr <2 x i32> %a, %b
-  ret <2 x i32> %c
-}
-
-define <1 x i64> @ashr.v1i64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: ashr.v1i64:
-; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: sshl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %c = ashr <1 x i64> %a, %b
-  ret <1 x i64> %c
-}
-
-define <16 x i8> @ashr.v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: ashr.v16i8:
-; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: sshl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-  %c = ashr <16 x i8> %a, %b
-  ret <16 x i8> %c
-}
-
-define <8 x i16> @ashr.v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: ashr.v8i16:
-; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-; CHECK: sshl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-  %c = ashr <8 x i16> %a, %b
-  ret <8 x i16> %c
-}
-
-define <4 x i32> @ashr.v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: ashr.v4i32:
-; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-; CHECK: sshl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %c = ashr <4 x i32> %a, %b
-  ret <4 x i32> %c
-}
-
-define <2 x i64> @ashr.v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: ashr.v2i64:
-; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-; CHECK: sshl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %c = ashr <2 x i64> %a, %b
-  ret <2 x i64> %c
-}
-
-define <1 x i64> @shl.v1i64.0(<1 x i64> %a) {
-; CHECK-LABEL: shl.v1i64.0:
-; CHECK-NOT: shl d{{[0-9]+}}, d{{[0-9]+}}, #0
-  %c = shl <1 x i64> %a, zeroinitializer
-  ret <1 x i64> %c
-}
-
-define <2 x i32> @shl.v2i32.0(<2 x i32> %a) {
-; CHECK-LABEL: shl.v2i32.0:
-; CHECK-NOT: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0
-  %c = shl <2 x i32> %a, zeroinitializer
-  ret <2 x i32> %c
-}
-
-; The following test cases cover shl/ashr/lshr with v1i8/v1i16/v1i32 types.
-
-define <1 x i8> @shl.v1i8(<1 x i8> %a, <1 x i8> %b) {
-; CHECK-LABEL: shl.v1i8:
-; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %c = shl <1 x i8> %a, %b
-  ret <1 x i8> %c
-}
-
-define <1 x i16> @shl.v1i16(<1 x i16> %a, <1 x i16> %b) {
-; CHECK-LABEL: shl.v1i16:
-; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %c = shl <1 x i16> %a, %b
-  ret <1 x i16> %c
-}
-
-define <1 x i32> @shl.v1i32(<1 x i32> %a, <1 x i32> %b) {
-; CHECK-LABEL: shl.v1i32:
-; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %c = shl <1 x i32> %a, %b
-  ret <1 x i32> %c
-}
-
-define <1 x i8> @ashr.v1i8(<1 x i8> %a, <1 x i8> %b) {
-; CHECK-LABEL: ashr.v1i8:
-; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %c = ashr <1 x i8> %a, %b
-  ret <1 x i8> %c
-}
-
-define <1 x i16> @ashr.v1i16(<1 x i16> %a, <1 x i16> %b) {
-; CHECK-LABEL: ashr.v1i16:
-; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %c = ashr <1 x i16> %a, %b
-  ret <1 x i16> %c
-}
-
-define <1 x i32> @ashr.v1i32(<1 x i32> %a, <1 x i32> %b) {
-; CHECK-LABEL: ashr.v1i32:
-; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %c = ashr <1 x i32> %a, %b
-  ret <1 x i32> %c
-}
-
-define <1 x i8> @lshr.v1i8(<1 x i8> %a, <1 x i8> %b) {
-; CHECK-LABEL: lshr.v1i8:
-; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %c = lshr <1 x i8> %a, %b
-  ret <1 x i8> %c
-}
-
-define <1 x i16> @lshr.v1i16(<1 x i16> %a, <1 x i16> %b) {
-; CHECK-LABEL: lshr.v1i16:
-; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-  %c = lshr <1 x i16> %a, %b
-  ret <1 x i16> %c
-}
-
-define <1 x i32> @lshr.v1i32(<1 x i32> %a, <1 x i32> %b) {
-; CHECK-LABEL: lshr.v1i32:
-; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %c = lshr <1 x i32> %a, %b
-  ret <1 x i32> %c
-}
-
-define <1 x i8> @shl.v1i8.imm(<1 x i8> %a) {
-; CHECK-LABEL: shl.v1i8.imm:
-; CHECK: shl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3
-  %c = shl <1 x i8> %a, <i8 3>
-  ret <1 x i8> %c
-}
-
-define <1 x i16> @shl.v1i16.imm(<1 x i16> %a) {
-; CHECK-LABEL: shl.v1i16.imm:
-; CHECK: shl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #5
-  %c = shl <1 x i16> %a, <i16 5>
-  ret <1 x i16> %c
-}
-
-define <1 x i32> @shl.v1i32.imm(<1 x i32> %a) {
-; CHECK-LABEL: shl.v1i32.imm:
-; CHECK-NOT: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0
-  %c = shl <1 x i32> %a, zeroinitializer
-  ret <1 x i32> %c
-}
-
-define <1 x i8> @ashr.v1i8.imm(<1 x i8> %a) {
-; CHECK-LABEL: ashr.v1i8.imm:
-; CHECK: sshr v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3
-  %c = ashr <1 x i8> %a, <i8 3>
-  ret <1 x i8> %c
-}
-
-define <1 x i16> @ashr.v1i16.imm(<1 x i16> %a) {
-; CHECK-LABEL: ashr.v1i16.imm:
-; CHECK: sshr v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #10
-  %c = ashr <1 x i16> %a, <i16 10>
-  ret <1 x i16> %c
-}
-
-define <1 x i32> @ashr.v1i32.imm(<1 x i32> %a) {
-; CHECK-LABEL: ashr.v1i32.imm:
-; CHECK: sshr v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #31
-  %c = ashr <1 x i32> %a, <i32 31>
-  ret <1 x i32> %c
-}
-
-define <1 x i8> @lshr.v1i8.imm(<1 x i8> %a) {
-; CHECK-LABEL: lshr.v1i8.imm:
-; CHECK: ushr v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3
-  %c = lshr <1 x i8> %a, <i8 3>
-  ret <1 x i8> %c
-}
-
-define <1 x i16> @lshr.v1i16.imm(<1 x i16> %a) {
-; CHECK-LABEL: lshr.v1i16.imm:
-; CHECK: ushr v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #10
-  %c = lshr <1 x i16> %a, <i16 10>
-  ret <1 x i16> %c
-}
-
-define <1 x i32> @lshr.v1i32.imm(<1 x i32> %a) {
-; CHECK-LABEL: lshr.v1i32.imm:
-; CHECK: ushr v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #31
-  %c = lshr <1 x i32> %a, <i32 31>
-  ret <1 x i32> %c
-}
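
Two lowering details were exercised above and are worth noting for the arm64
copies: NEON has no right-shift-by-register instruction, so a variable
lshr/ashr is lowered as a negate of the shift amount followed by ushl/sshl
(which shift right for negative counts); and the illegal v1i8/v1i16/v1i32
types are widened to the smallest legal vector (.8b/.4h/.2s). A distilled
sketch with comments added, reusing this removed file's own RUN line:

; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
define <4 x i32> @lshr_sketch(<4 x i32> %a, <4 x i32> %b) {
; No ushr-by-register exists, so negate the amount and use ushl.
; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
  %c = lshr <4 x i32> %a, %b
  ret <4 x i32> %c
}

define <1 x i8> @shl_v1i8_sketch(<1 x i8> %a, <1 x i8> %b) {
; v1i8 is not legal, so the shift is performed on the widened .8b form.
; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
  %c = shl <1 x i8> %a, %b
  ret <1 x i8> %c
}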

Removed: llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll (removed)
@@ -1,2317 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-; arm64 already has these. They are essentially just a copy/paste of Clang's
-; output for arm_neon.h.
-
-define void @test_ldst1_v16i8(<16 x i8>* %ptr, <16 x i8>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v16i8:
-; CHECK: ld1 { v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}]
-  %tmp = load <16 x i8>* %ptr
-  store <16 x i8> %tmp, <16 x i8>* %ptr2
-  ret void
-}
-
-define void @test_ldst1_v8i16(<8 x i16>* %ptr, <8 x i16>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v8i16:
-; CHECK: ld1 { v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}]
-  %tmp = load <8 x i16>* %ptr
-  store <8 x i16> %tmp, <8 x i16>* %ptr2
-  ret void
-}
-
-define void @test_ldst1_v4i32(<4 x i32>* %ptr, <4 x i32>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v4i32:
-; CHECK: ld1 { v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}]
-  %tmp = load <4 x i32>* %ptr
-  store <4 x i32> %tmp, <4 x i32>* %ptr2
-  ret void
-}
-
-define void @test_ldst1_v2i64(<2 x i64>* %ptr, <2 x i64>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v2i64:
-; CHECK: ld1 { v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}]
-  %tmp = load <2 x i64>* %ptr
-  store <2 x i64> %tmp, <2 x i64>* %ptr2
-  ret void
-}
-
-define void @test_ldst1_v8i8(<8 x i8>* %ptr, <8 x i8>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v8i8:
-; CHECK: ld1 { v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}]
-  %tmp = load <8 x i8>* %ptr
-  store <8 x i8> %tmp, <8 x i8>* %ptr2
-  ret void
-}
-
-define void @test_ldst1_v4i16(<4 x i16>* %ptr, <4 x i16>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v4i16:
-; CHECK: ld1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}]
-  %tmp = load <4 x i16>* %ptr
-  store <4 x i16> %tmp, <4 x i16>* %ptr2
-  ret void
-}
-
-define void @test_ldst1_v2i32(<2 x i32>* %ptr, <2 x i32>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v2i32:
-; CHECK: ld1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}]
-  %tmp = load <2 x i32>* %ptr
-  store <2 x i32> %tmp, <2 x i32>* %ptr2
-  ret void
-}
-
-define void @test_ldst1_v1i64(<1 x i64>* %ptr, <1 x i64>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v1i64:
-; CHECK: ld1 { v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}]
-  %tmp = load <1 x i64>* %ptr
-  store <1 x i64> %tmp, <1 x i64>* %ptr2
-  ret void
-}
-
-%struct.int8x16x2_t = type { [2 x <16 x i8>] }
-%struct.int16x8x2_t = type { [2 x <8 x i16>] }
-%struct.int32x4x2_t = type { [2 x <4 x i32>] }
-%struct.int64x2x2_t = type { [2 x <2 x i64>] }
-%struct.float32x4x2_t = type { [2 x <4 x float>] }
-%struct.float64x2x2_t = type { [2 x <2 x double>] }
-%struct.int8x8x2_t = type { [2 x <8 x i8>] }
-%struct.int16x4x2_t = type { [2 x <4 x i16>] }
-%struct.int32x2x2_t = type { [2 x <2 x i32>] }
-%struct.int64x1x2_t = type { [2 x <1 x i64>] }
-%struct.float32x2x2_t = type { [2 x <2 x float>] }
-%struct.float64x1x2_t = type { [2 x <1 x double>] }
-%struct.int8x16x3_t = type { [3 x <16 x i8>] }
-%struct.int16x8x3_t = type { [3 x <8 x i16>] }
-%struct.int32x4x3_t = type { [3 x <4 x i32>] }
-%struct.int64x2x3_t = type { [3 x <2 x i64>] }
-%struct.float32x4x3_t = type { [3 x <4 x float>] }
-%struct.float64x2x3_t = type { [3 x <2 x double>] }
-%struct.int8x8x3_t = type { [3 x <8 x i8>] }
-%struct.int16x4x3_t = type { [3 x <4 x i16>] }
-%struct.int32x2x3_t = type { [3 x <2 x i32>] }
-%struct.int64x1x3_t = type { [3 x <1 x i64>] }
-%struct.float32x2x3_t = type { [3 x <2 x float>] }
-%struct.float64x1x3_t = type { [3 x <1 x double>] }
-%struct.int8x16x4_t = type { [4 x <16 x i8>] }
-%struct.int16x8x4_t = type { [4 x <8 x i16>] }
-%struct.int32x4x4_t = type { [4 x <4 x i32>] }
-%struct.int64x2x4_t = type { [4 x <2 x i64>] }
-%struct.float32x4x4_t = type { [4 x <4 x float>] }
-%struct.float64x2x4_t = type { [4 x <2 x double>] }
-%struct.int8x8x4_t = type { [4 x <8 x i8>] }
-%struct.int16x4x4_t = type { [4 x <4 x i16>] }
-%struct.int32x2x4_t = type { [4 x <2 x i32>] }
-%struct.int64x1x4_t = type { [4 x <1 x i64>] }
-%struct.float32x2x4_t = type { [4 x <2 x float>] }
-%struct.float64x1x4_t = type { [4 x <1 x double>] }
-
-
-define <16 x i8> @test_vld1q_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld1q_s8
-; CHECK: ld1 { v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}]
-  %vld1 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %a, i32 1)
-  ret <16 x i8> %vld1
-}
-
-define <8 x i16> @test_vld1q_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld1q_s16
-; CHECK: ld1 { v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %vld1 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %1, i32 2)
-  ret <8 x i16> %vld1
-}
-
-define <4 x i32> @test_vld1q_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld1q_s32
-; CHECK: ld1 { v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %vld1 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %1, i32 4)
-  ret <4 x i32> %vld1
-}
-
-define <2 x i64> @test_vld1q_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld1q_s64
-; CHECK: ld1 { v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %vld1 = tail call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %1, i32 8)
-  ret <2 x i64> %vld1
-}
-
-define <4 x float> @test_vld1q_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld1q_f32
-; CHECK: ld1 { v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %1, i32 4)
-  ret <4 x float> %vld1
-}
-
-define <2 x double> @test_vld1q_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld1q_f64
-; CHECK: ld1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %vld1 = tail call <2 x double> @llvm.arm.neon.vld1.v2f64(i8* %1, i32 8)
-  ret <2 x double> %vld1
-}
-
-define <8 x i8> @test_vld1_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld1_s8
-; CHECK: ld1 { v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}]
-  %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1)
-  ret <8 x i8> %vld1
-}
-
-define <4 x i16> @test_vld1_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld1_s16
-; CHECK: ld1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2)
-  ret <4 x i16> %vld1
-}
-
-define <2 x i32> @test_vld1_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld1_s32
-; CHECK: ld1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %vld1 = tail call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %1, i32 4)
-  ret <2 x i32> %vld1
-}
-
-define <1 x i64> @test_vld1_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld1_s64
-; CHECK: ld1 { v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %vld1 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %1, i32 8)
-  ret <1 x i64> %vld1
-}
-
-define <2 x float> @test_vld1_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld1_f32
-; CHECK: ld1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %1, i32 4)
-  ret <2 x float> %vld1
-}
-
-define <1 x double> @test_vld1_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld1_f64
-; CHECK: ld1 { v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %vld1 = tail call <1 x double> @llvm.arm.neon.vld1.v1f64(i8* %1, i32 8)
-  ret <1 x double> %vld1
-}
-
-define <8 x i8> @test_vld1_p8(i8* readonly %a) {
-; CHECK-LABEL: test_vld1_p8
-; CHECK: ld1 { v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}]
-  %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1)
-  ret <8 x i8> %vld1
-}
-
-define <4 x i16> @test_vld1_p16(i16* readonly %a) {
-; CHECK-LABEL: test_vld1_p16
-; CHECK: ld1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2)
-  ret <4 x i16> %vld1
-}
-
-define %struct.int8x16x2_t @test_vld2q_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld2q_s8
-; CHECK: ld2 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}]
-  %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %a, i32 1)
-  %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1
-  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2.fca.1.extract, 0, 1
-  ret %struct.int8x16x2_t %.fca.0.1.insert
-}
-
-define %struct.int16x8x2_t @test_vld2q_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld2q_s16
-; CHECK: ld2 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %vld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8* %1, i32 2)
-  %vld2.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 1
-  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2.fca.1.extract, 0, 1
-  ret %struct.int16x8x2_t %.fca.0.1.insert
-}
-
-define %struct.int32x4x2_t @test_vld2q_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld2q_s32
-; CHECK: ld2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %vld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %1, i32 4)
-  %vld2.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 1
-  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2.fca.1.extract, 0, 1
-  ret %struct.int32x4x2_t %.fca.0.1.insert
-}
-
-define %struct.int64x2x2_t @test_vld2q_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld2q_s64
-; CHECK: ld2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %vld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8* %1, i32 8)
-  %vld2.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 1
-  %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2.fca.1.extract, 0, 1
-  ret %struct.int64x2x2_t %.fca.0.1.insert
-}
-
-define %struct.float32x4x2_t @test_vld2q_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld2q_f32
-; CHECK: ld2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
-  %vld2.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 1
-  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2.fca.1.extract, 0, 1
-  ret %struct.float32x4x2_t %.fca.0.1.insert
-}
-
-define %struct.float64x2x2_t @test_vld2q_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld2q_f64
-; CHECK: ld2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %vld2 = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8* %1, i32 8)
-  %vld2.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 1
-  %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2.fca.1.extract, 0, 1
-  ret %struct.float64x2x2_t %.fca.0.1.insert
-}
-
-define %struct.int8x8x2_t @test_vld2_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld2_s8
-; CHECK: ld2 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}]
-  %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %a, i32 1)
-  %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1
-  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2.fca.1.extract, 0, 1
-  ret %struct.int8x8x2_t %.fca.0.1.insert
-}
-
-define %struct.int16x4x2_t @test_vld2_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld2_s16
-; CHECK: ld2 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %vld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8* %1, i32 2)
-  %vld2.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 1
-  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2.fca.1.extract, 0, 1
-  ret %struct.int16x4x2_t %.fca.0.1.insert
-}
-
-define %struct.int32x2x2_t @test_vld2_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld2_s32
-; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %vld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %1, i32 4)
-  %vld2.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 1
-  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2.fca.1.extract, 0, 1
-  ret %struct.int32x2x2_t %.fca.0.1.insert
-}
-
-define %struct.int64x1x2_t @test_vld2_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld2_s64
-; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %vld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %1, i32 8)
-  %vld2.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 1
-  %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2.fca.1.extract, 0, 1
-  ret %struct.int64x1x2_t %.fca.0.1.insert
-}
-
-define %struct.float32x2x2_t @test_vld2_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld2_f32
-; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %vld2 = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %1, i32 4)
-  %vld2.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 1
-  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2.fca.1.extract, 0, 1
-  ret %struct.float32x2x2_t %.fca.0.1.insert
-}
-
-define %struct.float64x1x2_t @test_vld2_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld2_f64
-; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %vld2 = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %1, i32 8)
-  %vld2.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 1
-  %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2.fca.1.extract, 0, 1
-  ret %struct.float64x1x2_t %.fca.0.1.insert
-}
-
-define %struct.int8x16x3_t @test_vld3q_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld3q_s8
-; CHECK: ld3 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}]
-  %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %a, i32 1)
-  %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2
-  %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3.fca.2.extract, 0, 2
-  ret %struct.int8x16x3_t %.fca.0.2.insert
-}
-
-define %struct.int16x8x3_t @test_vld3q_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld3q_s16
-; CHECK: ld3 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %vld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8* %1, i32 2)
-  %vld3.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 2
-  %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3.fca.2.extract, 0, 2
-  ret %struct.int16x8x3_t %.fca.0.2.insert
-}
-
-define %struct.int32x4x3_t @test_vld3q_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld3q_s32
-; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %vld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %1, i32 4)
-  %vld3.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 2
-  %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3.fca.2.extract, 0, 2
-  ret %struct.int32x4x3_t %.fca.0.2.insert
-}
-
-define %struct.int64x2x3_t @test_vld3q_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld3q_s64
-; CHECK: ld3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %vld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8* %1, i32 8)
-  %vld3.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 2
-  %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3.fca.2.extract, 0, 2
-  ret %struct.int64x2x3_t %.fca.0.2.insert
-}
-
-define %struct.float32x4x3_t @test_vld3q_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld3q_f32
-; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %1, i32 4)
-  %vld3.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 2
-  %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3.fca.2.extract, 0, 2
-  ret %struct.float32x4x3_t %.fca.0.2.insert
-}
-
-define %struct.float64x2x3_t @test_vld3q_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld3q_f64
-; CHECK: ld3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %vld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8* %1, i32 8)
-  %vld3.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 2
-  %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3.fca.2.extract, 0, 2
-  ret %struct.float64x2x3_t %.fca.0.2.insert
-}
-
-define %struct.int8x8x3_t @test_vld3_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld3_s8
-; CHECK: ld3 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}]
-  %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %a, i32 1)
-  %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2
-  %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3.fca.2.extract, 0, 2
-  ret %struct.int8x8x3_t %.fca.0.2.insert
-}
-
-define %struct.int16x4x3_t @test_vld3_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld3_s16
-; CHECK: ld3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %vld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %1, i32 2)
-  %vld3.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 2
-  %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3.fca.2.extract, 0, 2
-  ret %struct.int16x4x3_t %.fca.0.2.insert
-}
-
-define %struct.int32x2x3_t @test_vld3_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld3_s32
-; CHECK: ld3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %vld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8* %1, i32 4)
-  %vld3.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 2
-  %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3.fca.2.extract, 0, 2
-  ret %struct.int32x2x3_t %.fca.0.2.insert
-}
-
-define %struct.int64x1x3_t @test_vld3_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld3_s64
-; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %vld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %1, i32 8)
-  %vld3.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 2
-  %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3.fca.2.extract, 0, 2
-  ret %struct.int64x1x3_t %.fca.0.2.insert
-}
-
-define %struct.float32x2x3_t @test_vld3_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld3_f32
-; CHECK: ld3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %vld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8* %1, i32 4)
-  %vld3.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 2
-  %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3.fca.2.extract, 0, 2
-  ret %struct.float32x2x3_t %.fca.0.2.insert
-}
-
-define %struct.float64x1x3_t @test_vld3_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld3_f64
-; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %vld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %1, i32 8)
-  %vld3.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 2
-  %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3.fca.2.extract, 0, 2
-  ret %struct.float64x1x3_t %.fca.0.2.insert
-}
-
-define %struct.int8x16x4_t @test_vld4q_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld4q_s8
-; CHECK: ld4 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}]
-  %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %a, i32 1)
-  %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3
-  %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld4.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld4.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld4.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld4.fca.3.extract, 0, 3
-  ret %struct.int8x16x4_t %.fca.0.3.insert
-}
-
-define %struct.int16x8x4_t @test_vld4q_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld4q_s16
-; CHECK: ld4 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %vld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %1, i32 2)
-  %vld4.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 3
-  %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld4.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld4.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld4.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld4.fca.3.extract, 0, 3
-  ret %struct.int16x8x4_t %.fca.0.3.insert
-}
-
-define %struct.int32x4x4_t @test_vld4q_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld4q_s32
-; CHECK: ld4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %vld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8* %1, i32 4)
-  %vld4.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 3
-  %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld4.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld4.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld4.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld4.fca.3.extract, 0, 3
-  ret %struct.int32x4x4_t %.fca.0.3.insert
-}
-
-define %struct.int64x2x4_t @test_vld4q_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld4q_s64
-; CHECK: ld4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %vld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8* %1, i32 8)
-  %vld4.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 3
-  %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld4.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld4.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld4.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld4.fca.3.extract, 0, 3
-  ret %struct.int64x2x4_t %.fca.0.3.insert
-}
-
-define %struct.float32x4x4_t @test_vld4q_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld4q_f32
-; CHECK: ld4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %vld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8* %1, i32 4)
-  %vld4.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 3
-  %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld4.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld4.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld4.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld4.fca.3.extract, 0, 3
-  ret %struct.float32x4x4_t %.fca.0.3.insert
-}
-
-define %struct.float64x2x4_t @test_vld4q_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld4q_f64
-; CHECK: ld4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %vld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8* %1, i32 8)
-  %vld4.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 3
-  %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld4.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld4.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld4.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld4.fca.3.extract, 0, 3
-  ret %struct.float64x2x4_t %.fca.0.3.insert
-}
-
-define %struct.int8x8x4_t @test_vld4_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld4_s8
-; CHECK: ld4 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}]
-  %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %a, i32 1)
-  %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3
-  %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld4.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld4.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld4.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld4.fca.3.extract, 0, 3
-  ret %struct.int8x8x4_t %.fca.0.3.insert
-}
-
-define %struct.int16x4x4_t @test_vld4_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld4_s16
-; CHECK: ld4 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %vld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %1, i32 2)
-  %vld4.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 3
-  %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld4.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld4.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld4.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld4.fca.3.extract, 0, 3
-  ret %struct.int16x4x4_t %.fca.0.3.insert
-}
-
-define %struct.int32x2x4_t @test_vld4_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld4_s32
-; CHECK: ld4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %vld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8* %1, i32 4)
-  %vld4.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 3
-  %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld4.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld4.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld4.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld4.fca.3.extract, 0, 3
-  ret %struct.int32x2x4_t %.fca.0.3.insert
-}
-
-define %struct.int64x1x4_t @test_vld4_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld4_s64
-; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %vld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %1, i32 8)
-  %vld4.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 3
-  %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld4.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld4.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld4.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld4.fca.3.extract, 0, 3
-  ret %struct.int64x1x4_t %.fca.0.3.insert
-}
-
-define %struct.float32x2x4_t @test_vld4_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld4_f32
-; CHECK: ld4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %vld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8* %1, i32 4)
-  %vld4.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 3
-  %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld4.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld4.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld4.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld4.fca.3.extract, 0, 3
-  ret %struct.float32x2x4_t %.fca.0.3.insert
-}
-
-define %struct.float64x1x4_t @test_vld4_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld4_f64
-; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %vld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %1, i32 8)
-  %vld4.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 3
-  %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld4.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld4.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld4.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld4.fca.3.extract, 0, 3
-  ret %struct.float64x1x4_t %.fca.0.3.insert
-}
-
-declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32)
-declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32)
-declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32)
-declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32)
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32)
-declare <2 x double> @llvm.arm.neon.vld1.v2f64(i8*, i32)
-declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32)
-declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32)
-declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32)
-declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32)
-declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32)
-declare <1 x double> @llvm.arm.neon.vld1.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32)
-
-define void @test_vst1q_s8(i8* %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vst1q_s8
-; CHECK: st1 { v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
-  tail call void @llvm.arm.neon.vst1.v16i8(i8* %a, <16 x i8> %b, i32 1)
-  ret void
-}
-
-define void @test_vst1q_s16(i16* %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vst1q_s16
-; CHECK: st1 { v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst1.v8i16(i8* %1, <8 x i16> %b, i32 2)
-  ret void
-}
-
-define void @test_vst1q_s32(i32* %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vst1q_s32
-; CHECK: st1 { v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst1.v4i32(i8* %1, <4 x i32> %b, i32 4)
-  ret void
-}
-
-define void @test_vst1q_s64(i64* %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vst1q_s64
-; CHECK: st1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst1.v2i64(i8* %1, <2 x i64> %b, i32 8)
-  ret void
-}
-
-define void @test_vst1q_f32(float* %a, <4 x float> %b) {
-; CHECK-LABEL: test_vst1q_f32
-; CHECK: st1 { v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst1.v4f32(i8* %1, <4 x float> %b, i32 4)
-  ret void
-}
-
-define void @test_vst1q_f64(double* %a, <2 x double> %b) {
-; CHECK-LABEL: test_vst1q_f64
-; CHECK: st1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst1.v2f64(i8* %1, <2 x double> %b, i32 8)
-  ret void
-}
-
-define void @test_vst1_s8(i8* %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vst1_s8
-; CHECK: st1 { v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
-  tail call void @llvm.arm.neon.vst1.v8i8(i8* %a, <8 x i8> %b, i32 1)
-  ret void
-}
-
-define void @test_vst1_s16(i16* %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vst1_s16
-; CHECK: st1 { v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst1.v4i16(i8* %1, <4 x i16> %b, i32 2)
-  ret void
-}
-
-define void @test_vst1_s32(i32* %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vst1_s32
-; CHECK: st1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst1.v2i32(i8* %1, <2 x i32> %b, i32 4)
-  ret void
-}
-
-define void @test_vst1_s64(i64* %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vst1_s64
-; CHECK: st1 { v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst1.v1i64(i8* %1, <1 x i64> %b, i32 8)
-  ret void
-}
-
-define void @test_vst1_f32(float* %a, <2 x float> %b) {
-; CHECK-LABEL: test_vst1_f32
-; CHECK: st1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst1.v2f32(i8* %1, <2 x float> %b, i32 4)
-  ret void
-}
-
-define void @test_vst1_f64(double* %a, <1 x double> %b) {
-; CHECK-LABEL: test_vst1_f64
-; CHECK: st1 { v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst1.v1f64(i8* %1, <1 x double> %b, i32 8)
-  ret void
-}
-
-define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_s8
-; CHECK: st2 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1
-  tail call void @llvm.arm.neon.vst2.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 1)
-  ret void
-}
-
-define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_s16
-; CHECK: st2 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1
-  %1 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst2.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 2)
-  ret void
-}
-
-define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_s32
-; CHECK: st2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1
-  %1 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst2.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 4)
-  ret void
-}
-
-define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_s64
-; CHECK: st2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1
-  %1 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst2.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 8)
-  ret void
-}
-
-define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_f32
-; CHECK: st2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1
-  %1 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 4)
-  ret void
-}
-
-define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_f64
-; CHECK: st2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1
-  %1 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst2.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 8)
-  ret void
-}
-
-define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst2_s8
-; CHECK: st2 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1
-  tail call void @llvm.arm.neon.vst2.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 1)
-  ret void
-}
-
-define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst2_s16
-; CHECK: st2 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1
-  %1 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst2.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 2)
-  ret void
-}
-
-define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst2_s32
-; CHECK: st2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1
-  %1 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst2.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 4)
-  ret void
-}
-
-define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst2_s64
-; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1
-  %1 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst2.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 8)
-  ret void
-}
-
-define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst2_f32
-; CHECK: st2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1
-  %1 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst2.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 4)
-  ret void
-}
-
-define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst2_f64
-; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1
-  %1 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst2.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 8)
-  ret void
-}
-
-define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_s8
-; CHECK: st3 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2
-  tail call void @llvm.arm.neon.vst3.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 1)
-  ret void
-}
-
-define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_s16
-; CHECK: st3 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2
-  %1 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst3.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 2)
-  ret void
-}
-
-define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_s32
-; CHECK: st3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2
-  %1 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst3.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 4)
-  ret void
-}
-
-define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_s64
-; CHECK: st3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2
-  %1 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst3.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 8)
-  ret void
-}
-
-define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_f32
-; CHECK: st3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2
-  %1 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst3.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 4)
-  ret void
-}
-
-define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_f64
-; CHECK: st3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2
-  %1 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst3.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 8)
-  ret void
-}
-
-define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst3_s8
-; CHECK: st3 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2
-  tail call void @llvm.arm.neon.vst3.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 1)
-  ret void
-}
-
-define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst3_s16
-; CHECK: st3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2
-  %1 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst3.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 2)
-  ret void
-}
-
-define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst3_s32
-; CHECK: st3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2
-  %1 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst3.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 4)
-  ret void
-}
-
-define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst3_s64
-; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2
-  %1 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst3.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 8)
-  ret void
-}
-
-define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst3_f32
-; CHECK: st3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2
-  %1 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst3.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 4)
-  ret void
-}
-
-define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst3_f64
-; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2
-  %1 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst3.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 8)
-  ret void
-}
-
-define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_s8
-; CHECK: st4 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3
-  tail call void @llvm.arm.neon.vst4.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 1)
-  ret void
-}
-
-define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_s16
-; CHECK: st4 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3
-  %1 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst4.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 2)
-  ret void
-}
-
-define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_s32
-; CHECK: st4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3
-  %1 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst4.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 4)
-  ret void
-}
-
-define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_s64
-; CHECK: st4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3
-  %1 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst4.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 8)
-  ret void
-}
-
-define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_f32
-; CHECK: st4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3
-  %1 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst4.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 4)
-  ret void
-}
-
-define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_f64
-; CHECK: st4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3
-  %1 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst4.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 8)
-  ret void
-}
-
-define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst4_s8
-; CHECK: st4 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3
-  tail call void @llvm.arm.neon.vst4.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 1)
-  ret void
-}
-
-define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst4_s16
-; CHECK: st4 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3
-  %1 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst4.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 2)
-  ret void
-}
-
-define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst4_s32
-; CHECK: st4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3
-  %1 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst4.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 4)
-  ret void
-}
-
-define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst4_s64
-; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3
-  %1 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst4.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 8)
-  ret void
-}
-
-define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst4_f32
-; CHECK: st4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3
-  %1 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst4.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 4)
-  ret void
-}
-
-define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst4_f64
-; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3
-  %1 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst4.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 8)
-  ret void
-}
-
-declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32)
-declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32)
-declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32)
-declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32)
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32)
-declare void @llvm.arm.neon.vst1.v2f64(i8*, <2 x double>, i32)
-declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32)
-declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32)
-declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32)
-declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32)
-declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32)
-declare void @llvm.arm.neon.vst1.v1f64(i8*, <1 x double>, i32)
-declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.arm.neon.vst2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32)
-declare void @llvm.arm.neon.vst2.v2f64(i8*, <2 x double>, <2 x double>, i32)
-declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32)
-declare void @llvm.arm.neon.vst2.v1f64(i8*, <1 x double>, <1 x double>, i32)
-declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.arm.neon.vst3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
-declare void @llvm.arm.neon.vst3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
-declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
-declare void @llvm.arm.neon.vst3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)
-declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.arm.neon.vst4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
-declare void @llvm.arm.neon.vst4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
-declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
-declare void @llvm.arm.neon.vst4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)
-
-define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a)  {
-; CHECK-LABEL: test_vld1q_s8_x2
-; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
-  %1 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1)
-  %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
-  %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
-  %4 = insertvalue %struct.int8x16x2_t undef, <16 x i8> %2, 0, 0
-  %5 = insertvalue %struct.int8x16x2_t %4, <16 x i8> %3, 0, 1
-  ret %struct.int8x16x2_t %5
-}
-
-define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a)  {
-; CHECK-LABEL: test_vld1q_s16_x2
-; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %1, i32 2)
-  %3 = extractvalue { <8 x i16>, <8 x i16> } %2, 0
-  %4 = extractvalue { <8 x i16>, <8 x i16> } %2, 1
-  %5 = insertvalue %struct.int16x8x2_t undef, <8 x i16> %3, 0, 0
-  %6 = insertvalue %struct.int16x8x2_t %5, <8 x i16> %4, 0, 1
-  ret %struct.int16x8x2_t %6
-}
-
-define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a)  {
-; CHECK-LABEL: test_vld1q_s32_x2
-; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8* %1, i32 4)
-  %3 = extractvalue { <4 x i32>, <4 x i32> } %2, 0
-  %4 = extractvalue { <4 x i32>, <4 x i32> } %2, 1
-  %5 = insertvalue %struct.int32x4x2_t undef, <4 x i32> %3, 0, 0
-  %6 = insertvalue %struct.int32x4x2_t %5, <4 x i32> %4, 0, 1
-  ret %struct.int32x4x2_t %6
-}
-
-define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a)  {
-; CHECK-LABEL: test_vld1q_s64_x2
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8* %1, i32 8)
-  %3 = extractvalue { <2 x i64>, <2 x i64> } %2, 0
-  %4 = extractvalue { <2 x i64>, <2 x i64> } %2, 1
-  %5 = insertvalue %struct.int64x2x2_t undef, <2 x i64> %3, 0, 0
-  %6 = insertvalue %struct.int64x2x2_t %5, <2 x i64> %4, 0, 1
-  ret %struct.int64x2x2_t %6
-}
-
-define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a)  {
-; CHECK-LABEL: test_vld1q_f32_x2
-; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8* %1, i32 4)
-  %3 = extractvalue { <4 x float>, <4 x float> } %2, 0
-  %4 = extractvalue { <4 x float>, <4 x float> } %2, 1
-  %5 = insertvalue %struct.float32x4x2_t undef, <4 x float> %3, 0, 0
-  %6 = insertvalue %struct.float32x4x2_t %5, <4 x float> %4, 0, 1
-  ret %struct.float32x4x2_t %6
-}
-
-
-define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a)  {
-; CHECK-LABEL: test_vld1q_f64_x2
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8* %1, i32 8)
-  %3 = extractvalue { <2 x double>, <2 x double> } %2, 0
-  %4 = extractvalue { <2 x double>, <2 x double> } %2, 1
-  %5 = insertvalue %struct.float64x2x2_t undef, <2 x double> %3, 0, 0
-  %6 = insertvalue %struct.float64x2x2_t %5, <2 x double> %4, 0, 1
-  ret %struct.float64x2x2_t %6
-}
-
-define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a)  {
-; CHECK-LABEL: test_vld1_s8_x2
-; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
-  %1 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8* %a, i32 1)
-  %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0
-  %3 = extractvalue { <8 x i8>, <8 x i8> } %1, 1
-  %4 = insertvalue %struct.int8x8x2_t undef, <8 x i8> %2, 0, 0
-  %5 = insertvalue %struct.int8x8x2_t %4, <8 x i8> %3, 0, 1
-  ret %struct.int8x8x2_t %5
-}
-
-define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a)  {
-; CHECK-LABEL: test_vld1_s16_x2
-; CHECK: ld1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8* %1, i32 2)
-  %3 = extractvalue { <4 x i16>, <4 x i16> } %2, 0
-  %4 = extractvalue { <4 x i16>, <4 x i16> } %2, 1
-  %5 = insertvalue %struct.int16x4x2_t undef, <4 x i16> %3, 0, 0
-  %6 = insertvalue %struct.int16x4x2_t %5, <4 x i16> %4, 0, 1
-  ret %struct.int16x4x2_t %6
-}
-
-define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a)  {
-; CHECK-LABEL: test_vld1_s32_x2
-; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8* %1, i32 4)
-  %3 = extractvalue { <2 x i32>, <2 x i32> } %2, 0
-  %4 = extractvalue { <2 x i32>, <2 x i32> } %2, 1
-  %5 = insertvalue %struct.int32x2x2_t undef, <2 x i32> %3, 0, 0
-  %6 = insertvalue %struct.int32x2x2_t %5, <2 x i32> %4, 0, 1
-  ret %struct.int32x2x2_t %6
-}
-
-define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a)  {
-; CHECK-LABEL: test_vld1_s64_x2
-; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8* %1, i32 8)
-  %3 = extractvalue { <1 x i64>, <1 x i64> } %2, 0
-  %4 = extractvalue { <1 x i64>, <1 x i64> } %2, 1
-  %5 = insertvalue %struct.int64x1x2_t undef, <1 x i64> %3, 0, 0
-  %6 = insertvalue %struct.int64x1x2_t %5, <1 x i64> %4, 0, 1
-  ret %struct.int64x1x2_t %6
-}
-
-define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a)  {
-; CHECK-LABEL: test_vld1_f32_x2
-; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8* %1, i32 4)
-  %3 = extractvalue { <2 x float>, <2 x float> } %2, 0
-  %4 = extractvalue { <2 x float>, <2 x float> } %2, 1
-  %5 = insertvalue %struct.float32x2x2_t undef, <2 x float> %3, 0, 0
-  %6 = insertvalue %struct.float32x2x2_t %5, <2 x float> %4, 0, 1
-  ret %struct.float32x2x2_t %6
-}
-
-define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a)  {
-; CHECK-LABEL: test_vld1_f64_x2
-; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8* %1, i32 8)
-  %3 = extractvalue { <1 x double>, <1 x double> } %2, 0
-  %4 = extractvalue { <1 x double>, <1 x double> } %2, 1
-  %5 = insertvalue %struct.float64x1x2_t undef, <1 x double> %3, 0, 0
-  %6 = insertvalue %struct.float64x1x2_t %5, <1 x double> %4, 0, 1
-  ret %struct.float64x1x2_t %6
-}
-
-define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a)  {
-; CHECK-LABEL: test_vld1q_s8_x3
-; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b },
-; [{{x[0-9]+|sp}}]
-  %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8* %a, i32 1)
-  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
-  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
-  %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
-  %5 = insertvalue %struct.int8x16x3_t undef, <16 x i8> %2, 0, 0
-  %6 = insertvalue %struct.int8x16x3_t %5, <16 x i8> %3, 0, 1
-  %7 = insertvalue %struct.int8x16x3_t %6, <16 x i8> %4, 0, 2
-  ret %struct.int8x16x3_t %7
-}
-
-define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a)  {
-; CHECK-LABEL: test_vld1q_s16_x3
-; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h },
-; [{{x[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2)
-  %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
-  %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 1
-  %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 2
-  %6 = insertvalue %struct.int16x8x3_t undef, <8 x i16> %3, 0, 0
-  %7 = insertvalue %struct.int16x8x3_t %6, <8 x i16> %4, 0, 1
-  %8 = insertvalue %struct.int16x8x3_t %7, <8 x i16> %5, 0, 2
-  ret %struct.int16x8x3_t %8
-}
-
-define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a)  {
-; CHECK-LABEL: test_vld1q_s32_x3
-; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s },
-; [{{x[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8* %1, i32 4)
-  %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 0
-  %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 1
-  %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 2
-  %6 = insertvalue %struct.int32x4x3_t undef, <4 x i32> %3, 0, 0
-  %7 = insertvalue %struct.int32x4x3_t %6, <4 x i32> %4, 0, 1
-  %8 = insertvalue %struct.int32x4x3_t %7, <4 x i32> %5, 0, 2
-  ret %struct.int32x4x3_t %8
-}
-
-define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a)  {
-; CHECK-LABEL: test_vld1q_s64_x3
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d },
-; [{{x[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8)
-  %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
-  %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 1
-  %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 2
-  %6 = insertvalue %struct.int64x2x3_t undef, <2 x i64> %3, 0, 0
-  %7 = insertvalue %struct.int64x2x3_t %6, <2 x i64> %4, 0, 1
-  %8 = insertvalue %struct.int64x2x3_t %7, <2 x i64> %5, 0, 2
-  ret %struct.int64x2x3_t %8
-}
-
-define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a)  {
-; CHECK-LABEL: test_vld1q_f32_x3
-; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s },
-; [{{x[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %2 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8* %1, i32 4)
-  %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 0
-  %4 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 1
-  %5 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 2
-  %6 = insertvalue %struct.float32x4x3_t undef, <4 x float> %3, 0, 0
-  %7 = insertvalue %struct.float32x4x3_t %6, <4 x float> %4, 0, 1
-  %8 = insertvalue %struct.float32x4x3_t %7, <4 x float> %5, 0, 2
-  ret %struct.float32x4x3_t %8
-}
-
-
-define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a)  {
-; CHECK-LABEL: test_vld1q_f64_x3
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d },
-; [{{x[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %2 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8* %1, i32 8)
-  %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 0
-  %4 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 1
-  %5 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 2
-  %6 = insertvalue %struct.float64x2x3_t undef, <2 x double> %3, 0, 0
-  %7 = insertvalue %struct.float64x2x3_t %6, <2 x double> %4, 0, 1
-  %8 = insertvalue %struct.float64x2x3_t %7, <2 x double> %5, 0, 2
-  ret %struct.float64x2x3_t %8
-}
-
-define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a)  {
-; CHECK-LABEL: test_vld1_s8_x3
-; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b },
-; [{{x[0-9]+|sp}}]
-  %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8* %a, i32 1)
-  %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
-  %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
-  %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
-  %5 = insertvalue %struct.int8x8x3_t undef, <8 x i8> %2, 0, 0
-  %6 = insertvalue %struct.int8x8x3_t %5, <8 x i8> %3, 0, 1
-  %7 = insertvalue %struct.int8x8x3_t %6, <8 x i8> %4, 0, 2
-  ret %struct.int8x8x3_t %7
-}
-
-define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a)  {
-; CHECK-LABEL: test_vld1_s16_x3
-; CHECK: ld1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h },
-; [{{x[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8* %1, i32 2)
-  %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
-  %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
-  %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
-  %6 = insertvalue %struct.int16x4x3_t undef, <4 x i16> %3, 0, 0
-  %7 = insertvalue %struct.int16x4x3_t %6, <4 x i16> %4, 0, 1
-  %8 = insertvalue %struct.int16x4x3_t %7, <4 x i16> %5, 0, 2
-  ret %struct.int16x4x3_t %8
-}
-
-define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a)  {
-; CHECK-LABEL: test_vld1_s32_x3
-; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8* %1, i32 4)
-  %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
-  %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
-  %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
-  %6 = insertvalue %struct.int32x2x3_t undef, <2 x i32> %3, 0, 0
-  %7 = insertvalue %struct.int32x2x3_t %6, <2 x i32> %4, 0, 1
-  %8 = insertvalue %struct.int32x2x3_t %7, <2 x i32> %5, 0, 2
-  ret %struct.int32x2x3_t %8
-}
-
-define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a)  {
-; CHECK-LABEL: test_vld1_s64_x3
-; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8* %1, i32 8)
-  %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 0
-  %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 1
-  %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 2
-  %6 = insertvalue %struct.int64x1x3_t undef, <1 x i64> %3, 0, 0
-  %7 = insertvalue %struct.int64x1x3_t %6, <1 x i64> %4, 0, 1
-  %8 = insertvalue %struct.int64x1x3_t %7, <1 x i64> %5, 0, 2
-  ret %struct.int64x1x3_t %8
-}
-
-define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a)  {
-; CHECK-LABEL: test_vld1_f32_x3
-; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %2 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8* %1, i32 4)
-  %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 0
-  %4 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 1
-  %5 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 2
-  %6 = insertvalue %struct.float32x2x3_t undef, <2 x float> %3, 0, 0
-  %7 = insertvalue %struct.float32x2x3_t %6, <2 x float> %4, 0, 1
-  %8 = insertvalue %struct.float32x2x3_t %7, <2 x float> %5, 0, 2
-  ret %struct.float32x2x3_t %8
-}
-
-define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a)  {
-; CHECK-LABEL: test_vld1_f64_x3
-; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %2 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8* %1, i32 8)
-  %3 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 0
-  %4 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 1
-  %5 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 2
-  %6 = insertvalue %struct.float64x1x3_t undef, <1 x double> %3, 0, 0
-  %7 = insertvalue %struct.float64x1x3_t %6, <1 x double> %4, 0, 1
-  %8 = insertvalue %struct.float64x1x3_t %7, <1 x double> %5, 0, 2
-  ret %struct.float64x1x3_t %8
-}
-
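The vld1x3 bodies above all follow the same shape: one intrinsic call returning a three-vector struct, three extractvalues, three insertvalues. At the C level this corresponds to the ACLE vld1*_x3 intrinsics, which load three consecutive vectors with a single three-register ld1. A minimal sketch of the source pattern (hypothetical function name, assuming an arm_neon.h that provides the _x3 forms):

#include <arm_neon.h>

/* Loads three consecutive 4 x i32 vectors with one
   ld1 { v0.4s, v1.4s, v2.4s }, [x0], then sums them. */
int32x4_t sum_three(const int32_t *p) {
  int32x4x3_t v = vld1q_s32_x3(p);  /* lowers to @llvm.aarch64.neon.vld1x3 */
  return vaddq_s32(vaddq_s32(v.val[0], v.val[1]), v.val[2]);
}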
-define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a)  {
-; CHECK-LABEL: test_vld1q_s8_x4
-; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
-  %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8* %a, i32 1)
-  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
-  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
-  %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
-  %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
-  %6 = insertvalue %struct.int8x16x4_t undef, <16 x i8> %2, 0, 0
-  %7 = insertvalue %struct.int8x16x4_t %6, <16 x i8> %3, 0, 1
-  %8 = insertvalue %struct.int8x16x4_t %7, <16 x i8> %4, 0, 2
-  %9 = insertvalue %struct.int8x16x4_t %8, <16 x i8> %5, 0, 3
-  ret %struct.int8x16x4_t %9
-}
-
-define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a)  {
-; CHECK-LABEL: test_vld1q_s16_x4
-; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8* %1, i32 2)
-  %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
-  %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 1
-  %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 2
-  %6 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 3
-  %7 = insertvalue %struct.int16x8x4_t undef, <8 x i16> %3, 0, 0
-  %8 = insertvalue %struct.int16x8x4_t %7, <8 x i16> %4, 0, 1
-  %9 = insertvalue %struct.int16x8x4_t %8, <8 x i16> %5, 0, 2
-  %10 = insertvalue %struct.int16x8x4_t %9, <8 x i16> %6, 0, 3
-  ret %struct.int16x8x4_t %10
-}
-
-define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a)  {
-; CHECK-LABEL: test_vld1q_s32_x4
-; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8* %1, i32 4)
-  %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 0
-  %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 1
-  %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 2
-  %6 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 3
-  %7 = insertvalue %struct.int32x4x4_t undef, <4 x i32> %3, 0, 0
-  %8 = insertvalue %struct.int32x4x4_t %7, <4 x i32> %4, 0, 1
-  %9 = insertvalue %struct.int32x4x4_t %8, <4 x i32> %5, 0, 2
-  %10 = insertvalue %struct.int32x4x4_t %9, <4 x i32> %6, 0, 3
-  ret %struct.int32x4x4_t %10
-}
-
-define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a)  {
-; CHECK-LABEL: test_vld1q_s64_x4
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8* %1, i32 8)
-  %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
-  %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 1
-  %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 2
-  %6 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 3
-  %7 = insertvalue %struct.int64x2x4_t undef, <2 x i64> %3, 0, 0
-  %8 = insertvalue %struct.int64x2x4_t %7, <2 x i64> %4, 0, 1
-  %9 = insertvalue %struct.int64x2x4_t %8, <2 x i64> %5, 0, 2
-  %10 = insertvalue %struct.int64x2x4_t %9, <2 x i64> %6, 0, 3
-  ret %struct.int64x2x4_t %10
-}
-
-define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a)  {
-; CHECK-LABEL: test_vld1q_f32_x4
-; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4)
-  %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0
-  %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 1
-  %5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 2
-  %6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 3
-  %7 = insertvalue %struct.float32x4x4_t undef, <4 x float> %3, 0, 0
-  %8 = insertvalue %struct.float32x4x4_t %7, <4 x float> %4, 0, 1
-  %9 = insertvalue %struct.float32x4x4_t %8, <4 x float> %5, 0, 2
-  %10 = insertvalue %struct.float32x4x4_t %9, <4 x float> %6, 0, 3
-  ret %struct.float32x4x4_t %10
-}
-
-define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a)  {
-; CHECK-LABEL: test_vld1q_f64_x4
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8* %1, i32 8)
-  %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0
-  %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1
-  %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2
-  %6 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3
-  %7 = insertvalue %struct.float64x2x4_t undef, <2 x double> %3, 0, 0
-  %8 = insertvalue %struct.float64x2x4_t %7, <2 x double> %4, 0, 1
-  %9 = insertvalue %struct.float64x2x4_t %8, <2 x double> %5, 0, 2
-  %10 = insertvalue %struct.float64x2x4_t %9, <2 x double> %6, 0, 3
-  ret %struct.float64x2x4_t %10
-}
-
-define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a)  {
-; CHECK-LABEL: test_vld1_s8_x4
-; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
-  %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1)
-  %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
-  %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
-  %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
-  %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3
-  %6 = insertvalue %struct.int8x8x4_t undef, <8 x i8> %2, 0, 0
-  %7 = insertvalue %struct.int8x8x4_t %6, <8 x i8> %3, 0, 1
-  %8 = insertvalue %struct.int8x8x4_t %7, <8 x i8> %4, 0, 2
-  %9 = insertvalue %struct.int8x8x4_t %8, <8 x i8> %5, 0, 3
-  ret %struct.int8x8x4_t %9
-}
-
-define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a)  {
-; CHECK-LABEL: test_vld1_s16_x4
-; CHECK: ld1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i16* %a to i8*
-  %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8* %1, i32 2)
-  %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
-  %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
-  %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
-  %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 3
-  %7 = insertvalue %struct.int16x4x4_t undef, <4 x i16> %3, 0, 0
-  %8 = insertvalue %struct.int16x4x4_t %7, <4 x i16> %4, 0, 1
-  %9 = insertvalue %struct.int16x4x4_t %8, <4 x i16> %5, 0, 2
-  %10 = insertvalue %struct.int16x4x4_t %9, <4 x i16> %6, 0, 3
-  ret %struct.int16x4x4_t %10
-}
-
-define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a)  {
-; CHECK-LABEL: test_vld1_s32_x4
-; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i32* %a to i8*
-  %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8* %1, i32 4)
-  %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
-  %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
-  %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
-  %6 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3
-  %7 = insertvalue %struct.int32x2x4_t undef, <2 x i32> %3, 0, 0
-  %8 = insertvalue %struct.int32x2x4_t %7, <2 x i32> %4, 0, 1
-  %9 = insertvalue %struct.int32x2x4_t %8, <2 x i32> %5, 0, 2
-  %10 = insertvalue %struct.int32x2x4_t %9, <2 x i32> %6, 0, 3
-  ret %struct.int32x2x4_t %10
-}
-
-define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a)  {
-; CHECK-LABEL: test_vld1_s64_x4
-; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast i64* %a to i8*
-  %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8* %1, i32 8)
-  %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 0
-  %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 1
-  %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 2
-  %6 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 3
-  %7 = insertvalue %struct.int64x1x4_t undef, <1 x i64> %3, 0, 0
-  %8 = insertvalue %struct.int64x1x4_t %7, <1 x i64> %4, 0, 1
-  %9 = insertvalue %struct.int64x1x4_t %8, <1 x i64> %5, 0, 2
-  %10 = insertvalue %struct.int64x1x4_t %9, <1 x i64> %6, 0, 3
-  ret %struct.int64x1x4_t %10
-}
-
-define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a)  {
-; CHECK-LABEL: test_vld1_f32_x4
-; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = bitcast float* %a to i8*
-  %2 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8* %1, i32 4)
-  %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 0
-  %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 1
-  %5 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 2
-  %6 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 3
-  %7 = insertvalue %struct.float32x2x4_t undef, <2 x float> %3, 0, 0
-  %8 = insertvalue %struct.float32x2x4_t %7, <2 x float> %4, 0, 1
-  %9 = insertvalue %struct.float32x2x4_t %8, <2 x float> %5, 0, 2
-  %10 = insertvalue %struct.float32x2x4_t %9, <2 x float> %6, 0, 3
-  ret %struct.float32x2x4_t %10
-}
-
-define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a)  {
-; CHECK-LABEL: test_vld1_f64_x4
-; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = bitcast double* %a to i8*
-  %2 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8* %1, i32 8)
-  %3 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 0
-  %4 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 1
-  %5 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 2
-  %6 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 3
-  %7 = insertvalue %struct.float64x1x4_t undef, <1 x double> %3, 0, 0
-  %8 = insertvalue %struct.float64x1x4_t %7, <1 x double> %4, 0, 1
-  %9 = insertvalue %struct.float64x1x4_t %8, <1 x double> %5, 0, 2
-  %10 = insertvalue %struct.float64x1x4_t %9, <1 x double> %6, 0, 3
-  ret %struct.float64x1x4_t %10
-}
-
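The x4 variants are identical except for the fourth vector: vld1*_x4 loads four consecutive vectors with one four-register ld1. A sketch under the same assumptions (hypothetical name):

#include <arm_neon.h>

/* One ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], then xor-reduce. */
uint8x16_t xor_four(const uint8_t *p) {
  uint8x16x4_t v = vld1q_u8_x4(p);  /* @llvm.aarch64.neon.vld1x4 */
  return veorq_u8(veorq_u8(v.val[0], v.val[1]),
                  veorq_u8(v.val[2], v.val[3]));
}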
-define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b)  {
-; CHECK-LABEL: test_vst1q_s8_x2
-; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [2 x <16 x i8>] %b, 0
-  %2 = extractvalue [2 x <16 x i8>] %b, 1
-  tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, i32 1)
-  ret void
-}
-
-define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b)  {
-; CHECK-LABEL: test_vst1q_s16_x2
-; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [2 x <8 x i16>] %b, 0
-  %2 = extractvalue [2 x <8 x i16>] %b, 1
-  %3 = bitcast i16* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %3, <8 x i16> %1, <8 x i16> %2, i32 2)
-  ret void
-}
-
-define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b)  {
-; CHECK-LABEL: test_vst1q_s32_x2
-; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [2 x <4 x i32>] %b, 0
-  %2 = extractvalue [2 x <4 x i32>] %b, 1
-  %3 = bitcast i32* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x2.v4i32(i8* %3, <4 x i32> %1, <4 x i32> %2, i32 4)
-  ret void
-}
-
-define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b)  {
-; CHECK-LABEL: test_vst1q_s64_x2
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [2 x <2 x i64>] %b, 0
-  %2 = extractvalue [2 x <2 x i64>] %b, 1
-  %3 = bitcast i64* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x2.v2i64(i8* %3, <2 x i64> %1, <2 x i64> %2, i32 8)
-  ret void
-}
-
-define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b)  {
-; CHECK-LABEL: test_vst1q_f32_x2
-; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [2 x <4 x float>] %b, 0
-  %2 = extractvalue [2 x <4 x float>] %b, 1
-  %3 = bitcast float* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x2.v4f32(i8* %3, <4 x float> %1, <4 x float> %2, i32 4)
-  ret void
-}
-
-define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b)  {
-; CHECK-LABEL: test_vst1q_f64_x2
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [2 x <2 x double>] %b, 0
-  %2 = extractvalue [2 x <2 x double>] %b, 1
-  %3 = bitcast double* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x2.v2f64(i8* %3, <2 x double> %1, <2 x double> %2, i32 8)
-  ret void
-}
-
-define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b)  {
-; CHECK-LABEL: test_vst1_s8_x2
-; CHECK: st1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [2 x <8 x i8>] %b, 0
-  %2 = extractvalue [2 x <8 x i8>] %b, 1
-  tail call void @llvm.aarch64.neon.vst1x2.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 1)
-  ret void
-}
-
-define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b)  {
-; CHECK-LABEL: test_vst1_s16_x2
-; CHECK: st1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [2 x <4 x i16>] %b, 0
-  %2 = extractvalue [2 x <4 x i16>] %b, 1
-  %3 = bitcast i16* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x2.v4i16(i8* %3, <4 x i16> %1, <4 x i16> %2, i32 2)
-  ret void
-}
-
-define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b)  {
-; CHECK-LABEL: test_vst1_s32_x2
-; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [2 x <2 x i32>] %b, 0
-  %2 = extractvalue [2 x <2 x i32>] %b, 1
-  %3 = bitcast i32* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x2.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 4)
-  ret void
-}
-
-define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b)  {
-; CHECK-LABEL: test_vst1_s64_x2
-; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [2 x <1 x i64>] %b, 0
-  %2 = extractvalue [2 x <1 x i64>] %b, 1
-  %3 = bitcast i64* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x2.v1i64(i8* %3, <1 x i64> %1, <1 x i64> %2, i32 8)
-  ret void
-}
-
-define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b)  {
-; CHECK-LABEL: test_vst1_f32_x2
-; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [2 x <2 x float>] %b, 0
-  %2 = extractvalue [2 x <2 x float>] %b, 1
-  %3 = bitcast float* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x2.v2f32(i8* %3, <2 x float> %1, <2 x float> %2, i32 4)
-  ret void
-}
-
-define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b)  {
-; CHECK-LABEL: test_vst1_f64_x2
-; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [2 x <1 x double>] %b, 0
-  %2 = extractvalue [2 x <1 x double>] %b, 1
-  %3 = bitcast double* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x2.v1f64(i8* %3, <1 x double> %1, <1 x double> %2, i32 8)
-  ret void
-}
-
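The store tests mirror the loads: the [2 x <...>] argument is the .val array of the corresponding NEON struct type, and the call lowers to a single two-register st1. A sketch (hypothetical name, same arm_neon.h assumption):

#include <arm_neon.h>

/* One st1 { v0.4s, v1.4s }, [x0] storing two vectors back to back. */
void store_pair(float *p, float32x4_t a, float32x4_t b) {
  float32x4x2_t v = { { a, b } };
  vst1q_f32_x2(p, v);               /* @llvm.aarch64.neon.vst1x2 */
}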
-define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b)  {
-; CHECK-LABEL: test_vst1q_s8_x3
-; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [3 x <16 x i8>] %b, 0
-  %2 = extractvalue [3 x <16 x i8>] %b, 1
-  %3 = extractvalue [3 x <16 x i8>] %b, 2
-  tail call void @llvm.aarch64.neon.vst1x3.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, i32 1)
-  ret void
-}
-
-define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b)  {
-; CHECK-LABEL: test_vst1q_s16_x3
-; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [3 x <8 x i16>] %b, 0
-  %2 = extractvalue [3 x <8 x i16>] %b, 1
-  %3 = extractvalue [3 x <8 x i16>] %b, 2
-  %4 = bitcast i16* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x3.v8i16(i8* %4, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, i32 2)
-  ret void
-}
-
-define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b)  {
-; CHECK-LABEL: test_vst1q_s32_x3
-; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [3 x <4 x i32>] %b, 0
-  %2 = extractvalue [3 x <4 x i32>] %b, 1
-  %3 = extractvalue [3 x <4 x i32>] %b, 2
-  %4 = bitcast i32* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x3.v4i32(i8* %4, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, i32 4)
-  ret void
-}
-
-define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b)  {
-; CHECK-LABEL: test_vst1q_s64_x3
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [3 x <2 x i64>] %b, 0
-  %2 = extractvalue [3 x <2 x i64>] %b, 1
-  %3 = extractvalue [3 x <2 x i64>] %b, 2
-  %4 = bitcast i64* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x3.v2i64(i8* %4, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, i32 8)
-  ret void
-}
-
-define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b)  {
-; CHECK-LABEL: test_vst1q_f32_x3
-; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [3 x <4 x float>] %b, 0
-  %2 = extractvalue [3 x <4 x float>] %b, 1
-  %3 = extractvalue [3 x <4 x float>] %b, 2
-  %4 = bitcast float* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x3.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 4)
-  ret void
-}
-
-define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b)  {
-; CHECK-LABEL: test_vst1q_f64_x3
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [3 x <2 x double>] %b, 0
-  %2 = extractvalue [3 x <2 x double>] %b, 1
-  %3 = extractvalue [3 x <2 x double>] %b, 2
-  %4 = bitcast double* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x3.v2f64(i8* %4, <2 x double> %1, <2 x double> %2, <2 x double> %3, i32 8)
-  ret void
-}
-
-define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b)  {
-; CHECK-LABEL: test_vst1_s8_x3
-; CHECK: st1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [3 x <8 x i8>] %b, 0
-  %2 = extractvalue [3 x <8 x i8>] %b, 1
-  %3 = extractvalue [3 x <8 x i8>] %b, 2
-  tail call void @llvm.aarch64.neon.vst1x3.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 1)
-  ret void
-}
-
-define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b)  {
-; CHECK-LABEL: test_vst1_s16_x3
-; CHECK: st1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [3 x <4 x i16>] %b, 0
-  %2 = extractvalue [3 x <4 x i16>] %b, 1
-  %3 = extractvalue [3 x <4 x i16>] %b, 2
-  %4 = bitcast i16* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x3.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 2)
-  ret void
-}
-
-define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b)  {
-; CHECK-LABEL: test_vst1_s32_x3
-; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [3 x <2 x i32>] %b, 0
-  %2 = extractvalue [3 x <2 x i32>] %b, 1
-  %3 = extractvalue [3 x <2 x i32>] %b, 2
-  %4 = bitcast i32* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4)
-  ret void
-}
-
-define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b)  {
-; CHECK-LABEL: test_vst1_s64_x3
-; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [3 x <1 x i64>] %b, 0
-  %2 = extractvalue [3 x <1 x i64>] %b, 1
-  %3 = extractvalue [3 x <1 x i64>] %b, 2
-  %4 = bitcast i64* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8)
-  ret void
-}
-
-define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b)  {
-; CHECK-LABEL: test_vst1_f32_x3
-; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [3 x <2 x float>] %b, 0
-  %2 = extractvalue [3 x <2 x float>] %b, 1
-  %3 = extractvalue [3 x <2 x float>] %b, 2
-  %4 = bitcast float* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x3.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 4)
-  ret void
-}
-
-define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b)  {
-; CHECK-LABEL: test_vst1_f64_x3
-; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [3 x <1 x double>] %b, 0
-  %2 = extractvalue [3 x <1 x double>] %b, 1
-  %3 = extractvalue [3 x <1 x double>] %b, 2
-  %4 = bitcast double* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x3.v1f64(i8* %4, <1 x double> %1, <1 x double> %2, <1 x double> %3, i32 8)
-  ret void
-}
-
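The same pattern holds for the three-vector stores; only the struct type and register count change (hypothetical name):

#include <arm_neon.h>

/* One st1 { v0.4h, v1.4h, v2.4h }, [x0]. */
void store_triple(int16_t *p, int16x4x3_t v) {
  vst1_s16_x3(p, v);                /* @llvm.aarch64.neon.vst1x3 */
}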
-define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b)  {
-; CHECK-LABEL: test_vst1q_s8_x4
-; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [4 x <16 x i8>] %b, 0
-  %2 = extractvalue [4 x <16 x i8>] %b, 1
-  %3 = extractvalue [4 x <16 x i8>] %b, 2
-  %4 = extractvalue [4 x <16 x i8>] %b, 3
-  tail call void @llvm.aarch64.neon.vst1x4.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, <16 x i8> %4, i32 1)
-  ret void
-}
-
-define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b)  {
-; CHECK-LABEL: test_vst1q_s16_x4
-; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [4 x <8 x i16>] %b, 0
-  %2 = extractvalue [4 x <8 x i16>] %b, 1
-  %3 = extractvalue [4 x <8 x i16>] %b, 2
-  %4 = extractvalue [4 x <8 x i16>] %b, 3
-  %5 = bitcast i16* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x4.v8i16(i8* %5, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, <8 x i16> %4, i32 2)
-  ret void
-}
-
-define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b)  {
-; CHECK-LABEL: test_vst1q_s32_x4
-; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [4 x <4 x i32>] %b, 0
-  %2 = extractvalue [4 x <4 x i32>] %b, 1
-  %3 = extractvalue [4 x <4 x i32>] %b, 2
-  %4 = extractvalue [4 x <4 x i32>] %b, 3
-  %5 = bitcast i32* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x4.v4i32(i8* %5, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, i32 4)
-  ret void
-}
-
-define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b)  {
-; CHECK-LABEL: test_vst1q_s64_x4
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [4 x <2 x i64>] %b, 0
-  %2 = extractvalue [4 x <2 x i64>] %b, 1
-  %3 = extractvalue [4 x <2 x i64>] %b, 2
-  %4 = extractvalue [4 x <2 x i64>] %b, 3
-  %5 = bitcast i64* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x4.v2i64(i8* %5, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, <2 x i64> %4, i32 8)
-  ret void
-}
-
-define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b)  {
-; CHECK-LABEL: test_vst1q_f32_x4
-; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [4 x <4 x float>] %b, 0
-  %2 = extractvalue [4 x <4 x float>] %b, 1
-  %3 = extractvalue [4 x <4 x float>] %b, 2
-  %4 = extractvalue [4 x <4 x float>] %b, 3
-  %5 = bitcast float* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4)
-  ret void
-}
-
-define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b)  {
-; CHECK-LABEL: test_vst1q_f64_x4
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [4 x <2 x double>] %b, 0
-  %2 = extractvalue [4 x <2 x double>] %b, 1
-  %3 = extractvalue [4 x <2 x double>] %b, 2
-  %4 = extractvalue [4 x <2 x double>] %b, 3
-  %5 = bitcast double* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8)
-  ret void
-}
-
-define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b)  {
-; CHECK-LABEL: test_vst1_s8_x4
-; CHECK: st1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [4 x <8 x i8>] %b, 0
-  %2 = extractvalue [4 x <8 x i8>] %b, 1
-  %3 = extractvalue [4 x <8 x i8>] %b, 2
-  %4 = extractvalue [4 x <8 x i8>] %b, 3
-  tail call void @llvm.aarch64.neon.vst1x4.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %4, i32 1)
-  ret void
-}
-
-define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b)  {
-; CHECK-LABEL: test_vst1_s16_x4
-; CHECK: st1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [4 x <4 x i16>] %b, 0
-  %2 = extractvalue [4 x <4 x i16>] %b, 1
-  %3 = extractvalue [4 x <4 x i16>] %b, 2
-  %4 = extractvalue [4 x <4 x i16>] %b, 3
-  %5 = bitcast i16* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x4.v4i16(i8* %5, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, <4 x i16> %4, i32 2)
-  ret void
-}
-
-define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b)  {
-; CHECK-LABEL: test_vst1_s32_x4
-; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [4 x <2 x i32>] %b, 0
-  %2 = extractvalue [4 x <2 x i32>] %b, 1
-  %3 = extractvalue [4 x <2 x i32>] %b, 2
-  %4 = extractvalue [4 x <2 x i32>] %b, 3
-  %5 = bitcast i32* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x4.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 4)
-  ret void
-}
-
-define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b)  {
-; CHECK-LABEL: test_vst1_s64_x4
-; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [4 x <1 x i64>] %b, 0
-  %2 = extractvalue [4 x <1 x i64>] %b, 1
-  %3 = extractvalue [4 x <1 x i64>] %b, 2
-  %4 = extractvalue [4 x <1 x i64>] %b, 3
-  %5 = bitcast i64* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x4.v1i64(i8* %5, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, <1 x i64> %4, i32 8)
-  ret void
-}
-
-define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b)  {
-; CHECK-LABEL: test_vst1_f32_x4
-; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [4 x <2 x float>] %b, 0
-  %2 = extractvalue [4 x <2 x float>] %b, 1
-  %3 = extractvalue [4 x <2 x float>] %b, 2
-  %4 = extractvalue [4 x <2 x float>] %b, 3
-  %5 = bitcast float* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x4.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 4)
-  ret void
-}
-
-define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b)  {
-; CHECK-LABEL: test_vst1_f64_x4
-; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
-  %1 = extractvalue [4 x <1 x double>] %b, 0
-  %2 = extractvalue [4 x <1 x double>] %b, 1
-  %3 = extractvalue [4 x <1 x double>] %b, 2
-  %4 = extractvalue [4 x <1 x double>] %b, 3
-  %5 = bitcast double* %a to i8*
-  tail call void @llvm.aarch64.neon.vst1x4.v1f64(i8* %5, <1 x double> %1, <1 x double> %2, <1 x double> %3, <1 x double> %4, i32 8)
-  ret void
-}
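One detail worth noting before the declarations: the trailing i32 argument on every vld1xN/vst1xN call above is the alignment in bytes, not a lane index; it tracks the element size in each call (1 for i8, 2 for i16, 4 for i32/f32, 8 for i64/f64). A sketch of the last store form (hypothetical name):

#include <arm_neon.h>

/* One st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x0]; the IR call carries
   i32 8 because the elements are 8-byte doubles. */
void store_quad(double *p, float64x1x4_t v) {
  vst1_f64_x4(p, v);                /* @llvm.aarch64.neon.vst1x4 */
}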
-
-declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8*, i32)
-declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v4f32(i8*, <4 x float>, <4 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v2f64(i8*, <2 x double>, <2 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v2f32(i8*, <2 x float>, <2 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v1f64(i8*, <1 x double>, <1 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)

Removed: llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst-one.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst-one.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst-one.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst-one.ll (removed)
@@ -1,2300 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; interesting parts copied into arm64 directory as aarch64-neon-simd-ldst-one.ll
-
-%struct.uint8x16x2_t = type { [2 x <16 x i8>] }
-%struct.poly8x16x2_t = type { [2 x <16 x i8>] }
-%struct.uint8x16x3_t = type { [3 x <16 x i8>] }
-%struct.int8x16x2_t = type { [2 x <16 x i8>] }
-%struct.int16x8x2_t = type { [2 x <8 x i16>] }
-%struct.int32x4x2_t = type { [2 x <4 x i32>] }
-%struct.int64x2x2_t = type { [2 x <2 x i64>] }
-%struct.float32x4x2_t = type { [2 x <4 x float>] }
-%struct.float64x2x2_t = type { [2 x <2 x double>] }
-%struct.int8x8x2_t = type { [2 x <8 x i8>] }
-%struct.int16x4x2_t = type { [2 x <4 x i16>] }
-%struct.int32x2x2_t = type { [2 x <2 x i32>] }
-%struct.int64x1x2_t = type { [2 x <1 x i64>] }
-%struct.float32x2x2_t = type { [2 x <2 x float>] }
-%struct.float64x1x2_t = type { [2 x <1 x double>] }
-%struct.int8x16x3_t = type { [3 x <16 x i8>] }
-%struct.int16x8x3_t = type { [3 x <8 x i16>] }
-%struct.int32x4x3_t = type { [3 x <4 x i32>] }
-%struct.int64x2x3_t = type { [3 x <2 x i64>] }
-%struct.float32x4x3_t = type { [3 x <4 x float>] }
-%struct.float64x2x3_t = type { [3 x <2 x double>] }
-%struct.int8x8x3_t = type { [3 x <8 x i8>] }
-%struct.int16x4x3_t = type { [3 x <4 x i16>] }
-%struct.int32x2x3_t = type { [3 x <2 x i32>] }
-%struct.int64x1x3_t = type { [3 x <1 x i64>] }
-%struct.float32x2x3_t = type { [3 x <2 x float>] }
-%struct.float64x1x3_t = type { [3 x <1 x double>] }
-%struct.int8x16x4_t = type { [4 x <16 x i8>] }
-%struct.int16x8x4_t = type { [4 x <8 x i16>] }
-%struct.int32x4x4_t = type { [4 x <4 x i32>] }
-%struct.int64x2x4_t = type { [4 x <2 x i64>] }
-%struct.float32x4x4_t = type { [4 x <4 x float>] }
-%struct.float64x2x4_t = type { [4 x <2 x double>] }
-%struct.int8x8x4_t = type { [4 x <8 x i8>] }
-%struct.int16x4x4_t = type { [4 x <4 x i16>] }
-%struct.int32x2x4_t = type { [4 x <2 x i32>] }
-%struct.int64x1x4_t = type { [4 x <1 x i64>] }
-%struct.float32x2x4_t = type { [4 x <2 x float>] }
-%struct.float64x1x4_t = type { [4 x <1 x double>] }
-
-define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) {
-; CHECK-LABEL: test_ld_from_poll_v16i8
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <16 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 2, i8 13, i8 14, i8 15, i8 16>
-  ret <16 x i8> %b
-}
-
-define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) {
-; CHECK-LABEL: test_ld_from_poll_v8i16
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <8 x i16> %a, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
-  ret <8 x i16> %b
-}
-
-define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) {
-; CHECK-LABEL: test_ld_from_poll_v4i32
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <4 x i32> %a, <i32 1, i32 2, i32 3, i32 4>
-  ret <4 x i32> %b
-}
-
-define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) {
-; CHECK-LABEL: test_ld_from_poll_v2i64
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <2 x i64> %a, <i64 1, i64 2>
-  ret <2 x i64> %b
-}
-
-define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) {
-; CHECK-LABEL: test_ld_from_poll_v4f32
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = fadd <4 x float> %a, <float 1.0, float 2.0, float 3.0, float 4.0>
-  ret <4 x float> %b
-}
-
-define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) {
-; CHECK-LABEL: test_ld_from_poll_v2f64
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = fadd <2 x double> %a, <double 1.0, double 2.0>
-  ret <2 x double> %b
-}
-
-define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) {
-; CHECK-LABEL: test_ld_from_poll_v8i8
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <8 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
-  ret <8 x i8> %b
-}
-
-define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) {
-; CHECK-LABEL: test_ld_from_poll_v4i16
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <4 x i16> %a, <i16 1, i16 2, i16 3, i16 4>
-  ret <4 x i16> %b
-}
-
-define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) {
-; CHECK-LABEL: test_ld_from_poll_v2i32
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <2 x i32> %a, <i32 1, i32 2>
-  ret <2 x i32> %b
-}
-
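The test_ld_from_poll_* functions check constant-pool materialization rather than any load/store intrinsic: a non-splat vector constant forces the backend to emit an adrp of the pool symbol followed by a :lo12:-relocated ldr. Roughly the C-level equivalent (hypothetical name):

#include <arm_neon.h>

/* The {1,2,3,4} constant lands in a literal pool and is loaded with
   adrp xN, .LCPIx_y / ldr qM, [xN, :lo12:.LCPIx_y]. */
int32x4_t add_pool_const(int32x4_t a) {
  const int32x4_t c = {1, 2, 3, 4};
  return vaddq_s32(a, c);
}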
-define <16 x i8> @test_vld1q_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld1q_dup_s8
-; CHECK: ld1r { {{v[0-9]+}}.16b }, [x0]
-entry:
-  %0 = load i8* %a, align 1
-  %1 = insertelement <16 x i8> undef, i8 %0, i32 0
-  %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
-  ret <16 x i8> %lane
-}
-
-define <8 x i16> @test_vld1q_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld1q_dup_s16
-; CHECK: ld1r { {{v[0-9]+}}.8h }, [x0]
-entry:
-  %0 = load i16* %a, align 2
-  %1 = insertelement <8 x i16> undef, i16 %0, i32 0
-  %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
-  ret <8 x i16> %lane
-}
-
-define <4 x i32> @test_vld1q_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld1q_dup_s32
-; CHECK: ld1r { {{v[0-9]+}}.4s }, [x0]
-entry:
-  %0 = load i32* %a, align 4
-  %1 = insertelement <4 x i32> undef, i32 %0, i32 0
-  %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
-  ret <4 x i32> %lane
-}
-
-define <2 x i64> @test_vld1q_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld1q_dup_s64
-; CHECK: ld1r { {{v[0-9]+}}.2d }, [x0]
-entry:
-  %0 = load i64* %a, align 8
-  %1 = insertelement <2 x i64> undef, i64 %0, i32 0
-  %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
-  ret <2 x i64> %lane
-}
-
-define <4 x float> @test_vld1q_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld1q_dup_f32
-; CHECK: ld1r { {{v[0-9]+}}.4s }, [x0]
-entry:
-  %0 = load float* %a, align 4
-  %1 = insertelement <4 x float> undef, float %0, i32 0
-  %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
-  ret <4 x float> %lane
-}
-
-define <2 x double> @test_vld1q_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld1q_dup_f64
-; CHECK: ld1r { {{v[0-9]+}}.2d }, [x0]
-entry:
-  %0 = load double* %a, align 8
-  %1 = insertelement <2 x double> undef, double %0, i32 0
-  %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
-  ret <2 x double> %lane
-}
-
-define <8 x i8> @test_vld1_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld1_dup_s8
-; CHECK: ld1r { {{v[0-9]+}}.8b }, [x0]
-entry:
-  %0 = load i8* %a, align 1
-  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
-  %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
-  ret <8 x i8> %lane
-}
-
-define <4 x i16> @test_vld1_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld1_dup_s16
-; CHECK: ld1r { {{v[0-9]+}}.4h }, [x0]
-entry:
-  %0 = load i16* %a, align 2
-  %1 = insertelement <4 x i16> undef, i16 %0, i32 0
-  %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
-  ret <4 x i16> %lane
-}
-
-define <2 x i32> @test_vld1_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld1_dup_s32
-; CHECK: ld1r { {{v[0-9]+}}.2s }, [x0]
-entry:
-  %0 = load i32* %a, align 4
-  %1 = insertelement <2 x i32> undef, i32 %0, i32 0
-  %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
-  ret <2 x i32> %lane
-}
-
-define <1 x i64> @test_vld1_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld1_dup_s64
-; CHECK: ld1r { {{v[0-9]+}}.1d }, [x0]
-entry:
-  %0 = load i64* %a, align 8
-  %1 = insertelement <1 x i64> undef, i64 %0, i32 0
-  ret <1 x i64> %1
-}
-
-define <2 x float> @test_vld1_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld1_dup_f32
-; CHECK: ld1r { {{v[0-9]+}}.2s }, [x0]
-entry:
-  %0 = load float* %a, align 4
-  %1 = insertelement <2 x float> undef, float %0, i32 0
-  %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
-  ret <2 x float> %lane
-}
-
-define <1 x double> @test_vld1_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld1_dup_f64
-; CHECK: ld1r { {{v[0-9]+}}.1d }, [x0]
-entry:
-  %0 = load double* %a, align 8
-  %1 = insertelement <1 x double> undef, double %0, i32 0
-  ret <1 x double> %1
-}
-
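The vld1*_dup tests exercise the LD1R selection pattern: a scalar load feeding an insertelement whose result is splatted by a shufflevector becomes a single load-and-replicate instruction. In C this is simply (hypothetical name):

#include <arm_neon.h>

/* One ld1r { v0.4s }, [x0]: load one i32 and broadcast it to all lanes. */
int32x4_t splat_load(const int32_t *p) {
  return vld1q_dup_s32(p);
}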
-define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 {
-; Because the loaded value %1 also feeds a store, the LD1R pattern can't be
-; selected, so an LDR followed by an FMOV should be emitted instead.
-; CHECK-LABEL: testDUP.v1i64
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}]
-  %1 = load i64* %a, align 8
-  store i64 %1, i64* %b, align 8
-  %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0
-  ret <1 x i64> %vecinit.i
-}
-
-define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 {
-; Because the loaded value %1 also feeds a store, the LD1R pattern can't be
-; selected; a plain scalar LDR (and STR) of a d register is emitted instead.
-; CHECK-LABEL: testDUP.v1f64
-; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}]
-; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}]
-  %1 = load double* %a, align 8
-  store double %1, double* %b, align 8
-  %vecinit.i = insertelement <1 x double> undef, double %1, i32 0
-  ret <1 x double> %vecinit.i
-}
-
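The two testDUP functions pin down the negative case of that pattern, as their comments state: the loaded scalar has a second, non-vector use (the store), so LD1R cannot absorb the load. A C-level reproduction of the v1i64 case (hypothetical name):

#include <arm_neon.h>

/* v is needed both as a GPR (for the store) and in a vector register,
   so expect ldr xN / fmov dM, xN / str xN rather than ld1r. */
int64x1_t dup_and_store(const int64_t *a, int64_t *b) {
  int64_t v = *a;
  *b = v;
  return vdup_n_s64(v);
}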
-define %struct.int8x16x2_t @test_vld2q_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld2q_dup_s8
-; CHECK: ld2r { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, [x0]
-entry:
-  %vld_dup = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1)
-  %0 = extractvalue { <16 x i8>, <16 x i8> } %vld_dup, 0
-  %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
-  %1 = extractvalue { <16 x i8>, <16 x i8> } %vld_dup, 1
-  %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1
-  ret %struct.int8x16x2_t %.fca.0.1.insert
-}
-
-define %struct.int16x8x2_t @test_vld2q_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld2q_dup_s16
-; CHECK: ld2r { {{v[0-9]+}}.8h, {{v[0-9]+}}.8h }, [x0]
-entry:
-  %0 = bitcast i16* %a to i8*
-  %vld_dup = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2)
-  %1 = extractvalue { <8 x i16>, <8 x i16> } %vld_dup, 0
-  %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
-  %2 = extractvalue { <8 x i16>, <8 x i16> } %vld_dup, 1
-  %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1
-  ret %struct.int16x8x2_t %.fca.0.1.insert
-}
-
-define %struct.int32x4x2_t @test_vld2q_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld2q_dup_s32
-; CHECK: ld2r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0]
-entry:
-  %0 = bitcast i32* %a to i8*
-  %vld_dup = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4)
-  %1 = extractvalue { <4 x i32>, <4 x i32> } %vld_dup, 0
-  %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
-  %2 = extractvalue { <4 x i32>, <4 x i32> } %vld_dup, 1
-  %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1
-  ret %struct.int32x4x2_t %.fca.0.1.insert
-}
-
-define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld2q_dup_s64
-; CHECK: ld2r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0]
-entry:
-  %0 = bitcast i64* %a to i8*
-  %vld_dup = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8)
-  %1 = extractvalue { <2 x i64>, <2 x i64> } %vld_dup, 0
-  %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
-  %2 = extractvalue { <2 x i64>, <2 x i64> } %vld_dup, 1
-  %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1
-  ret %struct.int64x2x2_t %.fca.0.1.insert
-}
-
-define %struct.float32x4x2_t @test_vld2q_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld2q_dup_f32
-; CHECK: ld2r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0]
-entry:
-  %0 = bitcast float* %a to i8*
-  %vld_dup = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, i32 0, i32 4)
-  %1 = extractvalue { <4 x float>, <4 x float> } %vld_dup, 0
-  %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
-  %2 = extractvalue { <4 x float>, <4 x float> } %vld_dup, 1
-  %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1
-  ret %struct.float32x4x2_t %.fca.0.1.insert
-}
-
-define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld2q_dup_f64
-; CHECK: ld2r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0]
-entry:
-  %0 = bitcast double* %a to i8*
-  %vld_dup = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, i32 0, i32 8)
-  %1 = extractvalue { <2 x double>, <2 x double> } %vld_dup, 0
-  %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
-  %2 = extractvalue { <2 x double>, <2 x double> } %vld_dup, 1
-  %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1
-  ret %struct.float64x2x2_t %.fca.0.1.insert
-}
-
-define %struct.int8x8x2_t @test_vld2_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld2_dup_s8
-; CHECK: ld2r { {{v[0-9]+}}.8b, {{v[0-9]+}}.8b }, [x0]
-entry:
-  %vld_dup = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
-  %0 = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 0
-  %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer
-  %1 = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 1
-  %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1
-  ret %struct.int8x8x2_t %.fca.0.1.insert
-}
-
-define %struct.int16x4x2_t @test_vld2_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld2_dup_s16
-; CHECK: ld2r { {{v[0-9]+}}.4h, {{v[0-9]+}}.4h }, [x0]
-entry:
-  %0 = bitcast i16* %a to i8*
-  %vld_dup = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
-  %1 = extractvalue { <4 x i16>, <4 x i16> } %vld_dup, 0
-  %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
-  %2 = extractvalue { <4 x i16>, <4 x i16> } %vld_dup, 1
-  %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1
-  ret %struct.int16x4x2_t %.fca.0.1.insert
-}
-
-define %struct.int32x2x2_t @test_vld2_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld2_dup_s32
-; CHECK: ld2r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0]
-entry:
-  %0 = bitcast i32* %a to i8*
-  %vld_dup = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
-  %1 = extractvalue { <2 x i32>, <2 x i32> } %vld_dup, 0
-  %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
-  %2 = extractvalue { <2 x i32>, <2 x i32> } %vld_dup, 1
-  %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1
-  ret %struct.int32x2x2_t %.fca.0.1.insert
-}
-
-define %struct.int64x1x2_t @test_vld2_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld2_dup_s64
-; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0]
-entry:
-  %0 = bitcast i64* %a to i8*
-  %vld_dup = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %0, i32 8)
-  %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld_dup, 0
-  %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld_dup, 1
-  %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1
-  ret %struct.int64x1x2_t %.fca.0.1.insert
-}
-
-define %struct.float32x2x2_t @test_vld2_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld2_dup_f32
-; CHECK: ld2r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0]
-entry:
-  %0 = bitcast float* %a to i8*
-  %vld_dup = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
-  %1 = extractvalue { <2 x float>, <2 x float> } %vld_dup, 0
-  %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
-  %2 = extractvalue { <2 x float>, <2 x float> } %vld_dup, 1
-  %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1
-  ret %struct.float32x2x2_t %.fca.0.1.insert
-}
-
-define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld2_dup_f64
-; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0]
-entry:
-  %0 = bitcast double* %a to i8*
-  %vld_dup = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %0, i32 8)
-  %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld_dup, 0
-  %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld_dup, 1
-  %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1
-  ret %struct.float64x1x2_t %.fca.0.1.insert
-}
-
-define %struct.int8x16x3_t @test_vld3q_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld3q_dup_s8
-; CHECK: ld3r { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, [x0]
-entry:
-  %vld_dup = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1)
-  %0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 0
-  %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
-  %1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 1
-  %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
-  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 2
-  %lane2 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %lane2, 0, 2
-  ret %struct.int8x16x3_t %.fca.0.2.insert
-}
-
-define %struct.int16x8x3_t @test_vld3q_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld3q_dup_s16
-; CHECK: ld3r { {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h }, [x0]
-entry:
-  %0 = bitcast i16* %a to i8*
-  %vld_dup = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2)
-  %1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 0
-  %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
-  %2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 1
-  %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
-  %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 2
-  %lane2 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %lane2, 0, 2
-  ret %struct.int16x8x3_t %.fca.0.2.insert
-}
-
-define %struct.int32x4x3_t @test_vld3q_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld3q_dup_s32
-; CHECK: ld3r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0]
-entry:
-  %0 = bitcast i32* %a to i8*
-  %vld_dup = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4)
-  %1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 0
-  %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
-  %2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 1
-  %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
-  %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 2
-  %lane2 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %lane2, 0, 2
-  ret %struct.int32x4x3_t %.fca.0.2.insert
-}
-
-define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld3q_dup_s64
-; CHECK: ld3r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0]
-entry:
-  %0 = bitcast i64* %a to i8*
-  %vld_dup = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8)
-  %1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 0
-  %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
-  %2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 1
-  %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
-  %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 2
-  %lane2 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %lane2, 0, 2
-  ret %struct.int64x2x3_t %.fca.0.2.insert
-}
-
-define %struct.float32x4x3_t @test_vld3q_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld3q_dup_f32
-; CHECK: ld3r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0]
-entry:
-  %0 = bitcast float* %a to i8*
-  %vld_dup = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, <4 x float> undef, i32 0, i32 4)
-  %1 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 0
-  %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
-  %2 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 1
-  %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer
-  %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 2
-  %lane2 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %lane2, 0, 2
-  ret %struct.float32x4x3_t %.fca.0.2.insert
-}
-
-define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld3q_dup_f64
-; CHECK: ld3r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0]
-entry:
-  %0 = bitcast double* %a to i8*
-  %vld_dup = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8)
-  %1 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 0
-  %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
-  %2 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 1
-  %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
-  %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 2
-  %lane2 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %lane2, 0, 2
-  ret %struct.float64x2x3_t %.fca.0.2.insert
-}
-
-define %struct.int8x8x3_t @test_vld3_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld3_dup_s8
-; CHECK: ld3r { {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b }, [x0]
-entry:
-  %vld_dup = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
-  %0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 0
-  %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer
-  %1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 1
-  %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
-  %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 2
-  %lane2 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %lane2, 0, 2
-  ret %struct.int8x8x3_t %.fca.0.2.insert
-}
-
-define %struct.int16x4x3_t @test_vld3_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld3_dup_s16
-; CHECK: ld3r { {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h }, [x0]
-entry:
-  %0 = bitcast i16* %a to i8*
-  %vld_dup = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
-  %1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 0
-  %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
-  %2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 1
-  %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer
-  %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 2
-  %lane2 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %lane2, 0, 2
-  ret %struct.int16x4x3_t %.fca.0.2.insert
-}
-
-define %struct.int32x2x3_t @test_vld3_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld3_dup_s32
-; CHECK: ld3r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0]
-entry:
-  %0 = bitcast i32* %a to i8*
-  %vld_dup = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
-  %1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 0
-  %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
-  %2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 1
-  %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer
-  %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 2
-  %lane2 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %lane2, 0, 2
-  ret %struct.int32x2x3_t %.fca.0.2.insert
-}
-
-define %struct.int64x1x3_t @test_vld3_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld3_dup_s64
-; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0]
-entry:
-  %0 = bitcast i64* %a to i8*
-  %vld_dup = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %0, i32 8)
-  %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 0
-  %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 1
-  %vld_dup.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 2
-  %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld_dup.fca.2.extract, 0, 2
-  ret %struct.int64x1x3_t %.fca.0.2.insert
-}
-
-define %struct.float32x2x3_t @test_vld3_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld3_dup_f32
-; CHECK: ld3r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0]
-entry:
-  %0 = bitcast float* %a to i8*
-  %vld_dup = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
-  %1 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 0
-  %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
-  %2 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 1
-  %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer
-  %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 2
-  %lane2 = shufflevector <2 x float> %3, <2 x float> undef, <2 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %lane2, 0, 2
-  ret %struct.float32x2x3_t %.fca.0.2.insert
-}
-
-define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld3_dup_f64
-; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0]
-entry:
-  %0 = bitcast double* %a to i8*
-  %vld_dup = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %0, i32 8)
-  %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 0
-  %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 1
-  %vld_dup.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 2
-  %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld_dup.fca.2.extract, 0, 2
-  ret %struct.float64x1x3_t %.fca.0.2.insert
-}
-
-define %struct.int8x16x4_t @test_vld4q_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld4q_dup_s8
-; CHECK: ld4r { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, [x0]
-entry:
-  %vld_dup = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1)
-  %0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 0
-  %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
-  %1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 1
-  %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
-  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 2
-  %lane2 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
-  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 3
-  %lane3 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %lane2, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %lane3, 0, 3
-  ret %struct.int8x16x4_t %.fca.0.3.insert
-}
-
-define %struct.int16x8x4_t @test_vld4q_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld4q_dup_s16
-; CHECK: ld4r { {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h }, [x0]
-entry:
-  %0 = bitcast i16* %a to i8*
-  %vld_dup = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2)
-  %1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 0
-  %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
-  %2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 1
-  %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
-  %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 2
-  %lane2 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
-  %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 3
-  %lane3 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %lane2, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %lane3, 0, 3
-  ret %struct.int16x8x4_t %.fca.0.3.insert
-}
-
-define %struct.int32x4x4_t @test_vld4q_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld4q_dup_s32
-; CHECK: ld4r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0]
-entry:
-  %0 = bitcast i32* %a to i8*
-  %vld_dup = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4)
-  %1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 0
-  %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
-  %2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 1
-  %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
-  %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 2
-  %lane2 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
-  %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 3
-  %lane3 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %lane2, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %lane3, 0, 3
-  ret %struct.int32x4x4_t %.fca.0.3.insert
-}
-
-define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld4q_dup_s64
-; CHECK: ld4r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0]
-entry:
-  %0 = bitcast i64* %a to i8*
-  %vld_dup = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8)
-  %1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 0
-  %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
-  %2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 1
-  %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
-  %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 2
-  %lane2 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
-  %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 3
-  %lane3 = shufflevector <2 x i64> %4, <2 x i64> undef, <2 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %lane2, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %lane3, 0, 3
-  ret %struct.int64x2x4_t %.fca.0.3.insert
-}
-
-define %struct.float32x4x4_t @test_vld4q_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld4q_dup_f32
-; CHECK: ld4r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0]
-entry:
-  %0 = bitcast float* %a to i8*
-  %vld_dup = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, <4 x float> undef, <4 x float> undef, i32 0, i32 4)
-  %1 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 0
-  %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
-  %2 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 1
-  %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer
-  %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 2
-  %lane2 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> zeroinitializer
-  %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 3
-  %lane3 = shufflevector <4 x float> %4, <4 x float> undef, <4 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %lane2, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %lane3, 0, 3
-  ret %struct.float32x4x4_t %.fca.0.3.insert
-}
-
-define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld4q_dup_f64
-; CHECK: ld4r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0]
-entry:
-  %0 = bitcast double* %a to i8*
-  %vld_dup = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8)
-  %1 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 0
-  %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
-  %2 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 1
-  %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
-  %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 2
-  %lane2 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer
-  %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 3
-  %lane3 = shufflevector <2 x double> %4, <2 x double> undef, <2 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %lane2, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %lane3, 0, 3
-  ret %struct.float64x2x4_t %.fca.0.3.insert
-}
-
-define %struct.int8x8x4_t @test_vld4_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld4_dup_s8
-; CHECK: ld4r { {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b }, [x0]
-entry:
-  %vld_dup = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
-  %0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 0
-  %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer
-  %1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 1
-  %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
-  %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 2
-  %lane2 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
-  %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 3
-  %lane3 = shufflevector <8 x i8> %3, <8 x i8> undef, <8 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %lane2, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %lane3, 0, 3
-  ret %struct.int8x8x4_t %.fca.0.3.insert
-}
-
-define %struct.int16x4x4_t @test_vld4_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld4_dup_s16
-; CHECK: ld4r { {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h }, [x0]
-entry:
-  %0 = bitcast i16* %a to i8*
-  %vld_dup = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
-  %1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 0
-  %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
-  %2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 1
-  %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer
-  %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 2
-  %lane2 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer
-  %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 3
-  %lane3 = shufflevector <4 x i16> %4, <4 x i16> undef, <4 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %lane2, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %lane3, 0, 3
-  ret %struct.int16x4x4_t %.fca.0.3.insert
-}
-
-define %struct.int32x2x4_t @test_vld4_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld4_dup_s32
-; CHECK: ld4r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0]
-entry:
-  %0 = bitcast i32* %a to i8*
-  %vld_dup = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
-  %1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 0
-  %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
-  %2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 1
-  %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer
-  %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 2
-  %lane2 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer
-  %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 3
-  %lane3 = shufflevector <2 x i32> %4, <2 x i32> undef, <2 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %lane2, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %lane3, 0, 3
-  ret %struct.int32x2x4_t %.fca.0.3.insert
-}
-
-define %struct.int64x1x4_t @test_vld4_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld4_dup_s64
-; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0]
-entry:
-  %0 = bitcast i64* %a to i8*
-  %vld_dup = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %0, i32 8)
-  %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 0
-  %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 1
-  %vld_dup.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 2
-  %vld_dup.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 3
-  %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld_dup.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld_dup.fca.3.extract, 0, 3
-  ret %struct.int64x1x4_t %.fca.0.3.insert
-}
-
-define %struct.float32x2x4_t @test_vld4_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld4_dup_f32
-; CHECK: ld4r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0]
-entry:
-  %0 = bitcast float* %a to i8*
-  %vld_dup = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
-  %1 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 0
-  %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
-  %2 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 1
-  %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer
-  %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 2
-  %lane2 = shufflevector <2 x float> %3, <2 x float> undef, <2 x i32> zeroinitializer
-  %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 3
-  %lane3 = shufflevector <2 x float> %4, <2 x float> undef, <2 x i32> zeroinitializer
-  %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %lane, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %lane2, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %lane3, 0, 3
-  ret %struct.float32x2x4_t %.fca.0.3.insert
-}
-
-define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld4_dup_f64
-; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0]
-entry:
-  %0 = bitcast double* %a to i8*
-  %vld_dup = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %0, i32 8)
-  %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 0
-  %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 1
-  %vld_dup.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 2
-  %vld_dup.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 3
-  %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld_dup.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld_dup.fca.3.extract, 0, 3
-  ret %struct.float64x1x4_t %.fca.0.3.insert
-}
-
-define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vld1q_lane_s8
-; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i8* %a, align 1
-  %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15
-  ret <16 x i8> %vld1_lane
-}
-
-define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vld1q_lane_s16
-; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i16* %a, align 2
-  %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7
-  ret <8 x i16> %vld1_lane
-}
-
-define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vld1q_lane_s32
-; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i32* %a, align 4
-  %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3
-  ret <4 x i32> %vld1_lane
-}
-
-define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vld1q_lane_s64
-; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i64* %a, align 8
-  %vld1_lane = insertelement <2 x i64> %b, i64 %0, i32 1
-  ret <2 x i64> %vld1_lane
-}
-
-define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) {
-; CHECK-LABEL: test_vld1q_lane_f32
-; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load float* %a, align 4
-  %vld1_lane = insertelement <4 x float> %b, float %0, i32 3
-  ret <4 x float> %vld1_lane
-}
-
-define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) {
-; CHECK-LABEL: test_vld1q_lane_f64
-; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load double* %a, align 8
-  %vld1_lane = insertelement <2 x double> %b, double %0, i32 1
-  ret <2 x double> %vld1_lane
-}
-
-define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vld1_lane_s8
-; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i8* %a, align 1
-  %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7
-  ret <8 x i8> %vld1_lane
-}
-
-define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vld1_lane_s16
-; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i16* %a, align 2
-  %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3
-  ret <4 x i16> %vld1_lane
-}
-
-define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vld1_lane_s32
-; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i32* %a, align 4
-  %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1
-  ret <2 x i32> %vld1_lane
-}
-
-define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vld1_lane_s64
-; CHECK: ld1r { {{v[0-9]+}}.1d }, [x0]
-entry:
-  %0 = load i64* %a, align 8
-  %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0
-  ret <1 x i64> %vld1_lane
-}
-
-define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) {
-; CHECK-LABEL: test_vld1_lane_f32
-; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load float* %a, align 4
-  %vld1_lane = insertelement <2 x float> %b, float %0, i32 1
-  ret <2 x float> %vld1_lane
-}
-
-define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) {
-; CHECK-LABEL: test_vld1_lane_f64
-; CHECK: ld1r { {{v[0-9]+}}.1d }, [x0]
-entry:
-  %0 = load double* %a, align 8
-  %vld1_lane = insertelement <1 x double> undef, double %0, i32 0
-  ret <1 x double> %vld1_lane
-}
-
-define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vld2q_lane_s16
-; CHECK: ld2 { {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1
-  %0 = bitcast i16* %a to i8*
-  %vld2_lane = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 7, i32 2)
-  %vld2_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.int16x8x2_t %.fca.0.1.insert
-}
-
-define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vld2q_lane_s32
-; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1
-  %0 = bitcast i32* %a to i8*
-  %vld2_lane = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 3, i32 4)
-  %vld2_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.int32x4x2_t %.fca.0.1.insert
-}
-
-define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vld2q_lane_s64
-; CHECK: ld2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1
-  %0 = bitcast i64* %a to i8*
-  %vld2_lane = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 1, i32 8)
-  %vld2_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.int64x2x2_t %.fca.0.1.insert
-}
-
-define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vld2q_lane_f32
-; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1
-  %0 = bitcast float* %a to i8*
-  %vld2_lane = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 3, i32 4)
-  %vld2_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.float32x4x2_t %.fca.0.1.insert
-}
-
-define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vld2q_lane_f64
-; CHECK: ld2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1
-  %0 = bitcast double* %a to i8*
-  %vld2_lane = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 1, i32 8)
-  %vld2_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.float64x2x2_t %.fca.0.1.insert
-}
-
-define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vld2_lane_s8
-; CHECK: ld2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1
-  %vld2_lane = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 7, i32 1)
-  %vld2_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.int8x8x2_t %.fca.0.1.insert
-}
-
-define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vld2_lane_s16
-; CHECK: ld2 { {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1
-  %0 = bitcast i16* %a to i8*
-  %vld2_lane = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 3, i32 2)
-  %vld2_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.int16x4x2_t %.fca.0.1.insert
-}
-
-define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vld2_lane_s32
-; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1
-  %0 = bitcast i32* %a to i8*
-  %vld2_lane = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 1, i32 4)
-  %vld2_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.int32x2x2_t %.fca.0.1.insert
-}
-
-define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vld2_lane_s64
-; CHECK: ld2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1
-  %0 = bitcast i64* %a to i8*
-  %vld2_lane = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 0, i32 8)
-  %vld2_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.int64x1x2_t %.fca.0.1.insert
-}
-
-define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vld2_lane_f32
-; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1
-  %0 = bitcast float* %a to i8*
-  %vld2_lane = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 1, i32 4)
-  %vld2_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.float32x2x2_t %.fca.0.1.insert
-}
-
-define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vld2_lane_f64
-; CHECK: ld2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1
-  %0 = bitcast double* %a to i8*
-  %vld2_lane = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 0, i32 8)
-  %vld2_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.float64x1x2_t %.fca.0.1.insert
-}
-
-define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vld3q_lane_s16
-; CHECK: ld3 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2
-  %0 = bitcast i16* %a to i8*
-  %vld3_lane = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 7, i32 2)
-  %vld3_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.int16x8x3_t %.fca.0.2.insert
-}
-
-define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vld3q_lane_s32
-; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2
-  %0 = bitcast i32* %a to i8*
-  %vld3_lane = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 3, i32 4)
-  %vld3_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.int32x4x3_t %.fca.0.2.insert
-}
-
-define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vld3q_lane_s64
-; CHECK: ld3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2
-  %0 = bitcast i64* %a to i8*
-  %vld3_lane = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 1, i32 8)
-  %vld3_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.int64x2x3_t %.fca.0.2.insert
-}
-
-define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vld3q_lane_f32
-; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2
-  %0 = bitcast float* %a to i8*
-  %vld3_lane = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 3, i32 4)
-  %vld3_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.float32x4x3_t %.fca.0.2.insert
-}
-
-define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vld3q_lane_f64
-; CHECK: ld3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2
-  %0 = bitcast double* %a to i8*
-  %vld3_lane = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 1, i32 8)
-  %vld3_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.float64x2x3_t %.fca.0.2.insert
-}
-
-define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vld3_lane_s8
-; CHECK: ld3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2
-  %vld3_lane = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 7, i32 1)
-  %vld3_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.int8x8x3_t %.fca.0.2.insert
-}
-
-define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vld3_lane_s16
-; CHECK: ld3 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2
-  %0 = bitcast i16* %a to i8*
-  %vld3_lane = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 3, i32 2)
-  %vld3_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.int16x4x3_t %.fca.0.2.insert
-}
-
-define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vld3_lane_s32
-; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2
-  %0 = bitcast i32* %a to i8*
-  %vld3_lane = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 1, i32 4)
-  %vld3_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.int32x2x3_t %.fca.0.2.insert
-}
-
-define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vld3_lane_s64
-; CHECK: ld3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2
-  %0 = bitcast i64* %a to i8*
-  %vld3_lane = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 0, i32 8)
-  %vld3_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.int64x1x3_t %.fca.0.2.insert
-}
-
-define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vld3_lane_f32
-; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2
-  %0 = bitcast float* %a to i8*
-  %vld3_lane = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 1, i32 4)
-  %vld3_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.float32x2x3_t %.fca.0.2.insert
-}
-
-define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vld3_lane_f64
-; CHECK: ld3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2
-  %0 = bitcast double* %a to i8*
-  %vld3_lane = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 0, i32 8)
-  %vld3_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.float64x1x3_t %.fca.0.2.insert
-}
-
-define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vld4q_lane_s8
-; CHECK: ld4 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3
-  %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 15, i32 1)
-  %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2
-  %vld3_lane.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 3
-  %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld3_lane.fca.3.extract, 0, 3
-  ret %struct.int8x16x4_t %.fca.0.3.insert
-}
-
-define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vld4q_lane_s16
-; CHECK: ld4 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3
-  %0 = bitcast i16* %a to i8*
-  %vld3_lane = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 7, i32 2)
-  %vld3_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 2
-  %vld3_lane.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 3
-  %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld3_lane.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld3_lane.fca.3.extract, 0, 3
-  ret %struct.int16x8x4_t %.fca.0.3.insert
-}
-
-define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vld4q_lane_s32
-; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3
-  %0 = bitcast i32* %a to i8*
-  %vld3_lane = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 3, i32 4)
-  %vld3_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 2
-  %vld3_lane.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 3
-  %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld3_lane.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld3_lane.fca.3.extract, 0, 3
-  ret %struct.int32x4x4_t %.fca.0.3.insert
-}
-
-define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vld4q_lane_s64
-; CHECK: ld4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3
-  %0 = bitcast i64* %a to i8*
-  %vld3_lane = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 1, i32 8)
-  %vld3_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 2
-  %vld3_lane.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 3
-  %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld3_lane.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld3_lane.fca.3.extract, 0, 3
-  ret %struct.int64x2x4_t %.fca.0.3.insert
-}
-
-define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vld4q_lane_f32
-; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3
-  %0 = bitcast float* %a to i8*
-  %vld3_lane = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 3, i32 4)
-  %vld3_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 2
-  %vld3_lane.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 3
-  %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld3_lane.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld3_lane.fca.3.extract, 0, 3
-  ret %struct.float32x4x4_t %.fca.0.3.insert
-}
-
-define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vld4q_lane_f64
-; CHECK: ld4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3
-  %0 = bitcast double* %a to i8*
-  %vld3_lane = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 1, i32 8)
-  %vld3_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 2
-  %vld3_lane.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 3
-  %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld3_lane.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld3_lane.fca.3.extract, 0, 3
-  ret %struct.float64x2x4_t %.fca.0.3.insert
-}
-
-define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vld4_lane_s8
-; CHECK: ld4 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3
-  %vld3_lane = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 7, i32 1)
-  %vld3_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 2
-  %vld3_lane.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 3
-  %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld3_lane.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld3_lane.fca.3.extract, 0, 3
-  ret %struct.int8x8x4_t %.fca.0.3.insert
-}
-
-define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vld4_lane_s16
-; CHECK: ld4 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3
-  %0 = bitcast i16* %a to i8*
-  %vld3_lane = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 3, i32 2)
-  %vld3_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 2
-  %vld3_lane.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 3
-  %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld3_lane.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld3_lane.fca.3.extract, 0, 3
-  ret %struct.int16x4x4_t %.fca.0.3.insert
-}
-
-define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vld4_lane_s32
-; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3
-  %0 = bitcast i32* %a to i8*
-  %vld3_lane = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 1, i32 4)
-  %vld3_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 2
-  %vld3_lane.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 3
-  %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld3_lane.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld3_lane.fca.3.extract, 0, 3
-  ret %struct.int32x2x4_t %.fca.0.3.insert
-}
-
-define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vld4_lane_s64
-; CHECK: ld4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3
-  %0 = bitcast i64* %a to i8*
-  %vld3_lane = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 0, i32 8)
-  %vld3_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 2
-  %vld3_lane.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 3
-  %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld3_lane.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld3_lane.fca.3.extract, 0, 3
-  ret %struct.int64x1x4_t %.fca.0.3.insert
-}
-
-define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vld4_lane_f32
-; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3
-  %0 = bitcast float* %a to i8*
-  %vld3_lane = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 1, i32 4)
-  %vld3_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 2
-  %vld3_lane.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 3
-  %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld3_lane.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld3_lane.fca.3.extract, 0, 3
-  ret %struct.float32x2x4_t %.fca.0.3.insert
-}
-
-define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vld4_lane_f64
-; CHECK: ld4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3
-  %0 = bitcast double* %a to i8*
-  %vld3_lane = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 0, i32 8)
-  %vld3_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 2
-  %vld3_lane.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 3
-  %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld3_lane.fca.2.extract, 0, 2
-  %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld3_lane.fca.3.extract, 0, 3
-  ret %struct.float64x1x4_t %.fca.0.3.insert
-}
-
-define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vst1q_lane_s8
-; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <16 x i8> %b, i32 15
-  store i8 %0, i8* %a, align 1
-  ret void
-}
-
-define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vst1q_lane_s16
-; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <8 x i16> %b, i32 7
-  store i16 %0, i16* %a, align 2
-  ret void
-}
-
-define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vst1q_lane_s32
-; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <4 x i32> %b, i32 3
-  store i32 %0, i32* %a, align 4
-  ret void
-}
-
-define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vst1q_lane_s64
-; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <2 x i64> %b, i32 1
-  store i64 %0, i64* %a, align 8
-  ret void
-}
-
-define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) {
-; CHECK-LABEL: test_vst1q_lane_f32
-; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <4 x float> %b, i32 3
-  store float %0, float* %a, align 4
-  ret void
-}
-
-define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) {
-; CHECK-LABEL: test_vst1q_lane_f64
-; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <2 x double> %b, i32 1
-  store double %0, double* %a, align 8
-  ret void
-}
-
-define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vst1_lane_s8
-; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <8 x i8> %b, i32 7
-  store i8 %0, i8* %a, align 1
-  ret void
-}
-
-define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vst1_lane_s16
-; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <4 x i16> %b, i32 3
-  store i16 %0, i16* %a, align 2
-  ret void
-}
-
-define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vst1_lane_s32
-; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <2 x i32> %b, i32 1
-  store i32 %0, i32* %a, align 4
-  ret void
-}
-
-define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vst1_lane_s64
-; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <1 x i64> %b, i32 0
-  store i64 %0, i64* %a, align 8
-  ret void
-}
-
-define void @test_vst1_lane_f32(float* %a, <2 x float> %b) {
-; CHECK-LABEL: test_vst1_lane_f32
-; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <2 x float> %b, i32 1
-  store float %0, float* %a, align 4
-  ret void
-}
-
-define void @test_vst1_lane_f64(double* %a, <1 x double> %b) {
-; CHECK-LABEL: test_vst1_lane_f64
-; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <1 x double> %b, i32 0
-  store double %0, double* %a, align 8
-  ret void
-}
-
-define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_lane_s8
-; CHECK: st2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1
-  tail call void @llvm.arm.neon.vst2lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 15, i32 1)
-  ret void
-}
-
-define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_lane_s16
-; CHECK: st2 { {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1
-  %0 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst2lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 7, i32 2)
-  ret void
-}
-
-define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_lane_s32
-; CHECK: st2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1
-  %0 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst2lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 3, i32 4)
-  ret void
-}
-
-define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_lane_s64
-; CHECK: st2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1
-  %0 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst2lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 1, i32 8)
-  ret void
-}
-
-define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_lane_f32
-; CHECK: st2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1
-  %0 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst2lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 3, i32 4)
-  ret void
-}
-
-define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_lane_f64
-; CHECK: st2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1
-  %0 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst2lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 1, i32 8)
-  ret void
-}
-
-define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst2_lane_s8
-; CHECK: st2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1
-  tail call void @llvm.arm.neon.vst2lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 7, i32 1)
-  ret void
-}
-
-define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst2_lane_s16
-; CHECK: st2 { {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1
-  %0 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst2lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 3, i32 2)
-  ret void
-}
-
-define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst2_lane_s32
-; CHECK: st2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1
-  %0 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst2lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 1, i32 4)
-  ret void
-}
-
-define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst2_lane_s64
-; CHECK: st2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1
-  %0 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 0, i32 8)
-  ret void
-}
-
-define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst2_lane_f32
-; CHECK: st2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1
-  %0 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst2lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 1, i32 4)
-  ret void
-}
-
-define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst2_lane_f64
-; CHECK: st2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1
-  %0 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst2lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 0, i32 8)
-  ret void
-}
-
-define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_lane_s8
-; CHECK: st3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2
-  tail call void @llvm.arm.neon.vst3lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 15, i32 1)
-  ret void
-}
-
-define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_lane_s16
-; CHECK: st3 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2
-  %0 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst3lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 7, i32 2)
-  ret void
-}
-
-define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_lane_s32
-; CHECK: st3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2
-  %0 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst3lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 3, i32 4)
-  ret void
-}
-
-define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_lane_s64
-; CHECK: st3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2
-  %0 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst3lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 1, i32 8)
-  ret void
-}
-
-define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_lane_f32
-; CHECK: st3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2
-  %0 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst3lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 3, i32 4)
-  ret void
-}
-
-define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_lane_f64
-; CHECK: st3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2
-  %0 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst3lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 1, i32 8)
-  ret void
-}
-
-define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst3_lane_s8
-; CHECK: st3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2
-  tail call void @llvm.arm.neon.vst3lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 7, i32 1)
-  ret void
-}
-
-define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst3_lane_s16
-; CHECK: st3 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2
-  %0 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst3lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 3, i32 2)
-  ret void
-}
-
-define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst3_lane_s32
-; CHECK: st3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2
-  %0 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst3lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 1, i32 4)
-  ret void
-}
-
-define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst3_lane_s64
-; CHECK: st3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2
-  %0 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 0, i32 8)
-  ret void
-}
-
-define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst3_lane_f32
-; CHECK: st3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2
-  %0 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst3lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 1, i32 4)
-  ret void
-}
-
-define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst3_lane_f64
-; CHECK: st3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2
-  %0 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst3lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 0, i32 8)
-  ret void
-}
-
-define void @test_vst4q_lane_s8(i16* %a, [4 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_lane_s8
-; CHECK: st4 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3
-  %0 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst4lane.v16i8(i8* %0, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 15, i32 2)
-  ret void
-}
-
-define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_lane_s16
-; CHECK: st4 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3
-  %0 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst4lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 7, i32 2)
-  ret void
-}
-
-define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_lane_s32
-; CHECK: st4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3
-  %0 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst4lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 3, i32 4)
-  ret void
-}
-
-define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_lane_s64
-; CHECK: st4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3
-  %0 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst4lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 1, i32 8)
-  ret void
-}
-
-define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_lane_f32
-; CHECK: st4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3
-  %0 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst4lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 3, i32 4)
-  ret void
-}
-
-define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_lane_f64
-; CHECK: st4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3
-  %0 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst4lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 1, i32 8)
-  ret void
-}
-
-define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst4_lane_s8
-; CHECK: st4 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3
-  tail call void @llvm.arm.neon.vst4lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 7, i32 1)
-  ret void
-}
-
-define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst4_lane_s16
-; CHECK: st4 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3
-  %0 = bitcast i16* %a to i8*
-  tail call void @llvm.arm.neon.vst4lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 3, i32 2)
-  ret void
-}
-
-define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst4_lane_s32
-; CHECK: st4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3
-  %0 = bitcast i32* %a to i8*
-  tail call void @llvm.arm.neon.vst4lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 1, i32 4)
-  ret void
-}
-
-define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst4_lane_s64
-; CHECK: st4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3
-  %0 = bitcast i64* %a to i8*
-  tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 0, i32 8)
-  ret void
-}
-
-define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst4_lane_f32
-; CHECK: st4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3
-  %0 = bitcast float* %a to i8*
-  tail call void @llvm.arm.neon.vst4lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 1, i32 4)
-  ret void
-}
-
-define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst4_lane_f64
-; CHECK: st4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0
-  %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1
-  %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2
-  %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3
-  %0 = bitcast double* %a to i8*
-  tail call void @llvm.arm.neon.vst4lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 0, i32 8)
-  ret void
-}
-
-declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)
-declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32)
-declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
-declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8*, <2 x i64>, <2 x i64>, i32, i32)
-declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32)
-declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8*, <2 x double>, <2 x double>, i32, i32)
-declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32)
-declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32)
-declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32)
-declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32)
-declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32)
-declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32)
-declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32)
-declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32)
-declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32)
-declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32)
-declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32)
-declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32)
-declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2lane.v1f64(i8*, <1 x double>, <1 x double>, i32, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
-declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
-declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v2i64(i8*, <2 x i64>, <2 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v2f64(i8*, <2 x double>, <2 x double>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v1f64(i8*, <1 x double>, <1 x double>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32)
-
-define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) {
-; CHECK-LABEL: test_vld2q_lane_s8
-; CHECK: ld2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0]
-entry:
-  %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0
-  %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1
-  %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1)
-  %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.int8x16x2_t %.fca.0.1.insert
-}
-
-define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) {
-; CHECK-LABEL: test_vld2q_lane_u8
-; CHECK: ld2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0]
-entry:
-  %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0
-  %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1
-  %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1)
-  %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.uint8x16x2_t %.fca.0.1.insert
-}
-
-define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) {
-; CHECK-LABEL: test_vld2q_lane_p8
-; CHECK: ld2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0]
-entry:
-  %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0
-  %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1
-  %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1)
-  %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0
-  %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1
-  %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1
-  ret %struct.poly8x16x2_t %.fca.0.1.insert
-}
-
-define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* readonly %ptr, [3 x <16 x i8>] %src.coerce) {
-; CHECK-LABEL: test_vld3q_lane_s8
-; CHECK: ld3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0]
-entry:
-  %src.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %src.coerce, 0
-  %src.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %src.coerce, 1
-  %src.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %src.coerce, 2
-  %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, <16 x i8> %src.coerce.fca.2.extract, i32 15, i32 1)
-  %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.int8x16x3_t %.fca.0.2.insert
-}
-
-define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* readonly %ptr, [3 x <16 x i8>] %src.coerce) {
-; CHECK-LABEL: test_vld3q_lane_u8
-; CHECK: ld3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0]
-entry:
-  %src.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %src.coerce, 0
-  %src.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %src.coerce, 1
-  %src.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %src.coerce, 2
-  %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, <16 x i8> %src.coerce.fca.2.extract, i32 15, i32 1)
-  %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0
-  %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1
-  %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2
-  %.fca.0.0.insert = insertvalue %struct.uint8x16x3_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0
-  %.fca.0.1.insert = insertvalue %struct.uint8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1
-  %.fca.0.2.insert = insertvalue %struct.uint8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2
-  ret %struct.uint8x16x3_t %.fca.0.2.insert
-}
-
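
(Context for the hunk above: the removed tests pin the lane-indexed structure loads, checking that each vld2/vld3 lane intrinsic selects to a single ld2/ld3 instruction with the lane index in brackets. A minimal sketch of the same check against the surviving arm64 backend follows; the llvm.arm64.neon.ld2lane spelling and its signature, vectors first, i64 lane index, pointer last, are assumptions about the arm64 intrinsic set of this era, not something this commit shows.)

; Sketch only, not part of the diff. The intrinsic name and signature below
; are assumed arm64-era spellings; the CHECK line mirrors the removed
; aarch64-triple tests above.
; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s

define { <16 x i8>, <16 x i8> } @sketch_ld2q_lane_s8(i8* %ptr, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: sketch_ld2q_lane_s8
; CHECK: ld2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0]
  %lv = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8> %a, <16 x i8> %b, i64 15, i8* %ptr)
  ret { <16 x i8>, <16 x i8> } %lv
}

declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*)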

Removed: llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst.ll?rev=209575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst.ll (removed)
@@ -1,165 +0,0 @@
-; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; Just intrinsic mashing. Duplicates existing arm64 tests.
-
-define void @test_ldstq_4v(i8* noalias %io, i32 %count) {
-; CHECK-LABEL: test_ldstq_4v
-; CHECK: ld4     { v0.16b, v1.16b, v2.16b, v3.16b }, [x0]
-; CHECK: st4     { v0.16b, v1.16b, v2.16b, v3.16b }, [x0]
-entry:
-  %tobool62 = icmp eq i32 %count, 0
-  br i1 %tobool62, label %while.end, label %while.body
-
-while.body:                                       ; preds = %entry, %while.body
-  %count.addr.063 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
-  %dec = add i32 %count.addr.063, -1
-  %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %io, i32 1)
-  %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3
-  tail call void @llvm.arm.neon.vst4.v16i8(i8* %io, <16 x i8> %vld4.fca.0.extract, <16 x i8> %vld4.fca.1.extract, <16 x i8> %vld4.fca.2.extract, <16 x i8> %vld4.fca.3.extract, i32 1)
-  %tobool = icmp eq i32 %dec, 0
-  br i1 %tobool, label %while.end, label %while.body
-
-while.end:                                        ; preds = %while.body, %entry
-  ret void
-}
-
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-
-define void @test_ldstq_3v(i8* noalias %io, i32 %count) {
-; CHECK-LABEL: test_ldstq_3v
-; CHECK: ld3     { v0.16b, v1.16b, v2.16b }, [x0]
-; CHECK: st3     { v0.16b, v1.16b, v2.16b }, [x0]
-entry:
-  %tobool47 = icmp eq i32 %count, 0
-  br i1 %tobool47, label %while.end, label %while.body
-
-while.body:                                       ; preds = %entry, %while.body
-  %count.addr.048 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
-  %dec = add i32 %count.addr.048, -1
-  %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %io, i32 1)
-  %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2
-  tail call void @llvm.arm.neon.vst3.v16i8(i8* %io, <16 x i8> %vld3.fca.0.extract, <16 x i8> %vld3.fca.1.extract, <16 x i8> %vld3.fca.2.extract, i32 1)
-  %tobool = icmp eq i32 %dec, 0
-  br i1 %tobool, label %while.end, label %while.body
-
-while.end:                                        ; preds = %while.body, %entry
-  ret void
-}
-
-declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-
-define void @test_ldstq_2v(i8* noalias %io, i32 %count) {
-; CHECK-LABEL: test_ldstq_2v
-; CHECK: ld2     { v0.16b, v1.16b }, [x0]
-; CHECK: st2     { v0.16b, v1.16b }, [x0]
-entry:
-  %tobool22 = icmp eq i32 %count, 0
-  br i1 %tobool22, label %while.end, label %while.body
-
-while.body:                                       ; preds = %entry, %while.body
-  %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
-  %dec = add i32 %count.addr.023, -1
-  %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %io, i32 1)
-  %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1
-  tail call void @llvm.arm.neon.vst2.v16i8(i8* %io, <16 x i8> %vld2.fca.0.extract, <16 x i8> %vld2.fca.1.extract, i32 1)
-  %tobool = icmp eq i32 %dec, 0
-  br i1 %tobool, label %while.end, label %while.body
-
-while.end:                                        ; preds = %while.body, %entry
-  ret void
-}
-
-declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
-
-define void @test_ldst_4v(i8* noalias %io, i32 %count) {
-; CHECK-LABEL: test_ldst_4v
-; CHECK: ld4     { v0.8b, v1.8b, v2.8b, v3.8b }, [x0]
-; CHECK: st4     { v0.8b, v1.8b, v2.8b, v3.8b }, [x0]
-entry:
-  %tobool42 = icmp eq i32 %count, 0
-  br i1 %tobool42, label %while.end, label %while.body
-
-while.body:                                       ; preds = %entry, %while.body
-  %count.addr.043 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
-  %dec = add i32 %count.addr.043, -1
-  %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %io, i32 1)
-  %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0
-  %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1
-  %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2
-  %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3
-  tail call void @llvm.arm.neon.vst4.v8i8(i8* %io, <8 x i8> %vld4.fca.0.extract, <8 x i8> %vld4.fca.1.extract, <8 x i8> %vld4.fca.2.extract, <8 x i8> %vld4.fca.3.extract, i32 1)
-  %tobool = icmp eq i32 %dec, 0
-  br i1 %tobool, label %while.end, label %while.body
-
-while.end:                                        ; preds = %while.body, %entry
-  ret void
-}
-
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-
-define void @test_ldst_3v(i8* noalias %io, i32 %count) {
-; CHECK-LABEL: test_ldst_3v
-; CHECK: ld3     { v0.8b, v1.8b, v2.8b }, [x0]
-; CHECK: st3     { v0.8b, v1.8b, v2.8b }, [x0]
-entry:
-  %tobool32 = icmp eq i32 %count, 0
-  br i1 %tobool32, label %while.end, label %while.body
-
-while.body:                                       ; preds = %entry, %while.body
-  %count.addr.033 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
-  %dec = add i32 %count.addr.033, -1
-  %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %io, i32 1)
-  %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0
-  %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1
-  %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2
-  tail call void @llvm.arm.neon.vst3.v8i8(i8* %io, <8 x i8> %vld3.fca.0.extract, <8 x i8> %vld3.fca.1.extract, <8 x i8> %vld3.fca.2.extract, i32 1)
-  %tobool = icmp eq i32 %dec, 0
-  br i1 %tobool, label %while.end, label %while.body
-
-while.end:                                        ; preds = %while.body, %entry
-  ret void
-}
-
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-
-define void @test_ldst_2v(i8* noalias %io, i32 %count) {
-; CHECK-LABEL: test_ldst_2v
-; CHECK: ld2     { v0.8b, v1.8b }, [x0]
-; CHECK: st2     { v0.8b, v1.8b }, [x0]
-entry:
-  %tobool22 = icmp eq i32 %count, 0
-  br i1 %tobool22, label %while.end, label %while.body
-
-while.body:                                       ; preds = %entry, %while.body
-  %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
-  %dec = add i32 %count.addr.023, -1
-  %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %io, i32 1)
-  %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0
-  %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1
-  tail call void @llvm.arm.neon.vst2.v8i8(i8* %io, <8 x i8> %vld2.fca.0.extract, <8 x i8> %vld2.fca.1.extract, i32 1)
-  %tobool = icmp eq i32 %dec, 0
-  br i1 %tobool, label %while.end, label %while.body
-
-while.end:                                        ; preds = %while.body, %entry
-  ret void
-}
-
-declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
-
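
(The removed file's own comment notes it duplicates existing arm64 tests: each loop body loads N interleaved registers with ldN and stores them back with stN. A minimal arm64-side sketch of the ld2/st2 pair follows; the llvm.arm64.neon.ld2/st2 spellings, overloaded on both the vector and pointer types, with no alignment operand and the pointer parameter last for st2, are assumptions about the arm64 intrinsic set of this era.)

; Sketch only, not part of the diff. Assumed arm64-era intrinsic names and
; signatures (no i32 alignment operand, pointer last for st2).
; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s

define void @sketch_ldst_2v(i8* %io) {
; CHECK-LABEL: sketch_ldst_2v
; CHECK: ld2 { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, [x0]
; CHECK: st2 { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, [x0]
  %vld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %io)
  %vld2.0 = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0
  %vld2.1 = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1
  call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %vld2.0, <16 x i8> %vld2.1, i8* %io)
  ret void
}

declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8*)
declare void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)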
