[llvm] r209577 - AArch64/ARM64: move ARM64 into AArch64's place

Tim Northover tnorthover at apple.com
Sat May 24 05:50:31 PDT 2014

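The change below is mechanical: the test moves from test/CodeGen/ARM64/vmul.ll to test/CodeGen/AArch64/arm64-vmul.ll, every llvm.arm64.neon.* intrinsic becomes llvm.aarch64.neon.*, and the -arm64-neon-syntax option becomes -aarch64-neon-syntax. A rename of this shape can be scripted; the commands below are a minimal illustrative sketch (assuming GNU sed and a git checkout -- they are not taken from the commit itself):

  # Move the test under the AArch64 target directory, keeping an arm64- prefix.
  git mv test/CodeGen/ARM64/vmul.ll test/CodeGen/AArch64/arm64-vmul.ll
  # Rewrite the intrinsic namespace and the NEON syntax flag in place.
  sed -i \
      -e 's/llvm\.arm64\./llvm.aarch64./g' \
      -e 's/arm64-neon-syntax/aarch64-neon-syntax/g' \
      test/CodeGen/AArch64/arm64-vmul.ll

Note that -march=arm64 in the RUN line is left untouched by the diff; only the intrinsic namespace and the option prefix change in this file.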

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-vmul.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/vmul.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vmul.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-vmul.ll&p1=llvm/trunk/test/CodeGen/ARM64/vmul.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/vmul.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vmul.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -asm-verbose=false < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc -asm-verbose=false < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 
 
 define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
@@ -6,7 +6,7 @@ define <8 x i16> @smull8h(<8 x i8>* %A,
 ;CHECK: smull.8h
   %tmp1 = load <8 x i8>* %A
   %tmp2 = load <8 x i8>* %B
-  %tmp3 = call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
   ret <8 x i16> %tmp3
 }
 
@@ -15,7 +15,7 @@ define <4 x i32> @smull4s(<4 x i16>* %A,
 ;CHECK: smull.4s
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
-  %tmp3 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   ret <4 x i32> %tmp3
 }
 
@@ -24,20 +24,20 @@ define <2 x i64> @smull2d(<2 x i32>* %A,
 ;CHECK: smull.2d
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
-  %tmp3 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp3 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   ret <2 x i64> %tmp3
 }
 
-declare <8 x i16>  @llvm.arm64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+declare <8 x i16>  @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
 
 define <8 x i16> @umull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: umull8h:
 ;CHECK: umull.8h
   %tmp1 = load <8 x i8>* %A
   %tmp2 = load <8 x i8>* %B
-  %tmp3 = call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
   ret <8 x i16> %tmp3
 }
 
@@ -46,7 +46,7 @@ define <4 x i32> @umull4s(<4 x i16>* %A,
 ;CHECK: umull.4s
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
-  %tmp3 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   ret <4 x i32> %tmp3
 }
 
@@ -55,20 +55,20 @@ define <2 x i64> @umull2d(<2 x i32>* %A,
 ;CHECK: umull.2d
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
-  %tmp3 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp3 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   ret <2 x i64> %tmp3
 }
 
-declare <8 x i16>  @llvm.arm64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+declare <8 x i16>  @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
 
 define <4 x i32> @sqdmull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqdmull4s:
 ;CHECK: sqdmull.4s
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
-  %tmp3 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   ret <4 x i32> %tmp3
 }
 
@@ -77,7 +77,7 @@ define <2 x i64> @sqdmull2d(<2 x i32>* %
 ;CHECK: sqdmull.2d
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
-  %tmp3 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   ret <2 x i64> %tmp3
 }
 
@@ -88,7 +88,7 @@ define <4 x i32> @sqdmull2_4s(<8 x i16>*
   %load2 = load <8 x i16>* %B
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %tmp3 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   ret <4 x i32> %tmp3
 }
 
@@ -99,31 +99,31 @@ define <2 x i64> @sqdmull2_2d(<4 x i32>*
   %load2 = load <4 x i32>* %B
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %tmp3 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   ret <2 x i64> %tmp3
 }
 
 
-declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
 
 define <8 x i16> @pmull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: pmull8h:
 ;CHECK: pmull.8h
   %tmp1 = load <8 x i8>* %A
   %tmp2 = load <8 x i8>* %B
-  %tmp3 = call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+  %tmp3 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
   ret <8 x i16> %tmp3
 }
 
-declare <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
 
 define <4 x i16> @sqdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqdmulh_4h:
 ;CHECK: sqdmulh.4h
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
-  %tmp3 = call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
   ret <4 x i16> %tmp3
 }
 
@@ -132,7 +132,7 @@ define <8 x i16> @sqdmulh_8h(<8 x i16>*
 ;CHECK: sqdmulh.8h
   %tmp1 = load <8 x i16>* %A
   %tmp2 = load <8 x i16>* %B
-  %tmp3 = call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
   ret <8 x i16> %tmp3
 }
 
@@ -141,7 +141,7 @@ define <2 x i32> @sqdmulh_2s(<2 x i32>*
 ;CHECK: sqdmulh.2s
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
-  %tmp3 = call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
   ret <2 x i32> %tmp3
 }
 
@@ -150,7 +150,7 @@ define <4 x i32> @sqdmulh_4s(<4 x i32>*
 ;CHECK: sqdmulh.4s
   %tmp1 = load <4 x i32>* %A
   %tmp2 = load <4 x i32>* %B
-  %tmp3 = call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
   ret <4 x i32> %tmp3
 }
 
@@ -159,22 +159,22 @@ define i32 @sqdmulh_1s(i32* %A, i32* %B)
 ;CHECK: sqdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
   %tmp1 = load i32* %A
   %tmp2 = load i32* %B
-  %tmp3 = call i32 @llvm.arm64.neon.sqdmulh.i32(i32 %tmp1, i32 %tmp2)
+  %tmp3 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %tmp1, i32 %tmp2)
   ret i32 %tmp3
 }
 
-declare <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare i32 @llvm.arm64.neon.sqdmulh.i32(i32, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqdmulh.i32(i32, i32) nounwind readnone
 
 define <4 x i16> @sqrdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqrdmulh_4h:
 ;CHECK: sqrdmulh.4h
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
-  %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
   ret <4 x i16> %tmp3
 }
 
@@ -183,7 +183,7 @@ define <8 x i16> @sqrdmulh_8h(<8 x i16>*
 ;CHECK: sqrdmulh.8h
   %tmp1 = load <8 x i16>* %A
   %tmp2 = load <8 x i16>* %B
-  %tmp3 = call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
   ret <8 x i16> %tmp3
 }
 
@@ -192,7 +192,7 @@ define <2 x i32> @sqrdmulh_2s(<2 x i32>*
 ;CHECK: sqrdmulh.2s
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
-  %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
   ret <2 x i32> %tmp3
 }
 
@@ -201,7 +201,7 @@ define <4 x i32> @sqrdmulh_4s(<4 x i32>*
 ;CHECK: sqrdmulh.4s
   %tmp1 = load <4 x i32>* %A
   %tmp2 = load <4 x i32>* %B
-  %tmp3 = call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
   ret <4 x i32> %tmp3
 }
 
@@ -210,22 +210,22 @@ define i32 @sqrdmulh_1s(i32* %A, i32* %B
 ;CHECK: sqrdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
   %tmp1 = load i32* %A
   %tmp2 = load i32* %B
-  %tmp3 = call i32 @llvm.arm64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2)
+  %tmp3 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2)
   ret i32 %tmp3
 }
 
-declare <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare i32 @llvm.arm64.neon.sqrdmulh.i32(i32, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32) nounwind readnone
 
 define <2 x float> @fmulx_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: fmulx_2s:
 ;CHECK: fmulx.2s
   %tmp1 = load <2 x float>* %A
   %tmp2 = load <2 x float>* %B
-  %tmp3 = call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
   ret <2 x float> %tmp3
 }
 
@@ -234,7 +234,7 @@ define <4 x float> @fmulx_4s(<4 x float>
 ;CHECK: fmulx.4s
   %tmp1 = load <4 x float>* %A
   %tmp2 = load <4 x float>* %B
-  %tmp3 = call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
   ret <4 x float> %tmp3
 }
 
@@ -243,13 +243,13 @@ define <2 x double> @fmulx_2d(<2 x doubl
 ;CHECK: fmulx.2d
   %tmp1 = load <2 x double>* %A
   %tmp2 = load <2 x double>* %B
-  %tmp3 = call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
   ret <2 x double> %tmp3
 }
 
-declare <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) nounwind readnone
 
 define <4 x i32> @smlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: smlal4s:
@@ -257,7 +257,7 @@ define <4 x i32> @smlal4s(<4 x i16>* %A,
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
   %tmp3 = load <4 x i32>* %C
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   %tmp5 = add <4 x i32> %tmp3, %tmp4
   ret <4 x i32> %tmp5
 }
@@ -268,7 +268,7 @@ define <2 x i64> @smlal2d(<2 x i32>* %A,
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
   %tmp3 = load <2 x i64>* %C
-  %tmp4 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   %tmp5 = add <2 x i64> %tmp3, %tmp4
   ret <2 x i64> %tmp5
 }
@@ -279,7 +279,7 @@ define <4 x i32> @smlsl4s(<4 x i16>* %A,
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
   %tmp3 = load <4 x i32>* %C
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   %tmp5 = sub <4 x i32> %tmp3, %tmp4
   ret <4 x i32> %tmp5
 }
@@ -290,15 +290,15 @@ define <2 x i64> @smlsl2d(<2 x i32>* %A,
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
   %tmp3 = load <2 x i64>* %C
-  %tmp4 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   %tmp5 = sub <2 x i64> %tmp3, %tmp4
   ret <2 x i64> %tmp5
 }
 
-declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
-declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
-declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
-declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
 
 define <4 x i32> @sqdmlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: sqdmlal4s:
@@ -306,8 +306,8 @@ define <4 x i32> @sqdmlal4s(<4 x i16>* %
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
   %tmp3 = load <4 x i32>* %C
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
-  %tmp5 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
   ret <4 x i32> %tmp5
 }
 
@@ -317,8 +317,8 @@ define <2 x i64> @sqdmlal2d(<2 x i32>* %
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
   %tmp3 = load <2 x i64>* %C
-  %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
-  %tmp5 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
+  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
   ret <2 x i64> %tmp5
 }
 
@@ -330,8 +330,8 @@ define <4 x i32> @sqdmlal2_4s(<8 x i16>*
   %tmp3 = load <4 x i32>* %C
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
-  %tmp5 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
   ret <4 x i32> %tmp5
 }
 
@@ -343,8 +343,8 @@ define <2 x i64> @sqdmlal2_2d(<4 x i32>*
   %tmp3 = load <2 x i64>* %C
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
-  %tmp5 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
+  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
   ret <2 x i64> %tmp5
 }
 
@@ -354,8 +354,8 @@ define <4 x i32> @sqdmlsl4s(<4 x i16>* %
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
   %tmp3 = load <4 x i32>* %C
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
-  %tmp5 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
   ret <4 x i32> %tmp5
 }
 
@@ -365,8 +365,8 @@ define <2 x i64> @sqdmlsl2d(<2 x i32>* %
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
   %tmp3 = load <2 x i64>* %C
-  %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
-  %tmp5 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
+  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
   ret <2 x i64> %tmp5
 }
 
@@ -378,8 +378,8 @@ define <4 x i32> @sqdmlsl2_4s(<8 x i16>*
   %tmp3 = load <4 x i32>* %C
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
-  %tmp5 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
   ret <4 x i32> %tmp5
 }
 
@@ -391,8 +391,8 @@ define <2 x i64> @sqdmlsl2_2d(<4 x i32>*
   %tmp3 = load <2 x i64>* %C
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
-  %tmp5 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
+  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
   ret <2 x i64> %tmp5
 }
 
@@ -402,7 +402,7 @@ define <4 x i32> @umlal4s(<4 x i16>* %A,
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
   %tmp3 = load <4 x i32>* %C
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   %tmp5 = add <4 x i32> %tmp3, %tmp4
   ret <4 x i32> %tmp5
 }
@@ -413,7 +413,7 @@ define <2 x i64> @umlal2d(<2 x i32>* %A,
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
   %tmp3 = load <2 x i64>* %C
-  %tmp4 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   %tmp5 = add <2 x i64> %tmp3, %tmp4
   ret <2 x i64> %tmp5
 }
@@ -424,7 +424,7 @@ define <4 x i32> @umlsl4s(<4 x i16>* %A,
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
   %tmp3 = load <4 x i32>* %C
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   %tmp5 = sub <4 x i32> %tmp3, %tmp4
   ret <4 x i32> %tmp5
 }
@@ -435,7 +435,7 @@ define <2 x i64> @umlsl2d(<2 x i32>* %A,
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
   %tmp3 = load <2 x i64>* %C
-  %tmp4 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   %tmp5 = sub <2 x i64> %tmp3, %tmp4
   ret <2 x i64> %tmp5
 }
@@ -717,7 +717,7 @@ define <2 x float> @fmulx_lane_2s(<2 x f
   %tmp1 = load <2 x float>* %A
   %tmp2 = load <2 x float>* %B
   %tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp4 = call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp3)
+  %tmp4 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp3)
   ret <2 x float> %tmp4
 }
 
@@ -728,7 +728,7 @@ define <4 x float> @fmulx_lane_4s(<4 x f
   %tmp1 = load <4 x float>* %A
   %tmp2 = load <4 x float>* %B
   %tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp4 = call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp3)
+  %tmp4 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp3)
   ret <4 x float> %tmp4
 }
 
@@ -739,7 +739,7 @@ define <2 x double> @fmulx_lane_2d(<2 x
   %tmp1 = load <2 x double>* %A
   %tmp2 = load <2 x double>* %B
   %tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp4 = call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp3)
+  %tmp4 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp3)
   ret <2 x double> %tmp4
 }
 
@@ -750,7 +750,7 @@ define <4 x i16> @sqdmulh_lane_4h(<4 x i
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
   %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp4 = call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
+  %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
   ret <4 x i16> %tmp4
 }
 
@@ -761,7 +761,7 @@ define <8 x i16> @sqdmulh_lane_8h(<8 x i
   %tmp1 = load <8 x i16>* %A
   %tmp2 = load <8 x i16>* %B
   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  %tmp4 = call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
+  %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
   ret <8 x i16> %tmp4
 }
 
@@ -772,7 +772,7 @@ define <2 x i32> @sqdmulh_lane_2s(<2 x i
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
   %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp4 = call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
+  %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
   ret <2 x i32> %tmp4
 }
 
@@ -783,7 +783,7 @@ define <4 x i32> @sqdmulh_lane_4s(<4 x i
   %tmp1 = load <4 x i32>* %A
   %tmp2 = load <4 x i32>* %B
   %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
   ret <4 x i32> %tmp4
 }
 
@@ -792,7 +792,7 @@ define i32 @sqdmulh_lane_1s(i32 %A, <4 x
 ;CHECK-NOT: dup
 ;CHECK: sqdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1]
   %tmp1 = extractelement <4 x i32> %B, i32 1
-  %tmp2 = call i32 @llvm.arm64.neon.sqdmulh.i32(i32 %A, i32 %tmp1)
+  %tmp2 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %A, i32 %tmp1)
   ret i32 %tmp2
 }
 
@@ -803,7 +803,7 @@ define <4 x i16> @sqrdmulh_lane_4h(<4 x
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
   %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp4 = call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
+  %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
   ret <4 x i16> %tmp4
 }
 
@@ -814,7 +814,7 @@ define <8 x i16> @sqrdmulh_lane_8h(<8 x
   %tmp1 = load <8 x i16>* %A
   %tmp2 = load <8 x i16>* %B
   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  %tmp4 = call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
+  %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
   ret <8 x i16> %tmp4
 }
 
@@ -825,7 +825,7 @@ define <2 x i32> @sqrdmulh_lane_2s(<2 x
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
   %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp4 = call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
+  %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
   ret <2 x i32> %tmp4
 }
 
@@ -836,7 +836,7 @@ define <4 x i32> @sqrdmulh_lane_4s(<4 x
   %tmp1 = load <4 x i32>* %A
   %tmp2 = load <4 x i32>* %B
   %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
   ret <4 x i32> %tmp4
 }
 
@@ -845,7 +845,7 @@ define i32 @sqrdmulh_lane_1s(i32 %A, <4
 ;CHECK-NOT: dup
 ;CHECK: sqrdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1]
   %tmp1 = extractelement <4 x i32> %B, i32 1
-  %tmp2 = call i32 @llvm.arm64.neon.sqrdmulh.i32(i32 %A, i32 %tmp1)
+  %tmp2 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %A, i32 %tmp1)
   ret i32 %tmp2
 }
 
@@ -856,7 +856,7 @@ define <4 x i32> @sqdmull_lane_4s(<4 x i
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
   %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
   ret <4 x i32> %tmp4
 }
 
@@ -867,7 +867,7 @@ define <2 x i64> @sqdmull_lane_2d(<2 x i
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
   %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
+  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
   ret <2 x i64> %tmp4
 }
 
@@ -879,7 +879,7 @@ define <4 x i32> @sqdmull2_lane_4s(<8 x
   %load2 = load <8 x i16>* %B
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   ret <4 x i32> %tmp4
 }
 
@@ -891,7 +891,7 @@ define <2 x i64> @sqdmull2_lane_2d(<4 x
   %load2 = load <4 x i32>* %B
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   ret <2 x i64> %tmp4
 }
 
@@ -902,7 +902,7 @@ define <4 x i32> @umull_lane_4s(<4 x i16
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
   %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
   ret <4 x i32> %tmp4
 }
 
@@ -913,7 +913,7 @@ define <2 x i64> @umull_lane_2d(<2 x i32
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
   %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp4 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
+  %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
   ret <2 x i64> %tmp4
 }
 
@@ -924,7 +924,7 @@ define <4 x i32> @smull_lane_4s(<4 x i16
   %tmp1 = load <4 x i16>* %A
   %tmp2 = load <4 x i16>* %B
   %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp4 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
+  %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
   ret <4 x i32> %tmp4
 }
 
@@ -935,7 +935,7 @@ define <2 x i64> @smull_lane_2d(<2 x i32
   %tmp1 = load <2 x i32>* %A
   %tmp2 = load <2 x i32>* %B
   %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp4 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
+  %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
   ret <2 x i64> %tmp4
 }
 
@@ -947,7 +947,7 @@ define <4 x i32> @smlal_lane_4s(<4 x i16
   %tmp2 = load <4 x i16>* %B
   %tmp3 = load <4 x i32>* %C
   %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp5 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
+  %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
   %tmp6 = add <4 x i32> %tmp3, %tmp5
   ret <4 x i32> %tmp6
 }
@@ -960,7 +960,7 @@ define <2 x i64> @smlal_lane_2d(<2 x i32
   %tmp2 = load <2 x i32>* %B
   %tmp3 = load <2 x i64>* %C
   %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp5 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
+  %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
   %tmp6 = add <2 x i64> %tmp3, %tmp5
   ret <2 x i64> %tmp6
 }
@@ -973,8 +973,8 @@ define <4 x i32> @sqdmlal_lane_4s(<4 x i
   %tmp2 = load <4 x i16>* %B
   %tmp3 = load <4 x i32>* %C
   %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
-  %tmp6 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
+  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
+  %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
   ret <4 x i32> %tmp6
 }
 
@@ -986,8 +986,8 @@ define <2 x i64> @sqdmlal_lane_2d(<2 x i
   %tmp2 = load <2 x i32>* %B
   %tmp3 = load <2 x i64>* %C
   %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
-  %tmp6 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
+  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
+  %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
   ret <2 x i64> %tmp6
 }
 
@@ -1000,8 +1000,8 @@ define <4 x i32> @sqdmlal2_lane_4s(<8 x
   %tmp3 = load <4 x i32>* %C
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
-  %tmp6 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
+  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
   ret <4 x i32> %tmp6
 }
 
@@ -1014,8 +1014,8 @@ define <2 x i64> @sqdmlal2_lane_2d(<4 x
   %tmp3 = load <2 x i64>* %C
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
-  %tmp6 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
+  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
   ret <2 x i64> %tmp6
 }
 
@@ -1024,45 +1024,45 @@ define i32 @sqdmlal_lane_1s(i32 %A, i16
 ;CHECK: sqdmlal.4s
   %lhs = insertelement <4 x i16> undef, i16 %B, i32 0
   %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  %prod.vec = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
+  %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
   %prod = extractelement <4 x i32> %prod.vec, i32 0
-  %res = call i32 @llvm.arm64.neon.sqadd.i32(i32 %A, i32 %prod)
+  %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod)
   ret i32 %res
 }
-declare i32 @llvm.arm64.neon.sqadd.i32(i32, i32)
+declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32)
 
 define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
 ;CHECK-LABEL: sqdmlsl_lane_1s:
 ;CHECK: sqdmlsl.4s
   %lhs = insertelement <4 x i16> undef, i16 %B, i32 0
   %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  %prod.vec = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
+  %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
   %prod = extractelement <4 x i32> %prod.vec, i32 0
-  %res = call i32 @llvm.arm64.neon.sqsub.i32(i32 %A, i32 %prod)
+  %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod)
   ret i32 %res
 }
-declare i32 @llvm.arm64.neon.sqsub.i32(i32, i32)
+declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32)
 
 define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
 ;CHECK-LABEL: sqdmlal_lane_1d:
 ;CHECK: sqdmlal.s
   %rhs = extractelement <2 x i32> %C, i32 1
-  %prod = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
-  %res = call i64 @llvm.arm64.neon.sqadd.i64(i64 %A, i64 %prod)
+  %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
+  %res = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %A, i64 %prod)
   ret i64 %res
 }
-declare i64 @llvm.arm64.neon.sqdmulls.scalar(i32, i32)
-declare i64 @llvm.arm64.neon.sqadd.i64(i64, i64)
+declare i64 @llvm.aarch64.neon.sqdmulls.scalar(i32, i32)
+declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64)
 
 define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
 ;CHECK-LABEL: sqdmlsl_lane_1d:
 ;CHECK: sqdmlsl.s
   %rhs = extractelement <2 x i32> %C, i32 1
-  %prod = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
-  %res = call i64 @llvm.arm64.neon.sqsub.i64(i64 %A, i64 %prod)
+  %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
+  %res = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %A, i64 %prod)
   ret i64 %res
 }
-declare i64 @llvm.arm64.neon.sqsub.i64(i64, i64)
+declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64)
 
 
 define <4 x i32> @umlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
@@ -1073,7 +1073,7 @@ define <4 x i32> @umlal_lane_4s(<4 x i16
   %tmp2 = load <4 x i16>* %B
   %tmp3 = load <4 x i32>* %C
   %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp5 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
+  %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
   %tmp6 = add <4 x i32> %tmp3, %tmp5
   ret <4 x i32> %tmp6
 }
@@ -1086,7 +1086,7 @@ define <2 x i64> @umlal_lane_2d(<2 x i32
   %tmp2 = load <2 x i32>* %B
   %tmp3 = load <2 x i64>* %C
   %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp5 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
+  %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
   %tmp6 = add <2 x i64> %tmp3, %tmp5
   ret <2 x i64> %tmp6
 }
@@ -1100,7 +1100,7 @@ define <4 x i32> @smlsl_lane_4s(<4 x i16
   %tmp2 = load <4 x i16>* %B
   %tmp3 = load <4 x i32>* %C
   %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp5 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
+  %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
   %tmp6 = sub <4 x i32> %tmp3, %tmp5
   ret <4 x i32> %tmp6
 }
@@ -1113,7 +1113,7 @@ define <2 x i64> @smlsl_lane_2d(<2 x i32
   %tmp2 = load <2 x i32>* %B
   %tmp3 = load <2 x i64>* %C
   %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp5 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
+  %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
   %tmp6 = sub <2 x i64> %tmp3, %tmp5
   ret <2 x i64> %tmp6
 }
@@ -1126,8 +1126,8 @@ define <4 x i32> @sqdmlsl_lane_4s(<4 x i
   %tmp2 = load <4 x i16>* %B
   %tmp3 = load <4 x i32>* %C
   %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
-  %tmp6 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
+  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
+  %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
   ret <4 x i32> %tmp6
 }
 
@@ -1139,8 +1139,8 @@ define <2 x i64> @sqdmlsl_lane_2d(<2 x i
   %tmp2 = load <2 x i32>* %B
   %tmp3 = load <2 x i64>* %C
   %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
-  %tmp6 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
+  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
+  %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
   ret <2 x i64> %tmp6
 }
 
@@ -1153,8 +1153,8 @@ define <4 x i32> @sqdmlsl2_lane_4s(<8 x
   %tmp3 = load <4 x i32>* %C
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
-  %tmp6 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
+  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
   ret <4 x i32> %tmp6
 }
 
@@ -1167,8 +1167,8 @@ define <2 x i64> @sqdmlsl2_lane_2d(<4 x
   %tmp3 = load <2 x i64>* %C
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
-  %tmp6 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
+  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
   ret <2 x i64> %tmp6
 }
 
@@ -1180,7 +1180,7 @@ define <4 x i32> @umlsl_lane_4s(<4 x i16
   %tmp2 = load <4 x i16>* %B
   %tmp3 = load <4 x i32>* %C
   %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp5 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
+  %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
   %tmp6 = sub <4 x i32> %tmp3, %tmp5
   ret <4 x i32> %tmp6
 }
@@ -1193,7 +1193,7 @@ define <2 x i64> @umlsl_lane_2d(<2 x i32
   %tmp2 = load <2 x i32>* %B
   %tmp3 = load <2 x i64>* %C
   %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
-  %tmp5 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
+  %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
   %tmp6 = sub <2 x i64> %tmp3, %tmp5
   ret <2 x i64> %tmp6
 }
@@ -1202,7 +1202,7 @@ define <2 x i64> @umlsl_lane_2d(<2 x i32
 define float @fmulxs(float %a, float %b) nounwind {
 ; CHECK-LABEL: fmulxs:
 ; CHECKNEXT: fmulx s0, s0, s1
-  %fmulx.i = tail call float @llvm.arm64.neon.fmulx.f32(float %a, float %b) nounwind
+  %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind
 ; CHECKNEXT: ret
   ret float %fmulx.i
 }
@@ -1210,7 +1210,7 @@ define float @fmulxs(float %a, float %b)
 define double @fmulxd(double %a, double %b) nounwind {
 ; CHECK-LABEL: fmulxd:
 ; CHECKNEXT: fmulx d0, d0, d1
-  %fmulx.i = tail call double @llvm.arm64.neon.fmulx.f64(double %a, double %b) nounwind
+  %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind
 ; CHECKNEXT: ret
   ret double %fmulx.i
 }
@@ -1219,7 +1219,7 @@ define float @fmulxs_lane(float %a, <4 x
 ; CHECK-LABEL: fmulxs_lane:
 ; CHECKNEXT: fmulx.s s0, s0, v1[3]
   %b = extractelement <4 x float> %vec, i32 3
-  %fmulx.i = tail call float @llvm.arm64.neon.fmulx.f32(float %a, float %b) nounwind
+  %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind
 ; CHECKNEXT: ret
   ret float %fmulx.i
 }
@@ -1228,13 +1228,13 @@ define double @fmulxd_lane(double %a, <2
 ; CHECK-LABEL: fmulxd_lane:
 ; CHECKNEXT: fmulx d0, d0, v1[1]
   %b = extractelement <2 x double> %vec, i32 1
-  %fmulx.i = tail call double @llvm.arm64.neon.fmulx.f64(double %a, double %b) nounwind
+  %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind
 ; CHECKNEXT: ret
   ret double %fmulx.i
 }
 
-declare double @llvm.arm64.neon.fmulx.f64(double, double) nounwind readnone
-declare float @llvm.arm64.neon.fmulx.f32(float, float) nounwind readnone
+declare double @llvm.aarch64.neon.fmulx.f64(double, double) nounwind readnone
+declare float @llvm.aarch64.neon.fmulx.f32(float, float) nounwind readnone
 
 
 define <8 x i16> @smull2_8h_simple(<16 x i8> %a, <16 x i8> %b) nounwind {
@@ -1243,7 +1243,7 @@ define <8 x i16> @smull2_8h_simple(<16 x
 ; CHECK-NEXT: ret
   %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %3 = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %1, <8 x i8> %2) #2
+  %3 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %1, <8 x i8> %2) #2
   ret <8 x i16> %3
 }
 
@@ -1256,7 +1256,7 @@ define <8 x i16> @foo0(<16 x i8> %a, <16
   %tmp2 = bitcast <16 x i8> %b to <2 x i64>
   %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8>
-  %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
+  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
   ret <8 x i16> %vmull.i.i
 }
 
@@ -1269,7 +1269,7 @@ define <4 x i32> @foo1(<8 x i16> %a, <8
   %tmp2 = bitcast <8 x i16> %b to <2 x i64>
   %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
+  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
   ret <4 x i32> %vmull2.i.i
 }
 
@@ -1282,7 +1282,7 @@ define <2 x i64> @foo2(<4 x i32> %a, <4
   %tmp2 = bitcast <4 x i32> %b to <2 x i64>
   %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
+  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
   ret <2 x i64> %vmull2.i.i
 }
 
@@ -1295,7 +1295,7 @@ define <8 x i16> @foo3(<16 x i8> %a, <16
   %tmp2 = bitcast <16 x i8> %b to <2 x i64>
   %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8>
-  %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
+  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
   ret <8 x i16> %vmull.i.i
 }
 
@@ -1308,7 +1308,7 @@ define <4 x i32> @foo4(<8 x i16> %a, <8
   %tmp2 = bitcast <8 x i16> %b to <2 x i64>
   %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
+  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
   ret <4 x i32> %vmull2.i.i
 }
 
@@ -1321,7 +1321,7 @@ define <2 x i64> @foo5(<4 x i32> %a, <4
   %tmp2 = bitcast <4 x i32> %b to <2 x i64>
   %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
+  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
   ret <2 x i64> %vmull2.i.i
 }
 
@@ -1334,7 +1334,7 @@ entry:
   %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
   %1 = bitcast <1 x i64> %shuffle.i to <4 x i16>
   %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind
+  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind
   ret <4 x i32> %vmull2.i
 }
 
@@ -1347,7 +1347,7 @@ entry:
   %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
   %1 = bitcast <1 x i64> %shuffle.i to <2 x i32>
   %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind
+  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind
   ret <2 x i64> %vmull2.i
 }
 
@@ -1360,7 +1360,7 @@ entry:
   %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
   %1 = bitcast <1 x i64> %shuffle.i to <4 x i16>
   %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind
+  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind
   ret <4 x i32> %vmull2.i
 }
 
@@ -1373,7 +1373,7 @@ entry:
   %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
   %1 = bitcast <1 x i64> %shuffle.i to <2 x i32>
   %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind
+  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind
   ret <2 x i64> %vmull2.i
 }
 
@@ -1388,7 +1388,7 @@ define <8 x i16> @bar0(<8 x i16> %a, <16
   %tmp2 = bitcast <16 x i8> %c to <2 x i64>
   %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8>
-  %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
+  %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
   %add.i = add <8 x i16> %vmull.i.i.i, %a
   ret <8 x i16> %add.i
 }
@@ -1404,7 +1404,7 @@ define <4 x i32> @bar1(<4 x i32> %a, <8
   %tmp2 = bitcast <8 x i16> %c to <2 x i64>
   %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16>
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
+  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
   %add.i = add <4 x i32> %vmull2.i.i.i, %a
   ret <4 x i32> %add.i
 }
@@ -1420,7 +1420,7 @@ define <2 x i64> @bar2(<2 x i64> %a, <4
   %tmp2 = bitcast <4 x i32> %c to <2 x i64>
   %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32>
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
+  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
   %add.i = add <2 x i64> %vmull2.i.i.i, %a
   ret <2 x i64> %add.i
 }
@@ -1436,7 +1436,7 @@ define <8 x i16> @bar3(<8 x i16> %a, <16
   %tmp2 = bitcast <16 x i8> %c to <2 x i64>
   %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8>
-  %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
+  %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
   %add.i = add <8 x i16> %vmull.i.i.i, %a
   ret <8 x i16> %add.i
 }
@@ -1452,7 +1452,7 @@ define <4 x i32> @bar4(<4 x i32> %a, <8
   %tmp2 = bitcast <8 x i16> %c to <2 x i64>
   %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16>
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
+  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
   %add.i = add <4 x i32> %vmull2.i.i.i, %a
   ret <4 x i32> %add.i
 }
@@ -1468,7 +1468,7 @@ define <2 x i64> @bar5(<2 x i64> %a, <4
   %tmp2 = bitcast <4 x i32> %c to <2 x i64>
   %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32>
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
+  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
   %add.i = add <2 x i64> %vmull2.i.i.i, %a
   ret <2 x i64> %add.i
 }
@@ -1484,7 +1484,7 @@ define <4 x i32> @mlal2_1(<4 x i32> %a,
   %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64>
   %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
+  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
   %add = add <4 x i32> %vmull2.i.i, %a
   ret <4 x i32> %add
 }
@@ -1500,7 +1500,7 @@ define <2 x i64> @mlal2_2(<2 x i64> %a,
   %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64>
   %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
+  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
   %add = add <2 x i64> %vmull2.i.i, %a
   ret <2 x i64> %add
 }
@@ -1517,7 +1517,7 @@ define <4 x i32> @mlal2_4(<4 x i32> %a,
   %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64>
   %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
+  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
   %add = add <4 x i32> %vmull2.i.i, %a
   ret <4 x i32> %add
 }
@@ -1533,7 +1533,7 @@ define <2 x i64> @mlal2_5(<2 x i64> %a,
   %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64>
   %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
   %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
+  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
   %add = add <2 x i64> %vmull2.i.i, %a
   ret <2 x i64> %add
 }
@@ -1631,7 +1631,7 @@ entry:
 ; CHECK: smull.4s v0, v0, v1[6]
 ; CHECK-NEXT: ret
   %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
+  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
   ret <4 x i32> %vmull2.i
 }
 
@@ -1642,7 +1642,7 @@ entry:
 ; CHECK: smull.2d v0, v0, v1[2]
 ; CHECK-NEXT: ret
   %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
+  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
   ret <2 x i64> %vmull2.i
 }
 define <4 x i32> @vmull_laneq_u16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp {
@@ -1652,7 +1652,7 @@ entry:
 ; CHECK: umull.4s v0, v0, v1[6]
 ; CHECK-NEXT: ret
   %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
+  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
   ret <4 x i32> %vmull2.i
 }
 
@@ -1663,7 +1663,7 @@ entry:
 ; CHECK: umull.2d v0, v0, v1[2]
 ; CHECK-NEXT: ret
   %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
+  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
   ret <2 x i64> %vmull2.i
 }
 
@@ -1681,7 +1681,7 @@ entry:
   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1
   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2
   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind
+  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind
   ret <4 x i32> %vmull2.i.i
 }
 
@@ -1696,7 +1696,7 @@ entry:
   %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
   %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0
   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind
+  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind
   ret <2 x i64> %vmull2.i.i
 }
 
@@ -1714,7 +1714,7 @@ entry:
   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1
   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2
   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind
+  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind
   ret <4 x i32> %vmull2.i.i
 }
 
@@ -1729,7 +1729,7 @@ entry:
   %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
   %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0
   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind
+  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind
   ret <2 x i64> %vmull2.i.i
 }
 
@@ -1787,7 +1787,7 @@ define <2 x i64> @mull_from_two_extracts
   %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
 
-  %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
+  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
   ret <2 x i64> %res
 }
 
@@ -1799,8 +1799,8 @@ define <2 x i64> @mlal_from_two_extracts
   %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
 
-  %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
-  %sum = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
+  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
+  %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
   ret <2 x i64> %sum
 }
 
@@ -1813,7 +1813,7 @@ define <2 x i64> @mull_from_extract_dup(
 
   %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
 
-  %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
+  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
   ret <2 x i64> %res
 }
 
@@ -1826,7 +1826,7 @@ define <8 x i16> @pmull_from_extract_dup
 
   %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 
-  %res = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind
+  %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind
   ret <8 x i16> %res
 }
 
@@ -1838,7 +1838,7 @@ define <8 x i16> @pmull_from_extract_dup
   %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 
-  %res = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind
+  %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind
   ret <8 x i16> %res
 }
 
@@ -1850,7 +1850,7 @@ define <2 x i64> @sqdmull_from_extract_d
   %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
 
-  %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
+  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
   ret <2 x i64> %res
 }
 
@@ -1862,8 +1862,8 @@ define <2 x i64> @sqdmlal_from_extract_d
   %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
 
-  %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
-  %sum = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
+  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
+  %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
   ret <2 x i64> %sum
 }
 
@@ -1875,7 +1875,7 @@ define <2 x i64> @umlal_from_extract_dup
   %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
 
-  %res = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
+  %res = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
   %sum = add <2 x i64> %accum, %res
   ret <2 x i64> %sum
 }
@@ -1997,23 +1997,23 @@ define <1 x double> @test_fdiv_v1f64(<1
 define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind {
 ;CHECK-LABEL: sqdmlal_d:
 ;CHECK: sqdmlal
-  %tmp4 = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %A, i32 %B)
-  %tmp5 = call i64 @llvm.arm64.neon.sqadd.i64(i64 %C, i64 %tmp4)
+  %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B)
+  %tmp5 = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %C, i64 %tmp4)
   ret i64 %tmp5
 }
 
 define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
 ;CHECK-LABEL: sqdmlsl_d:
 ;CHECK: sqdmlsl
-  %tmp4 = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %A, i32 %B)
-  %tmp5 = call i64 @llvm.arm64.neon.sqsub.i64(i64 %C, i64 %tmp4)
+  %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B)
+  %tmp5 = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %C, i64 %tmp4)
   ret i64 %tmp5
 }
 
 define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind {
 ; CHECK-LABEL: test_pmull_64:
 ; CHECK: pmull.1q
-  %val = call <16 x i8> @llvm.arm64.neon.pmull64(i64 %l, i64 %r)
+  %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
   ret <16 x i8> %val
 }
 
@@ -2022,11 +2022,11 @@ define <16 x i8> @test_pmull_high_64(<2
 ; CHECK: pmull2.1q
   %l_hi = extractelement <2 x i64> %l, i32 1
   %r_hi = extractelement <2 x i64> %r, i32 1
-  %val = call <16 x i8> @llvm.arm64.neon.pmull64(i64 %l_hi, i64 %r_hi)
+  %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi)
   ret <16 x i8> %val
 }
 
-declare <16 x i8> @llvm.arm64.neon.pmull64(i64, i64)
+declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)
 
 define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind {
 ; CHECK-LABEL: test_mul_v1i64:

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-volatile.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/volatile.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-volatile.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-volatile.ll&p1=llvm/trunk/test/CodeGen/ARM64/volatile.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-vpopcnt.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/vpopcnt.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vpopcnt.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-vpopcnt.ll&p1=llvm/trunk/test/CodeGen/ARM64/vpopcnt.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-vqadd.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/vqadd.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vqadd.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-vqadd.ll&p1=llvm/trunk/test/CodeGen/ARM64/vqadd.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/vqadd.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vqadd.ll Sat May 24 07:50:23 2014
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqadd8b:
 ;CHECK: sqadd.8b
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = call <8 x i8> @llvm.arm64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
 
@@ -14,7 +14,7 @@ define <4 x i16> @sqadd4h(<4 x i16>* %A,
 ;CHECK: sqadd.4h
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = call <4 x i16> @llvm.arm64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
 
@@ -23,7 +23,7 @@ define <2 x i32> @sqadd2s(<2 x i32>* %A,
 ;CHECK: sqadd.2s
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = call <2 x i32> @llvm.arm64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
 
@@ -32,7 +32,7 @@ define <8 x i8> @uqadd8b(<8 x i8>* %A, <
 ;CHECK: uqadd.8b
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = call <8 x i8> @llvm.arm64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
 
@@ -41,7 +41,7 @@ define <4 x i16> @uqadd4h(<4 x i16>* %A,
 ;CHECK: uqadd.4h
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = call <4 x i16> @llvm.arm64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
 
@@ -50,7 +50,7 @@ define <2 x i32> @uqadd2s(<2 x i32>* %A,
 ;CHECK: uqadd.2s
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = call <2 x i32> @llvm.arm64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
 
@@ -59,7 +59,7 @@ define <16 x i8> @sqadd16b(<16 x i8>* %A
 ;CHECK: sqadd.16b
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = call <16 x i8> @llvm.arm64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
 
@@ -68,7 +68,7 @@ define <8 x i16> @sqadd8h(<8 x i16>* %A,
 ;CHECK: sqadd.8h
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = call <8 x i16> @llvm.arm64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
 
@@ -77,7 +77,7 @@ define <4 x i32> @sqadd4s(<4 x i32>* %A,
 ;CHECK: sqadd.4s
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
 
@@ -86,7 +86,7 @@ define <2 x i64> @sqadd2d(<2 x i64>* %A,
 ;CHECK: sqadd.2d
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
-	%tmp3 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
 
@@ -95,7 +95,7 @@ define <16 x i8> @uqadd16b(<16 x i8>* %A
 ;CHECK: uqadd.16b
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = call <16 x i8> @llvm.arm64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
 
@@ -104,7 +104,7 @@ define <8 x i16> @uqadd8h(<8 x i16>* %A,
 ;CHECK: uqadd.8h
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = call <8 x i16> @llvm.arm64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
 
@@ -113,7 +113,7 @@ define <4 x i32> @uqadd4s(<4 x i32>* %A,
 ;CHECK: uqadd.4s
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = call <4 x i32> @llvm.arm64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
 
@@ -122,36 +122,36 @@ define <2 x i64> @uqadd2d(<2 x i64>* %A,
 ;CHECK: uqadd.2d
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
-	%tmp3 = call <2 x i64> @llvm.arm64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
 
-declare <8 x i8>  @llvm.arm64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <8 x i8>  @llvm.arm64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.sqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8>  @llvm.aarch64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <8 x i8> @usqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: usqadd8b:
 ;CHECK: usqadd.8b
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = call <8 x i8> @llvm.arm64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	%tmp3 = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
 
@@ -160,7 +160,7 @@ define <4 x i16> @usqadd4h(<4 x i16>* %A
 ;CHECK: usqadd.4h
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = call <4 x i16> @llvm.arm64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	%tmp3 = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
 
@@ -169,7 +169,7 @@ define <2 x i32> @usqadd2s(<2 x i32>* %A
 ;CHECK: usqadd.2s
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = call <2 x i32> @llvm.arm64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	%tmp3 = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
 
@@ -178,7 +178,7 @@ define <16 x i8> @usqadd16b(<16 x i8>* %
 ;CHECK: usqadd.16b
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = call <16 x i8> @llvm.arm64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	%tmp3 = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
 
@@ -187,7 +187,7 @@ define <8 x i16> @usqadd8h(<8 x i16>* %A
 ;CHECK: usqadd.8h
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = call <8 x i16> @llvm.arm64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	%tmp3 = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
 
@@ -196,7 +196,7 @@ define <4 x i32> @usqadd4s(<4 x i32>* %A
 ;CHECK: usqadd.4s
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = call <4 x i32> @llvm.arm64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	%tmp3 = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
 
@@ -205,42 +205,42 @@ define <2 x i64> @usqadd2d(<2 x i64>* %A
 ;CHECK: usqadd.2d
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
-	%tmp3 = call <2 x i64> @llvm.arm64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	%tmp3 = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
 
 define i64 @usqadd_d(i64 %l, i64 %r) nounwind {
 ; CHECK-LABEL: usqadd_d:
 ; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
-  %sum = call i64 @llvm.arm64.neon.usqadd.i64(i64 %l, i64 %r)
+  %sum = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %l, i64 %r)
   ret i64 %sum
 }
 
 define i32 @usqadd_s(i32 %l, i32 %r) nounwind {
 ; CHECK-LABEL: usqadd_s:
 ; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
-  %sum = call i32 @llvm.arm64.neon.usqadd.i32(i32 %l, i32 %r)
+  %sum = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %l, i32 %r)
   ret i32 %sum
 }
 
-declare <8 x i8>  @llvm.arm64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-declare i64 @llvm.arm64.neon.usqadd.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.neon.usqadd.i32(i32, i32) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+declare i64 @llvm.aarch64.neon.usqadd.i64(i64, i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.usqadd.i32(i32, i32) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <8 x i8> @suqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: suqadd8b:
 ;CHECK: suqadd.8b
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = call <8 x i8> @llvm.arm64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	%tmp3 = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
 
@@ -249,7 +249,7 @@ define <4 x i16> @suqadd4h(<4 x i16>* %A
 ;CHECK: suqadd.4h
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = call <4 x i16> @llvm.arm64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	%tmp3 = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
 
@@ -258,7 +258,7 @@ define <2 x i32> @suqadd2s(<2 x i32>* %A
 ;CHECK: suqadd.2s
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = call <2 x i32> @llvm.arm64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	%tmp3 = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
 
@@ -267,7 +267,7 @@ define <16 x i8> @suqadd16b(<16 x i8>* %
 ;CHECK: suqadd.16b
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = call <16 x i8> @llvm.arm64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	%tmp3 = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
 
@@ -276,7 +276,7 @@ define <8 x i16> @suqadd8h(<8 x i16>* %A
 ;CHECK: suqadd.8h
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = call <8 x i16> @llvm.arm64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	%tmp3 = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
 
@@ -285,7 +285,7 @@ define <4 x i32> @suqadd4s(<4 x i32>* %A
 ;CHECK: suqadd.4s
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = call <4 x i32> @llvm.arm64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	%tmp3 = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
 
@@ -294,39 +294,39 @@ define <2 x i64> @suqadd2d(<2 x i64>* %A
 ;CHECK: suqadd.2d
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
-	%tmp3 = call <2 x i64> @llvm.arm64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	%tmp3 = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
 
 define <1 x i64> @suqadd_1d(<1 x i64> %l, <1 x i64> %r) nounwind {
 ; CHECK-LABEL: suqadd_1d:
 ; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
-  %sum = call <1 x i64> @llvm.arm64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r)
+  %sum = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r)
   ret <1 x i64> %sum
 }
 
 define i64 @suqadd_d(i64 %l, i64 %r) nounwind {
 ; CHECK-LABEL: suqadd_d:
 ; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
-  %sum = call i64 @llvm.arm64.neon.suqadd.i64(i64 %l, i64 %r)
+  %sum = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %l, i64 %r)
   ret i64 %sum
 }
 
 define i32 @suqadd_s(i32 %l, i32 %r) nounwind {
 ; CHECK-LABEL: suqadd_s:
 ; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
-  %sum = call i32 @llvm.arm64.neon.suqadd.i32(i32 %l, i32 %r)
+  %sum = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %l, i32 %r)
   ret i32 %sum
 }
 
-declare <8 x i8>  @llvm.arm64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-declare i64 @llvm.arm64.neon.suqadd.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.neon.suqadd.i32(i32, i32) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+declare i64 @llvm.aarch64.neon.suqadd.i64(i64, i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.suqadd.i32(i32, i32) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

Added: llvm/trunk/test/CodeGen/AArch64/arm64-vqsub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vqsub.ll?rev=209577&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vqsub.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vqsub.ll Sat May 24 07:50:23 2014
@@ -0,0 +1,147 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <8 x i8> @sqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: sqsub8b:
+;CHECK: sqsub.8b
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqsub4h:
+;CHECK: sqsub.4h
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqsub2s:
+;CHECK: sqsub.2s
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @uqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: uqsub8b:
+;CHECK: uqsub.8b
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @uqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uqsub4h:
+;CHECK: uqsub.4h
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @uqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uqsub2s:
+;CHECK: uqsub.2s
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: sqsub16b:
+;CHECK: sqsub.16b
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @sqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqsub8h:
+;CHECK: sqsub.8h
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @sqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqsub4s:
+;CHECK: sqsub.4s
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: sqsub2d:
+;CHECK: sqsub.2d
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @uqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: uqsub16b:
+;CHECK: uqsub.16b
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @uqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: uqsub8h:
+;CHECK: uqsub.8h
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @uqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: uqsub4s:
+;CHECK: uqsub.4s
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @uqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: uqsub2d:
+;CHECK: uqsub.2d
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+declare <8 x i8>  @llvm.aarch64.neon.sqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8>  @llvm.aarch64.neon.uqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-vselect.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/vselect.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vselect.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-vselect.ll&p1=llvm/trunk/test/CodeGen/ARM64/vselect.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/vselect.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vselect.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 
 ;CHECK: @func63
 ;CHECK: cmeq.4h v0, v0, v1

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-vsetcc_fp.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/vsetcc_fp.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vsetcc_fp.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-vsetcc_fp.ll&p1=llvm/trunk/test/CodeGen/ARM64/vsetcc_fp.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/vsetcc_fp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vsetcc_fp.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
 define <2 x i32> @fcmp_one(<2 x float> %x, <2 x float> %y) nounwind optsize readnone {
 ; CHECK-LABEL: fcmp_one:
 ; CHECK-NEXT: fcmgt.2s [[REG:v[0-9]+]], v0, v1

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-vshift.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/vshift.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vshift.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-vshift.ll&p1=llvm/trunk/test/CodeGen/ARM64/vshift.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/vshift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vshift.ll Sat May 24 07:50:23 2014
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -enable-misched=false | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s
 
 define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqshl8b:
 ;CHECK: sqshl.8b
         %tmp1 = load <8 x i8>* %A
         %tmp2 = load <8 x i8>* %B
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
 
@@ -14,7 +14,7 @@ define <4 x i16> @sqshl4h(<4 x i16>* %A,
 ;CHECK: sqshl.4h
         %tmp1 = load <4 x i16>* %A
         %tmp2 = load <4 x i16>* %B
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
 
@@ -23,7 +23,7 @@ define <2 x i32> @sqshl2s(<2 x i32>* %A,
 ;CHECK: sqshl.2s
         %tmp1 = load <2 x i32>* %A
         %tmp2 = load <2 x i32>* %B
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
 
@@ -32,7 +32,7 @@ define <8 x i8> @uqshl8b(<8 x i8>* %A, <
 ;CHECK: uqshl.8b
         %tmp1 = load <8 x i8>* %A
         %tmp2 = load <8 x i8>* %B
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
 
@@ -41,7 +41,7 @@ define <4 x i16> @uqshl4h(<4 x i16>* %A,
 ;CHECK: uqshl.4h
         %tmp1 = load <4 x i16>* %A
         %tmp2 = load <4 x i16>* %B
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
 
@@ -50,7 +50,7 @@ define <2 x i32> @uqshl2s(<2 x i32>* %A,
 ;CHECK: uqshl.2s
         %tmp1 = load <2 x i32>* %A
         %tmp2 = load <2 x i32>* %B
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
 
@@ -59,7 +59,7 @@ define <16 x i8> @sqshl16b(<16 x i8>* %A
 ;CHECK: sqshl.16b
         %tmp1 = load <16 x i8>* %A
         %tmp2 = load <16 x i8>* %B
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
 
@@ -68,7 +68,7 @@ define <8 x i16> @sqshl8h(<8 x i16>* %A,
 ;CHECK: sqshl.8h
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
 
@@ -77,7 +77,7 @@ define <4 x i32> @sqshl4s(<4 x i32>* %A,
 ;CHECK: sqshl.4s
         %tmp1 = load <4 x i32>* %A
         %tmp2 = load <4 x i32>* %B
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
 
@@ -86,7 +86,7 @@ define <2 x i64> @sqshl2d(<2 x i64>* %A,
 ;CHECK: sqshl.2d
         %tmp1 = load <2 x i64>* %A
         %tmp2 = load <2 x i64>* %B
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
 
@@ -95,7 +95,7 @@ define <16 x i8> @uqshl16b(<16 x i8>* %A
 ;CHECK: uqshl.16b
         %tmp1 = load <16 x i8>* %A
         %tmp2 = load <16 x i8>* %B
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
 
@@ -104,7 +104,7 @@ define <8 x i16> @uqshl8h(<8 x i16>* %A,
 ;CHECK: uqshl.8h
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
 
@@ -113,7 +113,7 @@ define <4 x i32> @uqshl4s(<4 x i32>* %A,
 ;CHECK: uqshl.4s
         %tmp1 = load <4 x i32>* %A
         %tmp2 = load <4 x i32>* %B
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
 
@@ -122,36 +122,36 @@ define <2 x i64> @uqshl2d(<2 x i64>* %A,
 ;CHECK: uqshl.2d
         %tmp1 = load <2 x i64>* %A
         %tmp2 = load <2 x i64>* %B
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
 
-declare <8 x i8>  @llvm.arm64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <8 x i8>  @llvm.arm64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8>  @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <8 x i8> @srshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: srshl8b:
 ;CHECK: srshl.8b
         %tmp1 = load <8 x i8>* %A
         %tmp2 = load <8 x i8>* %B
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
 
@@ -160,7 +160,7 @@ define <4 x i16> @srshl4h(<4 x i16>* %A,
 ;CHECK: srshl.4h
         %tmp1 = load <4 x i16>* %A
         %tmp2 = load <4 x i16>* %B
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
 
@@ -169,7 +169,7 @@ define <2 x i32> @srshl2s(<2 x i32>* %A,
 ;CHECK: srshl.2s
         %tmp1 = load <2 x i32>* %A
         %tmp2 = load <2 x i32>* %B
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
 
@@ -178,7 +178,7 @@ define <8 x i8> @urshl8b(<8 x i8>* %A, <
 ;CHECK: urshl.8b
         %tmp1 = load <8 x i8>* %A
         %tmp2 = load <8 x i8>* %B
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
 
@@ -187,7 +187,7 @@ define <4 x i16> @urshl4h(<4 x i16>* %A,
 ;CHECK: urshl.4h
         %tmp1 = load <4 x i16>* %A
         %tmp2 = load <4 x i16>* %B
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
 
@@ -196,7 +196,7 @@ define <2 x i32> @urshl2s(<2 x i32>* %A,
 ;CHECK: urshl.2s
         %tmp1 = load <2 x i32>* %A
         %tmp2 = load <2 x i32>* %B
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
 
@@ -205,7 +205,7 @@ define <16 x i8> @srshl16b(<16 x i8>* %A
 ;CHECK: srshl.16b
         %tmp1 = load <16 x i8>* %A
         %tmp2 = load <16 x i8>* %B
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
 
@@ -214,7 +214,7 @@ define <8 x i16> @srshl8h(<8 x i16>* %A,
 ;CHECK: srshl.8h
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
 
@@ -223,7 +223,7 @@ define <4 x i32> @srshl4s(<4 x i32>* %A,
 ;CHECK: srshl.4s
         %tmp1 = load <4 x i32>* %A
         %tmp2 = load <4 x i32>* %B
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
 
@@ -232,7 +232,7 @@ define <2 x i64> @srshl2d(<2 x i64>* %A,
 ;CHECK: srshl.2d
         %tmp1 = load <2 x i64>* %A
         %tmp2 = load <2 x i64>* %B
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
 
@@ -241,7 +241,7 @@ define <16 x i8> @urshl16b(<16 x i8>* %A
 ;CHECK: urshl.16b
         %tmp1 = load <16 x i8>* %A
         %tmp2 = load <16 x i8>* %B
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
 
@@ -250,7 +250,7 @@ define <8 x i16> @urshl8h(<8 x i16>* %A,
 ;CHECK: urshl.8h
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
 
@@ -259,7 +259,7 @@ define <4 x i32> @urshl4s(<4 x i32>* %A,
 ;CHECK: urshl.4s
         %tmp1 = load <4 x i32>* %A
         %tmp2 = load <4 x i32>* %B
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
 
@@ -268,36 +268,36 @@ define <2 x i64> @urshl2d(<2 x i64>* %A,
 ;CHECK: urshl.2d
         %tmp1 = load <2 x i64>* %A
         %tmp2 = load <2 x i64>* %B
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
 
-declare <8 x i8>  @llvm.arm64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <8 x i8>  @llvm.arm64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8>  @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <8 x i8> @sqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqrshl8b:
 ;CHECK: sqrshl.8b
         %tmp1 = load <8 x i8>* %A
         %tmp2 = load <8 x i8>* %B
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
 
@@ -306,7 +306,7 @@ define <4 x i16> @sqrshl4h(<4 x i16>* %A
 ;CHECK: sqrshl.4h
         %tmp1 = load <4 x i16>* %A
         %tmp2 = load <4 x i16>* %B
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
 
@@ -315,7 +315,7 @@ define <2 x i32> @sqrshl2s(<2 x i32>* %A
 ;CHECK: sqrshl.2s
         %tmp1 = load <2 x i32>* %A
         %tmp2 = load <2 x i32>* %B
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
 
@@ -324,7 +324,7 @@ define <8 x i8> @uqrshl8b(<8 x i8>* %A,
 ;CHECK: uqrshl.8b
         %tmp1 = load <8 x i8>* %A
         %tmp2 = load <8 x i8>* %B
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
 
@@ -333,7 +333,7 @@ define <4 x i16> @uqrshl4h(<4 x i16>* %A
 ;CHECK: uqrshl.4h
         %tmp1 = load <4 x i16>* %A
         %tmp2 = load <4 x i16>* %B
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
 
@@ -342,7 +342,7 @@ define <2 x i32> @uqrshl2s(<2 x i32>* %A
 ;CHECK: uqrshl.2s
         %tmp1 = load <2 x i32>* %A
         %tmp2 = load <2 x i32>* %B
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
 
@@ -351,7 +351,7 @@ define <16 x i8> @sqrshl16b(<16 x i8>* %
 ;CHECK: sqrshl.16b
         %tmp1 = load <16 x i8>* %A
         %tmp2 = load <16 x i8>* %B
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
 
@@ -360,7 +360,7 @@ define <8 x i16> @sqrshl8h(<8 x i16>* %A
 ;CHECK: sqrshl.8h
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
 
@@ -369,7 +369,7 @@ define <4 x i32> @sqrshl4s(<4 x i32>* %A
 ;CHECK: sqrshl.4s
         %tmp1 = load <4 x i32>* %A
         %tmp2 = load <4 x i32>* %B
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
 
@@ -378,7 +378,7 @@ define <2 x i64> @sqrshl2d(<2 x i64>* %A
 ;CHECK: sqrshl.2d
         %tmp1 = load <2 x i64>* %A
         %tmp2 = load <2 x i64>* %B
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
 
@@ -387,7 +387,7 @@ define <16 x i8> @uqrshl16b(<16 x i8>* %
 ;CHECK: uqrshl.16b
         %tmp1 = load <16 x i8>* %A
         %tmp2 = load <16 x i8>* %B
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
 
@@ -396,7 +396,7 @@ define <8 x i16> @uqrshl8h(<8 x i16>* %A
 ;CHECK: uqrshl.8h
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
 
@@ -405,7 +405,7 @@ define <4 x i32> @uqrshl4s(<4 x i32>* %A
 ;CHECK: uqrshl.4s
         %tmp1 = load <4 x i32>* %A
         %tmp2 = load <4 x i32>* %B
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
 
@@ -414,35 +414,35 @@ define <2 x i64> @uqrshl2d(<2 x i64>* %A
 ;CHECK: uqrshl.2d
         %tmp1 = load <2 x i64>* %A
         %tmp2 = load <2 x i64>* %B
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
 
-declare <8 x i8>  @llvm.arm64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <8 x i8>  @llvm.arm64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8>  @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <8 x i8> @urshr8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: urshr8b:
 ;CHECK: urshr.8b
         %tmp1 = load <8 x i8>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
         ret <8 x i8> %tmp3
 }
 
@@ -450,7 +450,7 @@ define <4 x i16> @urshr4h(<4 x i16>* %A)
 ;CHECK-LABEL: urshr4h:
 ;CHECK: urshr.4h
         %tmp1 = load <4 x i16>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
         ret <4 x i16> %tmp3
 }
 
@@ -458,7 +458,7 @@ define <2 x i32> @urshr2s(<2 x i32>* %A)
 ;CHECK-LABEL: urshr2s:
 ;CHECK: urshr.2s
         %tmp1 = load <2 x i32>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
         ret <2 x i32> %tmp3
 }
 
@@ -466,7 +466,7 @@ define <16 x i8> @urshr16b(<16 x i8>* %A
 ;CHECK-LABEL: urshr16b:
 ;CHECK: urshr.16b
         %tmp1 = load <16 x i8>* %A
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
         ret <16 x i8> %tmp3
 }
 
@@ -474,7 +474,7 @@ define <8 x i16> @urshr8h(<8 x i16>* %A)
 ;CHECK-LABEL: urshr8h:
 ;CHECK: urshr.8h
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
         ret <8 x i16> %tmp3
 }
 
@@ -482,7 +482,7 @@ define <4 x i32> @urshr4s(<4 x i32>* %A)
 ;CHECK-LABEL: urshr4s:
 ;CHECK: urshr.4s
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
         ret <4 x i32> %tmp3
 }
 
@@ -490,7 +490,7 @@ define <2 x i64> @urshr2d(<2 x i64>* %A)
 ;CHECK-LABEL: urshr2d:
 ;CHECK: urshr.2d
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
         ret <2 x i64> %tmp3
 }
 
@@ -498,7 +498,7 @@ define <8 x i8> @srshr8b(<8 x i8>* %A) n
 ;CHECK-LABEL: srshr8b:
 ;CHECK: srshr.8b
         %tmp1 = load <8 x i8>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
         ret <8 x i8> %tmp3
 }
 
@@ -506,7 +506,7 @@ define <4 x i16> @srshr4h(<4 x i16>* %A)
 ;CHECK-LABEL: srshr4h:
 ;CHECK: srshr.4h
         %tmp1 = load <4 x i16>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
         ret <4 x i16> %tmp3
 }
 
@@ -514,7 +514,7 @@ define <2 x i32> @srshr2s(<2 x i32>* %A)
 ;CHECK-LABEL: srshr2s:
 ;CHECK: srshr.2s
         %tmp1 = load <2 x i32>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
         ret <2 x i32> %tmp3
 }
 
@@ -522,7 +522,7 @@ define <16 x i8> @srshr16b(<16 x i8>* %A
 ;CHECK-LABEL: srshr16b:
 ;CHECK: srshr.16b
         %tmp1 = load <16 x i8>* %A
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
         ret <16 x i8> %tmp3
 }
 
@@ -530,7 +530,7 @@ define <8 x i16> @srshr8h(<8 x i16>* %A)
 ;CHECK-LABEL: srshr8h:
 ;CHECK: srshr.8h
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
         ret <8 x i16> %tmp3
 }
 
@@ -538,7 +538,7 @@ define <4 x i32> @srshr4s(<4 x i32>* %A)
 ;CHECK-LABEL: srshr4s:
 ;CHECK: srshr.4s
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
         ret <4 x i32> %tmp3
 }
 
@@ -546,7 +546,7 @@ define <2 x i64> @srshr2d(<2 x i64>* %A)
 ;CHECK-LABEL: srshr2d:
 ;CHECK: srshr.2d
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
         ret <2 x i64> %tmp3
 }
 
@@ -554,7 +554,7 @@ define <8 x i8> @sqshlu8b(<8 x i8>* %A)
 ;CHECK-LABEL: sqshlu8b:
 ;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i8>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
         ret <8 x i8> %tmp3
 }
 
@@ -562,7 +562,7 @@ define <4 x i16> @sqshlu4h(<4 x i16>* %A
 ;CHECK-LABEL: sqshlu4h:
 ;CHECK: sqshlu.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i16>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
         ret <4 x i16> %tmp3
 }
 
@@ -570,7 +570,7 @@ define <2 x i32> @sqshlu2s(<2 x i32>* %A
 ;CHECK-LABEL: sqshlu2s:
 ;CHECK: sqshlu.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i32>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
         ret <2 x i32> %tmp3
 }
 
@@ -578,7 +578,7 @@ define <16 x i8> @sqshlu16b(<16 x i8>* %
 ;CHECK-LABEL: sqshlu16b:
 ;CHECK: sqshlu.16b v0, {{v[0-9]+}}, #1
         %tmp1 = load <16 x i8>* %A
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
         ret <16 x i8> %tmp3
 }
 
@@ -586,7 +586,7 @@ define <8 x i16> @sqshlu8h(<8 x i16>* %A
 ;CHECK-LABEL: sqshlu8h:
 ;CHECK: sqshlu.8h v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
         ret <8 x i16> %tmp3
 }
 
@@ -594,7 +594,7 @@ define <4 x i32> @sqshlu4s(<4 x i32>* %A
 ;CHECK-LABEL: sqshlu4s:
 ;CHECK: sqshlu.4s v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
         ret <4 x i32> %tmp3
 }
 
@@ -602,25 +602,25 @@ define <2 x i64> @sqshlu2d(<2 x i64>* %A
 ;CHECK-LABEL: sqshlu2d:
 ;CHECK: sqshlu.2d v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
         ret <2 x i64> %tmp3
 }
 
-declare <8 x i8>  @llvm.arm64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <8 x i8> @rshrn8b(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: rshrn8b:
 ;CHECK: rshrn.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
 
@@ -628,7 +628,7 @@ define <4 x i16> @rshrn4h(<4 x i32>* %A)
 ;CHECK-LABEL: rshrn4h:
 ;CHECK: rshrn.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
 
@@ -636,7 +636,7 @@ define <2 x i32> @rshrn2s(<2 x i64>* %A)
 ;CHECK-LABEL: rshrn2s:
 ;CHECK: rshrn.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
 
@@ -645,7 +645,7 @@ define <16 x i8> @rshrn16b(<8 x i8> *%re
 ;CHECK: rshrn2.16b v0, {{v[0-9]+}}, #1
         %out = load <8 x i8>* %ret
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
 }
@@ -655,7 +655,7 @@ define <8 x i16> @rshrn8h(<4 x i16>* %re
 ;CHECK: rshrn2.8h v0, {{v[0-9]+}}, #1
         %out = load <4 x i16>* %ret
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
         ret <8 x i16> %tmp4
 }
@@ -665,14 +665,14 @@ define <4 x i32> @rshrn4s(<2 x i32>* %re
 ;CHECK: rshrn2.4s v0, {{v[0-9]+}}, #1
         %out = load <2 x i32>* %ret
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i32> %tmp4
 }
 
-declare <8 x i8>  @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone
 
 define <8 x i8> @shrn8b(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: shrn8b:
@@ -734,14 +734,14 @@ define <4 x i32> @shrn4s(<2 x i32>* %ret
         ret <4 x i32> %tmp4
 }
 
-declare <8 x i8>  @llvm.arm64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
 
 define i32 @sqshrn1s(i64 %A) nounwind {
 ; CHECK-LABEL: sqshrn1s:
 ; CHECK: sqshrn {{s[0-9]+}}, d0, #1
-  %tmp = call i32 @llvm.arm64.neon.sqshrn.i32(i64 %A, i32 1)
+  %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1)
   ret i32 %tmp
 }
 
@@ -749,7 +749,7 @@ define <8 x i8> @sqshrn8b(<8 x i16>* %A)
 ;CHECK-LABEL: sqshrn8b:
 ;CHECK: sqshrn.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
 
@@ -757,7 +757,7 @@ define <4 x i16> @sqshrn4h(<4 x i32>* %A
 ;CHECK-LABEL: sqshrn4h:
 ;CHECK: sqshrn.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
 
@@ -765,7 +765,7 @@ define <2 x i32> @sqshrn2s(<2 x i64>* %A
 ;CHECK-LABEL: sqshrn2s:
 ;CHECK: sqshrn.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
 
@@ -775,7 +775,7 @@ define <16 x i8> @sqshrn16b(<8 x i8>* %r
 ;CHECK: sqshrn2.16b v0, {{v[0-9]+}}, #1
         %out = load <8 x i8>* %ret
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
 }
@@ -785,7 +785,7 @@ define <8 x i16> @sqshrn8h(<4 x i16>* %r
 ;CHECK: sqshrn2.8h v0, {{v[0-9]+}}, #1
         %out = load <4 x i16>* %ret
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
         ret <8 x i16> %tmp4
 }
@@ -795,20 +795,20 @@ define <4 x i32> @sqshrn4s(<2 x i32>* %r
 ;CHECK: sqshrn2.4s v0, {{v[0-9]+}}, #1
         %out = load <2 x i32>* %ret
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i32> %tmp4
 }
 
-declare i32  @llvm.arm64.neon.sqshrn.i32(i64, i32) nounwind readnone
-declare <8 x i8>  @llvm.arm64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone
+declare i32  @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone
 
 define i32 @sqshrun1s(i64 %A) nounwind {
 ; CHECK-LABEL: sqshrun1s:
 ; CHECK: sqshrun {{s[0-9]+}}, d0, #1
-  %tmp = call i32 @llvm.arm64.neon.sqshrun.i32(i64 %A, i32 1)
+  %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1)
   ret i32 %tmp
 }
 
@@ -816,7 +816,7 @@ define <8 x i8> @sqshrun8b(<8 x i16>* %A
 ;CHECK-LABEL: sqshrun8b:
 ;CHECK: sqshrun.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
 
@@ -824,7 +824,7 @@ define <4 x i16> @sqshrun4h(<4 x i32>* %
 ;CHECK-LABEL: sqshrun4h:
 ;CHECK: sqshrun.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
 
@@ -832,7 +832,7 @@ define <2 x i32> @sqshrun2s(<2 x i64>* %
 ;CHECK-LABEL: sqshrun2s:
 ;CHECK: sqshrun.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
 
@@ -841,7 +841,7 @@ define <16 x i8> @sqshrun16b(<8 x i8>* %
 ;CHECK: sqshrun2.16b v0, {{v[0-9]+}}, #1
         %out = load <8 x i8>* %ret
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
 }
@@ -851,7 +851,7 @@ define <8 x i16> @sqshrun8h(<4 x i16>* %
 ;CHECK: sqshrun2.8h v0, {{v[0-9]+}}, #1
         %out = load <4 x i16>* %ret
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
         ret <8 x i16> %tmp4
 }
@@ -861,20 +861,20 @@ define <4 x i32> @sqshrun4s(<2 x i32>* %
 ;CHECK: sqshrun2.4s v0, {{v[0-9]+}}, #1
         %out = load <2 x i32>* %ret
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i32> %tmp4
 }
 
-declare i32  @llvm.arm64.neon.sqshrun.i32(i64, i32) nounwind readnone
-declare <8 x i8>  @llvm.arm64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
+declare i32  @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
 
 define i32 @sqrshrn1s(i64 %A) nounwind {
 ; CHECK-LABEL: sqrshrn1s:
 ; CHECK: sqrshrn {{s[0-9]+}}, d0, #1
-  %tmp = call i32 @llvm.arm64.neon.sqrshrn.i32(i64 %A, i32 1)
+  %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
   ret i32 %tmp
 }
 
@@ -882,7 +882,7 @@ define <8 x i8> @sqrshrn8b(<8 x i16>* %A
 ;CHECK-LABEL: sqrshrn8b:
 ;CHECK: sqrshrn.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
 
@@ -890,7 +890,7 @@ define <4 x i16> @sqrshrn4h(<4 x i32>* %
 ;CHECK-LABEL: sqrshrn4h:
 ;CHECK: sqrshrn.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
 
@@ -898,7 +898,7 @@ define <2 x i32> @sqrshrn2s(<2 x i64>* %
 ;CHECK-LABEL: sqrshrn2s:
 ;CHECK: sqrshrn.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
 
@@ -907,7 +907,7 @@ define <16 x i8> @sqrshrn16b(<8 x i8>* %
 ;CHECK: sqrshrn2.16b v0, {{v[0-9]+}}, #1
         %out = load <8 x i8>* %ret
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
 }
@@ -917,7 +917,7 @@ define <8 x i16> @sqrshrn8h(<4 x i16>* %
 ;CHECK: sqrshrn2.8h v0, {{v[0-9]+}}, #1
         %out = load <4 x i16>* %ret
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
         ret <8 x i16> %tmp4
 }
@@ -927,20 +927,20 @@ define <4 x i32> @sqrshrn4s(<2 x i32>* %
 ;CHECK: sqrshrn2.4s v0, {{v[0-9]+}}, #1
         %out = load <2 x i32>* %ret
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i32> %tmp4
 }
 
-declare i32  @llvm.arm64.neon.sqrshrn.i32(i64, i32) nounwind readnone
-declare <8 x i8>  @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
+declare i32  @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
 
 define i32 @sqrshrun1s(i64 %A) nounwind {
 ; CHECK-LABEL: sqrshrun1s:
 ; CHECK: sqrshrun {{s[0-9]+}}, d0, #1
-  %tmp = call i32 @llvm.arm64.neon.sqrshrun.i32(i64 %A, i32 1)
+  %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
   ret i32 %tmp
 }
 
@@ -948,7 +948,7 @@ define <8 x i8> @sqrshrun8b(<8 x i16>* %
 ;CHECK-LABEL: sqrshrun8b:
 ;CHECK: sqrshrun.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
 
@@ -956,7 +956,7 @@ define <4 x i16> @sqrshrun4h(<4 x i32>*
 ;CHECK-LABEL: sqrshrun4h:
 ;CHECK: sqrshrun.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
 
@@ -964,7 +964,7 @@ define <2 x i32> @sqrshrun2s(<2 x i64>*
 ;CHECK-LABEL: sqrshrun2s:
 ;CHECK: sqrshrun.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
 
@@ -973,7 +973,7 @@ define <16 x i8> @sqrshrun16b(<8 x i8>*
 ;CHECK: sqrshrun2.16b v0, {{v[0-9]+}}, #1
         %out = load <8 x i8>* %ret
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
 }
@@ -983,7 +983,7 @@ define <8 x i16> @sqrshrun8h(<4 x i16>*
 ;CHECK: sqrshrun2.8h v0, {{v[0-9]+}}, #1
         %out = load <4 x i16>* %ret
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
         ret <8 x i16> %tmp4
 }
@@ -993,20 +993,20 @@ define <4 x i32> @sqrshrun4s(<2 x i32>*
 ;CHECK: sqrshrun2.4s v0, {{v[0-9]+}}, #1
         %out = load <2 x i32>* %ret
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i32> %tmp4
 }
 
-declare i32  @llvm.arm64.neon.sqrshrun.i32(i64, i32) nounwind readnone
-declare <8 x i8>  @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
+declare i32  @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
 
 define i32 @uqrshrn1s(i64 %A) nounwind {
 ; CHECK-LABEL: uqrshrn1s:
 ; CHECK: uqrshrn {{s[0-9]+}}, d0, #1
-  %tmp = call i32 @llvm.arm64.neon.uqrshrn.i32(i64 %A, i32 1)
+  %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
   ret i32 %tmp
 }
 
@@ -1014,7 +1014,7 @@ define <8 x i8> @uqrshrn8b(<8 x i16>* %A
 ;CHECK-LABEL: uqrshrn8b:
 ;CHECK: uqrshrn.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
 
@@ -1022,7 +1022,7 @@ define <4 x i16> @uqrshrn4h(<4 x i32>* %
 ;CHECK-LABEL: uqrshrn4h:
 ;CHECK: uqrshrn.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
 
@@ -1030,7 +1030,7 @@ define <2 x i32> @uqrshrn2s(<2 x i64>* %
 ;CHECK-LABEL: uqrshrn2s:
 ;CHECK: uqrshrn.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
 
@@ -1039,7 +1039,7 @@ define <16 x i8> @uqrshrn16b(<8 x i8>* %
 ;CHECK: uqrshrn2.16b v0, {{v[0-9]+}}, #1
         %out = load <8 x i8>* %ret
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
 }
@@ -1049,7 +1049,7 @@ define <8 x i16> @uqrshrn8h(<4 x i16>* %
 ;CHECK: uqrshrn2.8h v0, {{v[0-9]+}}, #1
         %out = load <4 x i16>* %ret
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
         ret <8 x i16> %tmp4
 }
@@ -1059,20 +1059,20 @@ define <4 x i32> @uqrshrn4s(<2 x i32>* %
 ;CHECK: uqrshrn2.4s v0, {{v[0-9]+}}, #1
         %out = load <2 x i32>* %ret
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i32> %tmp4
 }
 
-declare i32  @llvm.arm64.neon.uqrshrn.i32(i64, i32) nounwind readnone
-declare <8 x i8>  @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
+declare i32  @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
 
 define i32 @uqshrn1s(i64 %A) nounwind {
 ; CHECK-LABEL: uqshrn1s:
 ; CHECK: uqshrn {{s[0-9]+}}, d0, #1
-  %tmp = call i32 @llvm.arm64.neon.uqshrn.i32(i64 %A, i32 1)
+  %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1)
   ret i32 %tmp
 }
 
@@ -1080,7 +1080,7 @@ define <8 x i8> @uqshrn8b(<8 x i16>* %A)
 ;CHECK-LABEL: uqshrn8b:
 ;CHECK: uqshrn.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
 
@@ -1088,7 +1088,7 @@ define <4 x i16> @uqshrn4h(<4 x i32>* %A
 ;CHECK-LABEL: uqshrn4h:
 ;CHECK: uqshrn.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
 
@@ -1096,7 +1096,7 @@ define <2 x i32> @uqshrn2s(<2 x i64>* %A
 ;CHECK-LABEL: uqshrn2s:
 ;CHECK: uqshrn.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
 
@@ -1105,7 +1105,7 @@ define <16 x i8> @uqshrn16b(<8 x i8>* %r
 ;CHECK: uqshrn2.16b v0, {{v[0-9]+}}, #1
         %out = load <8 x i8>* %ret
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
 }
@@ -1115,7 +1115,7 @@ define <8 x i16> @uqshrn8h(<4 x i16>* %r
 ;CHECK: uqshrn2.8h v0, {{v[0-9]+}}, #1
   %out = load <4 x i16>* %ret
   %tmp1 = load <4 x i32>* %A
-  %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
+  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
   %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i16> %tmp4
 }
@@ -1125,15 +1125,15 @@ define <4 x i32> @uqshrn4s(<2 x i32>* %r
 ;CHECK: uqshrn2.4s v0, {{v[0-9]+}}, #1
   %out = load <2 x i32>* %ret
   %tmp1 = load <2 x i64>* %A
-  %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
+  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
   %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i32> %tmp4
 }
 
-declare i32  @llvm.arm64.neon.uqshrn.i32(i64, i32) nounwind readnone
-declare <8 x i8>  @llvm.arm64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
+declare i32  @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
 
 define <8 x i16> @ushll8h(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: ushll8h:
@@ -1253,7 +1253,7 @@ define <8 x i8> @sqshli8b(<8 x i8>* %A)
 ;CHECK-LABEL: sqshli8b:
 ;CHECK: sqshl.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i8>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
         ret <8 x i8> %tmp3
 }
 
@@ -1261,7 +1261,7 @@ define <4 x i16> @sqshli4h(<4 x i16>* %A
 ;CHECK-LABEL: sqshli4h:
 ;CHECK: sqshl.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i16>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
         ret <4 x i16> %tmp3
 }
 
@@ -1269,7 +1269,7 @@ define <2 x i32> @sqshli2s(<2 x i32>* %A
 ;CHECK-LABEL: sqshli2s:
 ;CHECK: sqshl.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i32>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
         ret <2 x i32> %tmp3
 }
 
@@ -1277,7 +1277,7 @@ define <16 x i8> @sqshli16b(<16 x i8>* %
 ;CHECK-LABEL: sqshli16b:
 ;CHECK: sqshl.16b v0, {{v[0-9]+}}, #1
         %tmp1 = load <16 x i8>* %A
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
         ret <16 x i8> %tmp3
 }
 
@@ -1285,7 +1285,7 @@ define <8 x i16> @sqshli8h(<8 x i16>* %A
 ;CHECK-LABEL: sqshli8h:
 ;CHECK: sqshl.8h v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
         ret <8 x i16> %tmp3
 }
 
@@ -1293,7 +1293,7 @@ define <4 x i32> @sqshli4s(<4 x i32>* %A
 ;CHECK-LABEL: sqshli4s:
 ;CHECK: sqshl.4s v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
         ret <4 x i32> %tmp3
 }
 
@@ -1301,7 +1301,7 @@ define <2 x i64> @sqshli2d(<2 x i64>* %A
 ;CHECK-LABEL: sqshli2d:
 ;CHECK: sqshl.2d v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
         ret <2 x i64> %tmp3
 }
 
@@ -1309,7 +1309,7 @@ define <8 x i8> @uqshli8b(<8 x i8>* %A)
 ;CHECK-LABEL: uqshli8b:
 ;CHECK: uqshl.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i8>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
         ret <8 x i8> %tmp3
 }
 
@@ -1317,7 +1317,7 @@ define <4 x i16> @uqshli4h(<4 x i16>* %A
 ;CHECK-LABEL: uqshli4h:
 ;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i16>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
         ret <4 x i16> %tmp3
 }
 
@@ -1325,7 +1325,7 @@ define <2 x i32> @uqshli2s(<2 x i32>* %A
 ;CHECK-LABEL: uqshli2s:
 ;CHECK: uqshl.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i32>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
         ret <2 x i32> %tmp3
 }
 
@@ -1333,7 +1333,7 @@ define <16 x i8> @uqshli16b(<16 x i8>* %
 ;CHECK-LABEL: uqshli16b:
 ;CHECK: uqshl.16b
         %tmp1 = load <16 x i8>* %A
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
         ret <16 x i8> %tmp3
 }
 
@@ -1341,7 +1341,7 @@ define <8 x i16> @uqshli8h(<8 x i16>* %A
 ;CHECK-LABEL: uqshli8h:
 ;CHECK: uqshl.8h v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
         ret <8 x i16> %tmp3
 }
 
@@ -1349,7 +1349,7 @@ define <4 x i32> @uqshli4s(<4 x i32>* %A
 ;CHECK-LABEL: uqshli4s:
 ;CHECK: uqshl.4s v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
         ret <4 x i32> %tmp3
 }
 
@@ -1357,7 +1357,7 @@ define <2 x i64> @uqshli2d(<2 x i64>* %A
 ;CHECK-LABEL: uqshli2d:
 ;CHECK: uqshl.2d v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
         ret <2 x i64> %tmp3
 }
 
@@ -1365,7 +1365,7 @@ define <8 x i8> @ursra8b(<8 x i8>* %A, <
 ;CHECK-LABEL: ursra8b:
 ;CHECK: ursra.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i8>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
         %tmp4 = load <8 x i8>* %B
         %tmp5 = add <8 x i8> %tmp3, %tmp4
         ret <8 x i8> %tmp5
@@ -1375,7 +1375,7 @@ define <4 x i16> @ursra4h(<4 x i16>* %A,
 ;CHECK-LABEL: ursra4h:
 ;CHECK: ursra.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i16>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
         %tmp4 = load <4 x i16>* %B
         %tmp5 = add <4 x i16> %tmp3, %tmp4
         ret <4 x i16> %tmp5
@@ -1385,7 +1385,7 @@ define <2 x i32> @ursra2s(<2 x i32>* %A,
 ;CHECK-LABEL: ursra2s:
 ;CHECK: ursra.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i32>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
         %tmp4 = load <2 x i32>* %B
         %tmp5 = add <2 x i32> %tmp3, %tmp4
         ret <2 x i32> %tmp5
@@ -1395,7 +1395,7 @@ define <16 x i8> @ursra16b(<16 x i8>* %A
 ;CHECK-LABEL: ursra16b:
 ;CHECK: ursra.16b v0, {{v[0-9]+}}, #1
         %tmp1 = load <16 x i8>* %A
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
         %tmp4 = load <16 x i8>* %B
         %tmp5 = add <16 x i8> %tmp3, %tmp4
          ret <16 x i8> %tmp5
@@ -1405,7 +1405,7 @@ define <8 x i16> @ursra8h(<8 x i16>* %A,
 ;CHECK-LABEL: ursra8h:
 ;CHECK: ursra.8h v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
         %tmp4 = load <8 x i16>* %B
         %tmp5 = add <8 x i16> %tmp3, %tmp4
          ret <8 x i16> %tmp5
@@ -1415,7 +1415,7 @@ define <4 x i32> @ursra4s(<4 x i32>* %A,
 ;CHECK-LABEL: ursra4s:
 ;CHECK: ursra.4s v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
         %tmp4 = load <4 x i32>* %B
         %tmp5 = add <4 x i32> %tmp3, %tmp4
          ret <4 x i32> %tmp5
@@ -1425,7 +1425,7 @@ define <2 x i64> @ursra2d(<2 x i64>* %A,
 ;CHECK-LABEL: ursra2d:
 ;CHECK: ursra.2d v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
         %tmp4 = load <2 x i64>* %B
         %tmp5 = add <2 x i64> %tmp3, %tmp4
          ret <2 x i64> %tmp5
@@ -1435,7 +1435,7 @@ define <8 x i8> @srsra8b(<8 x i8>* %A, <
 ;CHECK-LABEL: srsra8b:
 ;CHECK: srsra.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i8>* %A
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
         %tmp4 = load <8 x i8>* %B
         %tmp5 = add <8 x i8> %tmp3, %tmp4
         ret <8 x i8> %tmp5
@@ -1445,7 +1445,7 @@ define <4 x i16> @srsra4h(<4 x i16>* %A,
 ;CHECK-LABEL: srsra4h:
 ;CHECK: srsra.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i16>* %A
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
         %tmp4 = load <4 x i16>* %B
         %tmp5 = add <4 x i16> %tmp3, %tmp4
         ret <4 x i16> %tmp5
@@ -1455,7 +1455,7 @@ define <2 x i32> @srsra2s(<2 x i32>* %A,
 ;CHECK-LABEL: srsra2s:
 ;CHECK: srsra.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i32>* %A
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
         %tmp4 = load <2 x i32>* %B
         %tmp5 = add <2 x i32> %tmp3, %tmp4
         ret <2 x i32> %tmp5
@@ -1465,7 +1465,7 @@ define <16 x i8> @srsra16b(<16 x i8>* %A
 ;CHECK-LABEL: srsra16b:
 ;CHECK: srsra.16b v0, {{v[0-9]+}}, #1
         %tmp1 = load <16 x i8>* %A
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
         %tmp4 = load <16 x i8>* %B
         %tmp5 = add <16 x i8> %tmp3, %tmp4
          ret <16 x i8> %tmp5
@@ -1475,7 +1475,7 @@ define <8 x i16> @srsra8h(<8 x i16>* %A,
 ;CHECK-LABEL: srsra8h:
 ;CHECK: srsra.8h v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
         %tmp4 = load <8 x i16>* %B
         %tmp5 = add <8 x i16> %tmp3, %tmp4
          ret <8 x i16> %tmp5
@@ -1485,7 +1485,7 @@ define <4 x i32> @srsra4s(<4 x i32>* %A,
 ;CHECK-LABEL: srsra4s:
 ;CHECK: srsra.4s v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
         %tmp4 = load <4 x i32>* %B
         %tmp5 = add <4 x i32> %tmp3, %tmp4
          ret <4 x i32> %tmp5
@@ -1495,7 +1495,7 @@ define <2 x i64> @srsra2d(<2 x i64>* %A,
 ;CHECK-LABEL: srsra2d:
 ;CHECK: srsra.2d v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
         %tmp4 = load <2 x i64>* %B
         %tmp5 = add <2 x i64> %tmp3, %tmp4
          ret <2 x i64> %tmp5
@@ -1831,7 +1831,7 @@ define <8 x i8> @sli8b(<8 x i8>* %A, <8
 ;CHECK: sli.8b v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i8>* %A
         %tmp2 = load <8 x i8>* %B
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
         ret <8 x i8> %tmp3
 }
 
@@ -1840,7 +1840,7 @@ define <4 x i16> @sli4h(<4 x i16>* %A, <
 ;CHECK: sli.4h v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i16>* %A
         %tmp2 = load <4 x i16>* %B
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
         ret <4 x i16> %tmp3
 }
 
@@ -1849,7 +1849,7 @@ define <2 x i32> @sli2s(<2 x i32>* %A, <
 ;CHECK: sli.2s v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i32>* %A
         %tmp2 = load <2 x i32>* %B
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
         ret <2 x i32> %tmp3
 }
 
@@ -1858,7 +1858,7 @@ define <1 x i64> @sli1d(<1 x i64>* %A, <
 ;CHECK: sli d0, {{d[0-9]+}}, #1
         %tmp1 = load <1 x i64>* %A
         %tmp2 = load <1 x i64>* %B
-        %tmp3 = call <1 x i64> @llvm.arm64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
+        %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
         ret <1 x i64> %tmp3
 }
 
@@ -1867,7 +1867,7 @@ define <16 x i8> @sli16b(<16 x i8>* %A,
 ;CHECK: sli.16b v0, {{v[0-9]+}}, #1
         %tmp1 = load <16 x i8>* %A
         %tmp2 = load <16 x i8>* %B
-        %tmp3 = call <16 x i8> @llvm.arm64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
+        %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
         ret <16 x i8> %tmp3
 }
 
@@ -1876,7 +1876,7 @@ define <8 x i16> @sli8h(<8 x i16>* %A, <
 ;CHECK: sli.8h v0, {{v[0-9]+}}, #1
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
-        %tmp3 = call <8 x i16> @llvm.arm64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
         ret <8 x i16> %tmp3
 }
 
@@ -1885,7 +1885,7 @@ define <4 x i32> @sli4s(<4 x i32>* %A, <
 ;CHECK: sli.4s v0, {{v[0-9]+}}, #1
         %tmp1 = load <4 x i32>* %A
         %tmp2 = load <4 x i32>* %B
-        %tmp3 = call <4 x i32> @llvm.arm64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
         ret <4 x i32> %tmp3
 }
 
@@ -1894,19 +1894,19 @@ define <2 x i64> @sli2d(<2 x i64>* %A, <
 ;CHECK: sli.2d v0, {{v[0-9]+}}, #1
         %tmp1 = load <2 x i64>* %A
         %tmp2 = load <2 x i64>* %B
-        %tmp3 = call <2 x i64> @llvm.arm64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
         ret <2 x i64> %tmp3
 }
 
-declare <8 x i8>  @llvm.arm64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
+declare <8 x i8>  @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
 
 define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: ashr_v1i64:

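The hunks above, besides the mechanical intrinsic rename, cover two shift families: srsra (in this file's first hunk, srshl by a splat of -1 is a rounding shift right by one, and the following add folds into srsra) and sli. SLI, shift left and insert, shifts the second operand left and preserves the low #shift bits of each destination lane. An equivalent expansion of the 8b case without the intrinsic, as a reference sketch that is not part of the commit:

define <8 x i8> @sli8b_expanded(<8 x i8> %dst, <8 x i8> %src) {
  ; src shifted left by 1 in every lane
  %shifted = shl <8 x i8> %src, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; keep bit 0 of each %dst lane, the bits SLI preserves for shift #1
  %keep = and <8 x i8> %dst, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %res = or <8 x i8> %keep, %shifted
  ret <8 x i8> %res
}
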
Copied: llvm/trunk/test/CodeGen/AArch64/arm64-vshr.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/vshr.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vshr.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-vshr.ll&p1=llvm/trunk/test/CodeGen/ARM64/vshr.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/vshr.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vshr.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s
 
 define <8 x i16> @testShiftRightArith_v8i16(<8 x i16> %a, <8 x i16> %b) #0 {
 ; CHECK-LABEL: testShiftRightArith_v8i16:

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-vshuffle.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/vshuffle.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vshuffle.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-vshuffle.ll&p1=llvm/trunk/test/CodeGen/ARM64/vshuffle.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-vsqrt.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/vsqrt.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vsqrt.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-vsqrt.ll&p1=llvm/trunk/test/CodeGen/ARM64/vsqrt.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/vsqrt.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vsqrt.ll Sat May 24 07:50:23 2014
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 
 define <2 x float> @frecps_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: frecps_2s:
 ;CHECK: frecps.2s
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call <2 x float> @llvm.arm64.neon.frecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
 
@@ -14,7 +14,7 @@ define <4 x float> @frecps_4s(<4 x float
 ;CHECK: frecps.4s
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = call <4 x float> @llvm.arm64.neon.frecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	%tmp3 = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
 
@@ -23,13 +23,13 @@ define <2 x double> @frecps_2d(<2 x doub
 ;CHECK: frecps.2d
 	%tmp1 = load <2 x double>* %A
 	%tmp2 = load <2 x double>* %B
-	%tmp3 = call <2 x double> @llvm.arm64.neon.frecps.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+	%tmp3 = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 	ret <2 x double> %tmp3
 }
 
-declare <2 x float> @llvm.arm64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.frecps.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double>, <2 x double>) nounwind readnone
 
 
 define <2 x float> @frsqrts_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
@@ -37,7 +37,7 @@ define <2 x float> @frsqrts_2s(<2 x floa
 ;CHECK: frsqrts.2s
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call <2 x float> @llvm.arm64.neon.frsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
 
@@ -46,7 +46,7 @@ define <4 x float> @frsqrts_4s(<4 x floa
 ;CHECK: frsqrts.4s
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = call <4 x float> @llvm.arm64.neon.frsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	%tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
 
@@ -55,19 +55,19 @@ define <2 x double> @frsqrts_2d(<2 x dou
 ;CHECK: frsqrts.2d
 	%tmp1 = load <2 x double>* %A
 	%tmp2 = load <2 x double>* %B
-	%tmp3 = call <2 x double> @llvm.arm64.neon.frsqrts.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+	%tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 	ret <2 x double> %tmp3
 }
 
-declare <2 x float> @llvm.arm64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.frsqrts.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double>, <2 x double>) nounwind readnone
 
 define <2 x float> @frecpe_2s(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: frecpe_2s:
 ;CHECK: frecpe.2s
 	%tmp1 = load <2 x float>* %A
-	%tmp3 = call <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float> %tmp1)
+	%tmp3 = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp3
 }
 
@@ -75,7 +75,7 @@ define <4 x float> @frecpe_4s(<4 x float
 ;CHECK-LABEL: frecpe_4s:
 ;CHECK: frecpe.4s
 	%tmp1 = load <4 x float>* %A
-	%tmp3 = call <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float> %tmp1)
+	%tmp3 = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp3
 }
 
@@ -83,7 +83,7 @@ define <2 x double> @frecpe_2d(<2 x doub
 ;CHECK-LABEL: frecpe_2d:
 ;CHECK: frecpe.2d
 	%tmp1 = load <2 x double>* %A
-	%tmp3 = call <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double> %tmp1)
+	%tmp3 = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %tmp1)
 	ret <2 x double> %tmp3
 }
 
@@ -91,7 +91,7 @@ define float @frecpe_s(float* %A) nounwi
 ;CHECK-LABEL: frecpe_s:
 ;CHECK: frecpe s0, {{s[0-9]+}}
   %tmp1 = load float* %A
-  %tmp3 = call float @llvm.arm64.neon.frecpe.f32(float %tmp1)
+  %tmp3 = call float @llvm.aarch64.neon.frecpe.f32(float %tmp1)
   ret float %tmp3
 }
 
@@ -99,21 +99,21 @@ define double @frecpe_d(double* %A) noun
 ;CHECK-LABEL: frecpe_d:
 ;CHECK: frecpe d0, {{d[0-9]+}}
   %tmp1 = load double* %A
-  %tmp3 = call double @llvm.arm64.neon.frecpe.f64(double %tmp1)
+  %tmp3 = call double @llvm.aarch64.neon.frecpe.f64(double %tmp1)
   ret double %tmp3
 }
 
-declare <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double>) nounwind readnone
-declare float @llvm.arm64.neon.frecpe.f32(float) nounwind readnone
-declare double @llvm.arm64.neon.frecpe.f64(double) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double>) nounwind readnone
+declare float @llvm.aarch64.neon.frecpe.f32(float) nounwind readnone
+declare double @llvm.aarch64.neon.frecpe.f64(double) nounwind readnone
 
 define float @frecpx_s(float* %A) nounwind {
 ;CHECK-LABEL: frecpx_s:
 ;CHECK: frecpx s0, {{s[0-9]+}}
   %tmp1 = load float* %A
-  %tmp3 = call float @llvm.arm64.neon.frecpx.f32(float %tmp1)
+  %tmp3 = call float @llvm.aarch64.neon.frecpx.f32(float %tmp1)
   ret float %tmp3
 }
 
@@ -121,18 +121,18 @@ define double @frecpx_d(double* %A) noun
 ;CHECK-LABEL: frecpx_d:
 ;CHECK: frecpx d0, {{d[0-9]+}}
   %tmp1 = load double* %A
-  %tmp3 = call double @llvm.arm64.neon.frecpx.f64(double %tmp1)
+  %tmp3 = call double @llvm.aarch64.neon.frecpx.f64(double %tmp1)
   ret double %tmp3
 }
 
-declare float @llvm.arm64.neon.frecpx.f32(float) nounwind readnone
-declare double @llvm.arm64.neon.frecpx.f64(double) nounwind readnone
+declare float @llvm.aarch64.neon.frecpx.f32(float) nounwind readnone
+declare double @llvm.aarch64.neon.frecpx.f64(double) nounwind readnone
 
 define <2 x float> @frsqrte_2s(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: frsqrte_2s:
 ;CHECK: frsqrte.2s
 	%tmp1 = load <2 x float>* %A
-	%tmp3 = call <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float> %tmp1)
+	%tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp3
 }
 
@@ -140,7 +140,7 @@ define <4 x float> @frsqrte_4s(<4 x floa
 ;CHECK-LABEL: frsqrte_4s:
 ;CHECK: frsqrte.4s
 	%tmp1 = load <4 x float>* %A
-	%tmp3 = call <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float> %tmp1)
+	%tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp3
 }
 
@@ -148,7 +148,7 @@ define <2 x double> @frsqrte_2d(<2 x dou
 ;CHECK-LABEL: frsqrte_2d:
 ;CHECK: frsqrte.2d
 	%tmp1 = load <2 x double>* %A
-	%tmp3 = call <2 x double> @llvm.arm64.neon.frsqrte.v2f64(<2 x double> %tmp1)
+	%tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %tmp1)
 	ret <2 x double> %tmp3
 }
 
@@ -156,7 +156,7 @@ define float @frsqrte_s(float* %A) nounw
 ;CHECK-LABEL: frsqrte_s:
 ;CHECK: frsqrte s0, {{s[0-9]+}}
   %tmp1 = load float* %A
-  %tmp3 = call float @llvm.arm64.neon.frsqrte.f32(float %tmp1)
+  %tmp3 = call float @llvm.aarch64.neon.frsqrte.f32(float %tmp1)
   ret float %tmp3
 }
 
@@ -164,21 +164,21 @@ define double @frsqrte_d(double* %A) nou
 ;CHECK-LABEL: frsqrte_d:
 ;CHECK: frsqrte d0, {{d[0-9]+}}
   %tmp1 = load double* %A
-  %tmp3 = call double @llvm.arm64.neon.frsqrte.f64(double %tmp1)
+  %tmp3 = call double @llvm.aarch64.neon.frsqrte.f64(double %tmp1)
   ret double %tmp3
 }
 
-declare <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone
-declare float @llvm.arm64.neon.frsqrte.f32(float) nounwind readnone
-declare double @llvm.arm64.neon.frsqrte.f64(double) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone
+declare float @llvm.aarch64.neon.frsqrte.f32(float) nounwind readnone
+declare double @llvm.aarch64.neon.frsqrte.f64(double) nounwind readnone
 
 define <2 x i32> @urecpe_2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: urecpe_2s:
 ;CHECK: urecpe.2s
 	%tmp1 = load <2 x i32>* %A
-	%tmp3 = call <2 x i32> @llvm.arm64.neon.urecpe.v2i32(<2 x i32> %tmp1)
+	%tmp3 = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp3
 }
 
@@ -186,18 +186,18 @@ define <4 x i32> @urecpe_4s(<4 x i32>* %
 ;CHECK-LABEL: urecpe_4s:
 ;CHECK: urecpe.4s
 	%tmp1 = load <4 x i32>* %A
-	%tmp3 = call <4 x i32> @llvm.arm64.neon.urecpe.v4i32(<4 x i32> %tmp1)
+	%tmp3 = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp3
 }
 
-declare <2 x i32> @llvm.arm64.neon.urecpe.v2i32(<2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone
 
 define <2 x i32> @ursqrte_2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: ursqrte_2s:
 ;CHECK: ursqrte.2s
 	%tmp1 = load <2 x i32>* %A
-	%tmp3 = call <2 x i32> @llvm.arm64.neon.ursqrte.v2i32(<2 x i32> %tmp1)
+	%tmp3 = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp3
 }
 
@@ -205,18 +205,18 @@ define <4 x i32> @ursqrte_4s(<4 x i32>*
 ;CHECK-LABEL: ursqrte_4s:
 ;CHECK: ursqrte.4s
 	%tmp1 = load <4 x i32>* %A
-	%tmp3 = call <4 x i32> @llvm.arm64.neon.ursqrte.v4i32(<4 x i32> %tmp1)
+	%tmp3 = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp3
 }
 
-declare <2 x i32> @llvm.arm64.neon.ursqrte.v2i32(<2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.ursqrte.v4i32(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32>) nounwind readnone
 
 define float @f1(float %a, float %b) nounwind readnone optsize ssp {
 ; CHECK-LABEL: f1:
 ; CHECK: frsqrts s0, s0, s1
 ; CHECK-NEXT: ret
-  %vrsqrtss.i = tail call float @llvm.arm64.neon.frsqrts.f32(float %a, float %b) nounwind
+  %vrsqrtss.i = tail call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) nounwind
   ret float %vrsqrtss.i
 }
 
@@ -224,9 +224,9 @@ define double @f2(double %a, double %b)
 ; CHECK-LABEL: f2:
 ; CHECK: frsqrts d0, d0, d1
 ; CHECK-NEXT: ret
-  %vrsqrtsd.i = tail call double @llvm.arm64.neon.frsqrts.f64(double %a, double %b) nounwind
+  %vrsqrtsd.i = tail call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) nounwind
   ret double %vrsqrtsd.i
 }
 
-declare double @llvm.arm64.neon.frsqrts.f64(double, double) nounwind readnone
-declare float @llvm.arm64.neon.frsqrts.f32(float, float) nounwind readnone
+declare double @llvm.aarch64.neon.frsqrts.f64(double, double) nounwind readnone
+declare float @llvm.aarch64.neon.frsqrts.f32(float, float) nounwind readnone

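The frecpe/frecps and frsqrte/frsqrts intrinsics renamed above are the reciprocal (square root) estimate and Newton-Raphson step operations; frsqrts(a, b) computes (3 - a*b)/2. One refinement step of a reciprocal square root estimate therefore looks like the sketch below, built only from intrinsics already declared in this file (the function name is illustrative):

define float @rsqrt_one_step(float %x) nounwind {
  %est = call float @llvm.aarch64.neon.frsqrte.f32(float %x)
  %sq = fmul float %est, %est
  ; step = (3 - x * est^2) / 2
  %step = call float @llvm.aarch64.neon.frsqrts.f32(float %x, float %sq)
  %refined = fmul float %est, %step
  ret float %refined
}

declare float @llvm.aarch64.neon.frsqrte.f32(float) nounwind readnone
declare float @llvm.aarch64.neon.frsqrts.f32(float, float) nounwind readnone
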
Copied: llvm/trunk/test/CodeGen/AArch64/arm64-vsra.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/vsra.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vsra.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-vsra.ll&p1=llvm/trunk/test/CodeGen/ARM64/vsra.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/vsra.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vsra.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsras8:

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-vsub.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/vsub.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vsub.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-vsub.ll&p1=llvm/trunk/test/CodeGen/ARM64/vsub.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/vsub.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vsub.ll Sat May 24 07:50:23 2014
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @subhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: subhn8b:
 ;CHECK: subhn.8b
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i8> %tmp3
 }
 
@@ -14,7 +14,7 @@ define <4 x i16> @subhn4h(<4 x i32>* %A,
 ;CHECK: subhn.4h
         %tmp1 = load <4 x i32>* %A
         %tmp2 = load <4 x i32>* %B
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i16> %tmp3
 }
 
@@ -23,7 +23,7 @@ define <2 x i32> @subhn2s(<2 x i64>* %A,
 ;CHECK: subhn.2s
         %tmp1 = load <2 x i64>* %A
         %tmp2 = load <2 x i64>* %B
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i32> %tmp3
 }
 
@@ -31,8 +31,8 @@ define <16 x i8> @subhn2_16b(<8 x i16> %
 ;CHECK-LABEL: subhn2_16b:
 ;CHECK: subhn.8b
 ;CHECK-NEXT: subhn2.16b
-  %vsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
-  %vsubhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
+  %vsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
+  %vsubhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
   %res = shufflevector <8 x i8> %vsubhn2.i, <8 x i8> %vsubhn_high2.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   ret <16 x i8> %res
 }
@@ -41,8 +41,8 @@ define <8 x i16> @subhn2_8h(<4 x i32> %a
 ;CHECK-LABEL: subhn2_8h:
 ;CHECK: subhn.4h
 ;CHECK-NEXT: subhn2.8h
-  %vsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
-  %vsubhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
+  %vsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
+  %vsubhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
   %res = shufflevector <4 x i16> %vsubhn2.i, <4 x i16> %vsubhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i16> %res
 }
@@ -51,22 +51,22 @@ define <4 x i32> @subhn2_4s(<2 x i64> %a
 ;CHECK-LABEL: subhn2_4s:
 ;CHECK: subhn.2s
 ;CHECK-NEXT: subhn2.4s
-  %vsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
-  %vsubhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
+  %vsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
+  %vsubhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
   %res = shufflevector <2 x i32> %vsubhn2.i, <2 x i32> %vsubhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i32> %res
 }
 
-declare <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
 
 define <8 x i8> @rsubhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: rsubhn8b:
 ;CHECK: rsubhn.8b
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
-        %tmp3 = call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i8> %tmp3
 }
 
@@ -75,7 +75,7 @@ define <4 x i16> @rsubhn4h(<4 x i32>* %A
 ;CHECK: rsubhn.4h
         %tmp1 = load <4 x i32>* %A
         %tmp2 = load <4 x i32>* %B
-        %tmp3 = call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+        %tmp3 = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i16> %tmp3
 }
 
@@ -84,7 +84,7 @@ define <2 x i32> @rsubhn2s(<2 x i64>* %A
 ;CHECK: rsubhn.2s
         %tmp1 = load <2 x i64>* %A
         %tmp2 = load <2 x i64>* %B
-        %tmp3 = call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+        %tmp3 = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i32> %tmp3
 }
 
@@ -92,8 +92,8 @@ define <16 x i8> @rsubhn2_16b(<8 x i16>
 ;CHECK-LABEL: rsubhn2_16b:
 ;CHECK: rsubhn.8b
 ;CHECK-NEXT: rsubhn2.16b
-  %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
-  %vrsubhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
+  %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
+  %vrsubhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
   %res = shufflevector <8 x i8> %vrsubhn2.i, <8 x i8> %vrsubhn_high2.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   ret <16 x i8> %res
 }
@@ -102,8 +102,8 @@ define <8 x i16> @rsubhn2_8h(<4 x i32> %
 ;CHECK-LABEL: rsubhn2_8h:
 ;CHECK: rsubhn.4h
 ;CHECK-NEXT: rsubhn2.8h
-  %vrsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
-  %vrsubhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
+  %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
+  %vrsubhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
   %res = shufflevector <4 x i16> %vrsubhn2.i, <4 x i16> %vrsubhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i16> %res
 }
@@ -112,15 +112,15 @@ define <4 x i32> @rsubhn2_4s(<2 x i64> %
 ;CHECK-LABEL: rsubhn2_4s:
 ;CHECK: rsubhn.2s
 ;CHECK-NEXT: rsubhn2.4s
-  %vrsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
-  %vrsubhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
+  %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
+  %vrsubhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
   %res = shufflevector <2 x i32> %vrsubhn2.i, <2 x i32> %vrsubhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i32> %res
 }
 
-declare <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
 
 define <8 x i16> @ssubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: ssubl8h:

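subhn, subtract returning high narrow, keeps the top half of each wide difference; for 16-bit lanes it is (a - b) >> 8 truncated to i8. subhn2 produces the same values into the high half of a full-width register, which is why every subhn2 test above concatenates two narrow results with a shufflevector. An equivalent expansion of the 8b case without the intrinsic (a sketch, not part of the patch):

define <8 x i8> @subhn8b_expanded(<8 x i16> %a, <8 x i16> %b) {
  %diff = sub <8 x i16> %a, %b
  ; take the high byte of each 16-bit difference
  %high = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %narrow = trunc <8 x i16> %high to <8 x i8>
  ret <8 x i8> %narrow
}
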
Copied: llvm/trunk/test/CodeGen/AArch64/arm64-weak-reference.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/weak-reference.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-weak-reference.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-weak-reference.ll&p1=llvm/trunk/test/CodeGen/ARM64/weak-reference.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-xaluo.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/xaluo.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-xaluo.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-xaluo.ll&p1=llvm/trunk/test/CodeGen/ARM64/xaluo.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/zero-cycle-regmov.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll&p1=llvm/trunk/test/CodeGen/ARM64/zero-cycle-regmov.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/zero-cycle-zeroing.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll&p1=llvm/trunk/test/CodeGen/ARM64/zero-cycle-zeroing.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-zext.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/zext.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-zext.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-zext.ll&p1=llvm/trunk/test/CodeGen/ARM64/zext.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-zextload-unscaled.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/zextload-unscaled.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-zextload-unscaled.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-zextload-unscaled.ll&p1=llvm/trunk/test/CodeGen/ARM64/zextload-unscaled.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/CodeGen/AArch64/arm64-zip.ll (from r209576, llvm/trunk/test/CodeGen/ARM64/zip.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-zip.ll?p2=llvm/trunk/test/CodeGen/AArch64/arm64-zip.ll&p1=llvm/trunk/test/CodeGen/ARM64/zip.ll&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/zip.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-zip.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vzipi8:

Modified: llvm/trunk/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/atomic-ops-not-barriers.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/atomic-ops-not-barriers.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/atomic-ops-not-barriers.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
 
 define i32 @foo(i32* %var, i1 %cond) {
 ; CHECK-LABEL: foo:

Modified: llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll Sat May 24 07:50:23 2014
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
-; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG
 
 
 ; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created
@@ -501,9 +501,9 @@ define i8 @test_atomic_load_min_i8(i8 %o
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 
-; CHECK-ARM64-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]]
-; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxtb
-; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le
+; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]]
+; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le
 
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
@@ -525,9 +525,9 @@ define i16 @test_atomic_load_min_i16(i16
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 
-; CHECK-ARM64-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]]
-; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxth
-; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le
+; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]]
+; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le
 
 
 ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -550,8 +550,8 @@ define i32 @test_atomic_load_min_i32(i32
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 
-; CHECK-ARM64-NEXT: cmp w[[OLD]], w0
-; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le
 
 
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -574,8 +574,8 @@ define i64 @test_atomic_load_min_i64(i64
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 
-; CHECK-ARM64-NEXT: cmp x[[OLD]], x0
-; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, le
+; CHECK-NEXT: cmp x[[OLD]], x0
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, le
 
 
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -598,9 +598,9 @@ define i8 @test_atomic_load_max_i8(i8 %o
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 
-; CHECK-ARM64-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]]
-; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxtb
-; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]]
+; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
 
 
 ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -623,9 +623,9 @@ define i16 @test_atomic_load_max_i16(i16
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 
-; CHECK-ARM64-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]]
-; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxth
-; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]]
+; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
 
 
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -648,8 +648,8 @@ define i32 @test_atomic_load_max_i32(i32
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 
-; CHECK-ARM64-NEXT: cmp w[[OLD]], w0
-; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
 
 
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -672,8 +672,8 @@ define i64 @test_atomic_load_max_i64(i64
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 
-; CHECK-ARM64-NEXT: cmp x[[OLD]], x0
-; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
+; CHECK-NEXT: cmp x[[OLD]], x0
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
 
 
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -696,8 +696,8 @@ define i8 @test_atomic_load_umin_i8(i8 %
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 
-; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxtb
-; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls
+; CHECK-NEXT: cmp w[[OLD]], w0, uxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls
 
 
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -720,8 +720,8 @@ define i16 @test_atomic_load_umin_i16(i1
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 
-; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxth
-; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls
+; CHECK-NEXT: cmp w[[OLD]], w0, uxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls
 
 
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -744,8 +744,8 @@ define i32 @test_atomic_load_umin_i32(i3
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 
-; CHECK-ARM64-NEXT: cmp w[[OLD]], w0
-; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls
 
 
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -768,8 +768,8 @@ define i64 @test_atomic_load_umin_i64(i6
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 
-; CHECK-ARM64-NEXT: cmp x[[OLD]], x0
-; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, ls
+; CHECK-NEXT: cmp x[[OLD]], x0
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, ls
 
 
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -792,8 +792,8 @@ define i8 @test_atomic_load_umax_i8(i8 %
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 
-; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxtb
-; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-NEXT: cmp w[[OLD]], w0, uxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
 
 
 ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -816,8 +816,8 @@ define i16 @test_atomic_load_umax_i16(i1
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 
-; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxth
-; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-NEXT: cmp w[[OLD]], w0, uxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
 
 
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -840,8 +840,8 @@ define i32 @test_atomic_load_umax_i32(i3
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 
-; CHECK-ARM64-NEXT: cmp w[[OLD]], w0
-; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
 
 
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
@@ -864,8 +864,8 @@ define i64 @test_atomic_load_umax_i64(i6
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 
-; CHECK-ARM64-NEXT: cmp x[[OLD]], x0
-; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
+; CHECK-NEXT: cmp x[[OLD]], x0
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
 
 
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]

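The atomic-ops.ll hunks touch only CHECK lines: the RUN line drops the separate CHECK-ARM64 prefix, and the former CHECK-ARM64-NEXT expectations become the sole CHECK-NEXT expectations. The IR bodies sit outside the hunks; going by the test names, the i32 signed-min case presumably has the following shape (a reconstruction, with the memory ordering chosen arbitrarily):

@var32 = global i32 0

define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
  ; expands to a ldxr/cmp/csel/stxr loop; the 'le' csel condition implements signed min
  %old = atomicrmw min i32* @var32, i32 %offset seq_cst
  ret i32 %old
}
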
Modified: llvm/trunk/test/CodeGen/AArch64/basic-pic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/basic-pic.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/basic-pic.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/basic-pic.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s
 
 @var = global i32 0
 

Modified: llvm/trunk/test/CodeGen/AArch64/bitfield-insert-0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/bitfield-insert-0.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/bitfield-insert-0.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/bitfield-insert-0.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu -filetype=obj -o - %s | llvm-objdump -disassemble - | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -filetype=obj -o - %s | llvm-objdump -disassemble - | FileCheck %s
 
 ; The encoding of lsb -> immr in the CGed bitfield instructions was wrong at one
 ; point, in the edge case where lsb = 0. Just make sure.

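The lsb = 0 edge case mentioned in that comment comes from the bitfield-insert alias encoding: immr = (regsize - lsb) mod regsize, so lsb = 0 must encode as immr = 0 rather than as regsize, which would not fit the field. IR of roughly this shape exercises it (a sketch; the actual test body lies outside the hunk):

define i32 @insert_at_lsb0(i32 %old, i32 %new) {
  %keep = and i32 %old, -65536   ; preserve bits [31:16] of %old
  %ins  = and i32 %new, 65535    ; take bits [15:0] of %new
  %res  = or i32 %keep, %ins     ; expected to select an lsb-0 bitfield insert
  ret i32 %res
}
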
Modified: llvm/trunk/test/CodeGen/AArch64/bitfield-insert.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/bitfield-insert.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/bitfield-insert.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/bitfield-insert.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix=CHECK
 
 ; First, a simple example from Clang. The registers could plausibly be
 ; different, but probably won't be.
@@ -64,7 +64,7 @@ define void @test_whole32_from64(i64* %e
 ; CHECK-LABEL: test_whole32_from64:
 
 
-; CHECK-ARM64: bfxil {{x[0-9]+}}, {{x[0-9]+}}, #0, #16
+; CHECK: bfxil {{x[0-9]+}}, {{x[0-9]+}}, #0, #16
 
 ; CHECK: ret
 
@@ -83,7 +83,7 @@ define void @test_whole32_from64(i64* %e
 define void @test_32bit_masked(i32 *%existing, i32 *%new) {
 ; CHECK-LABEL: test_32bit_masked:
 
-; CHECK-ARM64: and
+; CHECK: and
 ; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4
 
   %oldval = load volatile i32* %existing
@@ -101,7 +101,7 @@ define void @test_32bit_masked(i32 *%exi
 
 define void @test_64bit_masked(i64 *%existing, i64 *%new) {
 ; CHECK-LABEL: test_64bit_masked:
-; CHECK-ARM64: and
+; CHECK: and
 ; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8
 
   %oldval = load volatile i64* %existing
@@ -121,7 +121,7 @@ define void @test_64bit_masked(i64 *%exi
 define void @test_32bit_complexmask(i32 *%existing, i32 *%new) {
 ; CHECK-LABEL: test_32bit_complexmask:
 
-; CHECK-ARM64: and
+; CHECK: and
 ; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4
 
   %oldval = load volatile i32* %existing

Modified: llvm/trunk/test/CodeGen/AArch64/bitfield.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/bitfield.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/bitfield.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/bitfield.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK
 
 @var32 = global i32 0
 @var64 = global i64 0
@@ -23,7 +23,7 @@ define void @test_extendb(i8 %var) {
 
   %uxt64 = zext i8 %var to i64
   store volatile i64 %uxt64, i64* @var64
-; CHECK-ARM64: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff
   ret void
 }
 
@@ -47,7 +47,7 @@ define void @test_extendh(i16 %var) {
 
   %uxt64 = zext i16 %var to i64
   store volatile i64 %uxt64, i64* @var64
-; CHECK-ARM64: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff
   ret void
 }
 
@@ -60,7 +60,7 @@ define void @test_extendw(i32 %var) {
 
   %uxt64 = zext i32 %var to i64
   store volatile i64 %uxt64, i64* @var64
-; CHECK-ARM64: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32
+; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32
   ret void
 }
 

Modified: llvm/trunk/test/CodeGen/AArch64/blockaddress.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/blockaddress.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/blockaddress.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/blockaddress.ll Sat May 24 07:50:23 2014
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -code-model=large -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s
 
 @addr = global i8* null
 

Modified: llvm/trunk/test/CodeGen/AArch64/bool-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/bool-loads.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/bool-loads.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/bool-loads.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
 
 @var = global i1 0
 

Modified: llvm/trunk/test/CodeGen/AArch64/breg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/breg.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/breg.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/breg.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
 
 @stored_label = global i8* null
 

Modified: llvm/trunk/test/CodeGen/AArch64/callee-save.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/callee-save.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/callee-save.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/callee-save.ll Sat May 24 07:50:23 2014
@@ -1,19 +1,14 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK-ARM64
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
 
 @var = global float 0.0
 
 define void @foo() {
 ; CHECK-LABEL: foo:
 
-; CHECK: stp d14, d15, [sp
-; CHECK: stp d12, d13, [sp
-; CHECK: stp d10, d11, [sp
-; CHECK: stp d8, d9, [sp
-
-; CHECK-ARM64: stp d15, d14, [sp
-; CHECK-ARM64: stp d13, d12, [sp
-; CHECK-ARM64: stp d11, d10, [sp
-; CHECK-ARM64: stp d9, d8, [sp
+; CHECK: stp d15, d14, [sp
+; CHECK: stp d13, d12, [sp
+; CHECK: stp d11, d10, [sp
+; CHECK: stp d9, d8, [sp
 
   ; Create lots of live variables to exhaust the supply of
   ; caller-saved registers
@@ -83,14 +78,9 @@ define void @foo() {
   store volatile float %val31, float* @var
   store volatile float %val32, float* @var
 
-; CHECK: ldp     d8, d9, [sp
-; CHECK: ldp     d10, d11, [sp
-; CHECK: ldp     d12, d13, [sp
-; CHECK: ldp     d14, d15, [sp
-
-; CHECK-ARM64: ldp     d9, d8, [sp
-; CHECK-ARM64: ldp     d11, d10, [sp
-; CHECK-ARM64: ldp     d13, d12, [sp
-; CHECK-ARM64: ldp     d15, d14, [sp
+; CHECK: ldp     d9, d8, [sp
+; CHECK: ldp     d11, d10, [sp
+; CHECK: ldp     d13, d12, [sp
+; CHECK: ldp     d15, d14, [sp
   ret void
 }

Modified: llvm/trunk/test/CodeGen/AArch64/code-model-large-abs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/code-model-large-abs.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/code-model-large-abs.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/code-model-large-abs.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu -code-model=large -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -o - %s | FileCheck %s
 
 @var8 = global i8 0
 @var16 = global i16 0

Modified: llvm/trunk/test/CodeGen/AArch64/compare-branch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/compare-branch.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/compare-branch.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/compare-branch.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
 
 @var32 = global i32 0
 @var64 = global i64 0

Modified: llvm/trunk/test/CodeGen/AArch64/complex-copy-noneon.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/complex-copy-noneon.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/complex-copy-noneon.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/complex-copy-noneon.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-neon < %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon < %s
 
 ; The DAG combiner decided to use a vector load/store for this struct copy
 ; previously. This probably shouldn't happen without NEON, but the most

Modified: llvm/trunk/test/CodeGen/AArch64/cond-sel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/cond-sel.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/cond-sel.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/cond-sel.ll Sat May 24 07:50:23 2014
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 @var32 = global i32 0
 @var64 = global i64 0
@@ -45,7 +45,7 @@ define void @test_floatcsel(float %lhs32
 ; CHECK-NOFP-NOT: fcmp
   %val2 = select i1 %tst2, i64 9, i64 15
   store i64 %val2, i64* @var64
-; CHECK-ARM64: orr w[[CONST15:[0-9]+]], wzr, #0xf
+; CHECK: orr w[[CONST15:[0-9]+]], wzr, #0xf
 ; CHECK: movz {{[wx]}}[[CONST9:[0-9]+]], #{{9|0x9}}
 ; CHECK: csel [[MAYBETRUE:x[0-9]+]], x[[CONST9]], x[[CONST15]], eq
 ; CHECK: csel {{x[0-9]+}}, x[[CONST9]], [[MAYBETRUE]], vs

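The csel pair checked in test_floatcsel implements an unordered-or-equal select: AArch64 has no single condition code for ueq, so the value is selected on eq and the result is selected again on vs, which catches the unordered case (fcmp sets V for unordered operands). A reduced sketch of the pattern, with the fcmp reconstructed since it sits outside the hunk:

define i64 @select_ueq(float %lhs, float %rhs) {
  %tst = fcmp ueq float %lhs, %rhs
  %val = select i1 %tst, i64 9, i64 15   ; expect: csel on eq, then csel on vs
  ret i64 %val
}
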
Modified: llvm/trunk/test/CodeGen/AArch64/directcond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/directcond.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/directcond.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/directcond.ll Sat May 24 07:50:23 2014
@@ -1,10 +1,10 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
 ; CHECK-LABEL: test_select_i32:
   %val = select i1 %bit, i32 %a, i32 %b
-; CHECK-ARM64: tst w0, #0x1
+; CHECK: tst w0, #0x1
 ; CHECK-NEXT: csel w0, w1, w2, ne
 
   ret i32 %val
@@ -13,7 +13,7 @@ define i32 @test_select_i32(i1 %bit, i32
 define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) {
 ; CHECK-LABEL: test_select_i64:
   %val = select i1 %bit, i64 %a, i64 %b
-; CHECK-ARM64: tst w0, #0x1
+; CHECK: tst w0, #0x1
 ; CHECK-NEXT: csel x0, x1, x2, ne
 
   ret i64 %val
@@ -22,7 +22,7 @@ define i64 @test_select_i64(i1 %bit, i64
 define float @test_select_float(i1 %bit, float %a, float %b) {
 ; CHECK-LABEL: test_select_float:
   %val = select i1 %bit, float %a, float %b
-; CHECK-ARM64: tst w0, #0x1
+; CHECK: tst w0, #0x1
 ; CHECK-NEXT: fcsel s0, s0, s1, ne
 ; CHECK-NOFP-NOT: fcsel
   ret float %val
@@ -31,7 +31,7 @@ define float @test_select_float(i1 %bit,
 define double @test_select_double(i1 %bit, double %a, double %b) {
 ; CHECK-LABEL: test_select_double:
   %val = select i1 %bit, double %a, double %b
-; CHECK-ARM64: tst w0, #0x1
+; CHECK: tst w0, #0x1
 ; CHECK-NEXT: fcsel d0, d0, d1, ne
 ; CHECK-NOFP-NOT: fcsel
 

Modified: llvm/trunk/test/CodeGen/AArch64/dp1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/dp1.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/dp1.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/dp1.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
 
 @var32 = global i32 0
 @var64 = global i64 0

Modified: llvm/trunk/test/CodeGen/AArch64/eliminate-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/eliminate-trunc.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/eliminate-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/eliminate-trunc.ll Sat May 24 07:50:23 2014
@@ -1,11 +1,11 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-apple-ios7.0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-ARM64
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-apple-ios7.0 -mcpu=cyclone | FileCheck %s
 
 ; Check trunc i64 operation is translated as a subregister access
 ; eliminating an i32 induction variable.
 
-; CHECK-ARM64-NOT: add {{x[0-9]+}}, {{x[0-9]+}}, #1
-; CHECK-ARM64: add {{w[0-9]+}}, {{w[0-9]+}}, #1
-; CHECK-ARM64-NEXT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NOT: add {{x[0-9]+}}, {{x[0-9]+}}, #1
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1
+; CHECK-NEXT: cmp {{w[0-9]+}}, {{w[0-9]+}}
 define void @test1_signed([8 x i8]* nocapture %a, i8* nocapture readonly %box, i8 %limit) minsize {
 entry:
   %conv = zext i8 %limit to i32

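A loop of roughly the following shape produces the pattern checked above: the i64 induction variable is only observed through a trunc, so the increment can be done directly on the w sub-register (add w, w, #1) with no separate x-register add. The reconstruction is an assumption, since the loop body lies outside the hunk:

define void @count32(i32* %out, i32 %n) {
entry:
  br label %loop
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %iv.next = add i64 %iv, 1
  %iv32 = trunc i64 %iv.next to i32   ; a sub-register view, no extra instruction
  store volatile i32 %iv32, i32* %out
  %done = icmp eq i32 %iv32, %n
  br i1 %done, label %exit, label %loop
exit:
  ret void
}
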
Modified: llvm/trunk/test/CodeGen/AArch64/extern-weak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/extern-weak.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/extern-weak.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/extern-weak.ll Sat May 24 07:50:23 2014
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK-ARM64
-; RUN: llc -mtriple=arm64-none-linux-gnu -code-model=large -o - %s | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large -o - %s | FileCheck --check-prefix=CHECK-LARGE %s
 
 declare extern_weak i32 @var()
 
@@ -9,8 +9,8 @@ define i32()* @foo() {
   ret i32()* @var
 
 
-; CHECK-ARM64: adrp x[[ADDRHI:[0-9]+]], :got:var
-; CHECK-ARM64: ldr x0, [x[[ADDRHI]], :got_lo12:var]
+; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:var
+; CHECK: ldr x0, [x[[ADDRHI]], :got_lo12:var]
 
   ; In the large model, the usual relocations are absolute and can
   ; materialise 0.
@@ -27,9 +27,9 @@ define i32* @bar() {
   %addr = getelementptr [10 x i32]* @arr_var, i32 0, i32 5
 
 
-; CHECK-ARM64: adrp x[[ADDRHI:[0-9]+]], :got:arr_var
-; CHECK-ARM64: ldr [[BASE:x[0-9]+]], [x[[ADDRHI]], :got_lo12:arr_var]
-; CHECK-ARM64: add x0, [[BASE]], #20
+; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:arr_var
+; CHECK: ldr [[BASE:x[0-9]+]], [x[[ADDRHI]], :got_lo12:arr_var]
+; CHECK: add x0, [[BASE]], #20
 
   ret i32* %addr
 
@@ -46,8 +46,8 @@ define i32* @bar() {
 define i32* @wibble() {
   ret i32* @defined_weak_var
 
-; CHECK-ARM64: adrp [[BASE:x[0-9]+]], defined_weak_var
-; CHECK-ARM64: add x0, [[BASE]], :lo12:defined_weak_var
+; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var
+; CHECK: add x0, [[BASE]], :lo12:defined_weak_var
 
 ; CHECK-LARGE: movz x0, #:abs_g3:defined_weak_var
 ; CHECK-LARGE: movk x0, #:abs_g2_nc:defined_weak_var

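Both sequences checked above exist because an extern_weak symbol may resolve to address 0: the small code model loads the address from the GOT (adrp against :got: plus an ldr with :got_lo12:), and the large model uses an absolute movz/movk chain, either of which can materialise null, while a plain adrp/add pair could not. A minimal reproduction of the property (names illustrative):

declare extern_weak i32 @maybe_absent()

define i1 @is_present() {
  ; legal precisely because the weak symbol's address may be 0
  %ok = icmp ne i32()* @maybe_absent, null
  ret i1 %ok
}
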
Modified: llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefix=CHECK-ARM64
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
 
 ; This test is designed to be run in the situation where the
 ; call-frame is not reserved (hence disable-fp-elim), but where
@@ -12,30 +12,22 @@ define fastcc void @foo(i32 %in) {
   %addr = alloca i8, i32 %in
 
 ; Normal frame setup stuff:
-; CHECK: sub sp, sp,
-; CHECK: stp x29, x30
-; CHECK-ARM64: stp     x29, x30, [sp, #-16]!
-; CHECK-ARM64: mov     x29, sp
+; CHECK: stp     x29, x30, [sp, #-16]!
+; CHECK: mov     x29, sp
 
 ; Reserve space for call-frame:
 ; CHECK: sub sp, sp, #16
-; CHECK-ARM64: sub sp, sp, #16
 
   call fastcc void @will_pop([8 x i32] undef, i32 42)
 ; CHECK: bl will_pop
-; CHECK-ARM64: bl will_pop
 
 ; Since @will_pop is fastcc with tailcallopt, it will put the stack
 ; back where it needs to be, we shouldn't duplicate that
 ; CHECK-NOT: sub sp, sp, #16
 ; CHECK-NOT: add sp, sp,
-; CHECK-ARM64-NOT: sub sp, sp, #16
-; CHECK-ARM64-NOT: add sp, sp,
 
-; CHECK: ldp x29, x30
-; CHECK: add sp, sp,
-; CHECK-ARM64: mov     sp, x29
-; CHECK-ARM64: ldp     x29, x30, [sp], #16
+; CHECK: mov     sp, x29
+; CHECK: ldp     x29, x30, [sp], #16
   ret void
 }
 
@@ -46,28 +38,21 @@ define void @foo1(i32 %in) {
 
   %addr = alloca i8, i32 %in
 ; Normal frame setup again
-; CHECK: sub sp, sp,
-; CHECK: stp x29, x30
-; CHECK-ARM64: stp     x29, x30, [sp, #-16]!
-; CHECK-ARM64: mov     x29, sp
+; CHECK: stp     x29, x30, [sp, #-16]!
+; CHECK: mov     x29, sp
 
 ; Reserve space for call-frame
 ; CHECK: sub sp, sp, #16
-; CHECK-ARM64: sub sp, sp, #16
 
   call void @wont_pop([8 x i32] undef, i32 42)
 ; CHECK: bl wont_pop
-; CHECK-ARM64: bl wont_pop
 
 ; This time we *do* need to unreserve the call-frame
 ; CHECK: add sp, sp, #16
-; CHECK-ARM64: add sp, sp, #16
 
 ; Check for epilogue (primarily to make sure sp spotted above wasn't
 ; part of it).
-; CHECK: ldp x29, x30
-; CHECK: add sp, sp,
-; CHECK-ARM64: mov     sp, x29
-; CHECK-ARM64: ldp     x29, x30, [sp], #16
+; CHECK: mov     sp, x29
+; CHECK: ldp     x29, x30, [sp], #16
   ret void
 }

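The updated checks describe the frame sequence the merged backend emits: the
prologue pushes the frame record with a pre-indexed store pair and the
epilogue tears it down by restoring sp from x29. A sketch of the full
sequence (illustrative assembly, not tied to any one function here):

    stp x29, x30, [sp, #-16]!   // save fp/lr, pre-decrementing sp by 16
    mov x29, sp                 // establish the frame pointer
    sub sp, sp, #16             // reserve the outgoing call-frame
    //  ... function body ...
    mov sp, x29                 // discard everything below the frame record
    ldp x29, x30, [sp], #16     // restore fp/lr, post-incrementing sp
    ret
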
Modified: llvm/trunk/test/CodeGen/AArch64/fastcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fastcc.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fastcc.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fastcc.ll Sat May 24 07:50:23 2014
@@ -1,226 +1,144 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-ARM64-TAIL
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK-ARM64 %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 ; Without tailcallopt fastcc still means the caller cleans up the
 ; stack, so try to make sure this is respected.
 
 define fastcc void @func_stack0() {
 ; CHECK-LABEL: func_stack0:
-; CHECK: sub sp, sp, #48
-
-; CHECK-ARM64-LABEL: func_stack0:
-; CHECK-ARM64: stp x29, x30, [sp, #-16]!
-; CHECK-ARM64-NEXT: mov x29, sp
-; CHECK-ARM64-NEXT: sub sp, sp, #32
+; CHECK: mov x29, sp
+; CHECK-NEXT: sub sp, sp, #32
 
 ; CHECK-TAIL-LABEL: func_stack0:
-; CHECK-TAIL: sub sp, sp, #48
-
-; CHECK-ARM64-TAIL-LABEL: func_stack0:
-; CHECK-ARM64-TAIL: stp x29, x30, [sp, #-16]!
-; CHECK-ARM64-TAIL-NEXT: mov x29, sp
-; CHECK-ARM64-TAIL-NEXT: sub sp, sp, #32
+; CHECK-TAIL: stp x29, x30, [sp, #-16]!
+; CHECK-TAIL-NEXT: mov x29, sp
+; CHECK-TAIL-NEXT: sub sp, sp, #32
 
 
   call fastcc void @func_stack8([8 x i32] undef, i32 42)
 ; CHECK:  bl func_stack8
 ; CHECK-NOT: sub sp, sp,
 
-; CHECK-ARM64:  bl func_stack8
-; CHECK-ARM64-NOT: sub sp, sp,
-
 ; CHECK-TAIL: bl func_stack8
 ; CHECK-TAIL: sub sp, sp, #16
 
-; CHECK-ARM64-TAIL: bl func_stack8
-; CHECK-ARM64-TAIL: sub sp, sp, #16
-
 
   call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
 ; CHECK: bl func_stack32
 ; CHECK-NOT: sub sp, sp,
 
-; CHECK-ARM64: bl func_stack32
-; CHECK-ARM64-NOT: sub sp, sp,
 
 ; CHECK-TAIL: bl func_stack32
 ; CHECK-TAIL: sub sp, sp, #32
 
-; CHECK-ARM64-TAIL: bl func_stack32
-; CHECK-ARM64-TAIL: sub sp, sp, #32
-
 
   call fastcc void @func_stack0()
 ; CHECK: bl func_stack0
 ; CHECK-NOT: sub sp, sp
 
-; CHECK-ARM64: bl func_stack0
-; CHECK-ARM64-NOT: sub sp, sp
 
 ; CHECK-TAIL: bl func_stack0
 ; CHECK-TAIL-NOT: sub sp, sp
 
-; CHECK-ARM64-TAIL: bl func_stack0
-; CHECK-ARM64-TAIL-NOT: sub sp, sp
-
   ret void
-; CHECK: add sp, sp, #48
+; CHECK: mov sp, x29
+; CHECK-NEXT: ldp     x29, x30, [sp], #16
 ; CHECK-NEXT: ret
 
-; CHECK-ARM64: mov sp, x29
-; CHECK-ARM64-NEXT: ldp     x29, x30, [sp], #16
-; CHECK-ARM64-NEXT: ret
 
-; CHECK-TAIL: add sp, sp, #48
+; CHECK-TAIL: mov sp, x29
+; CHECK-TAIL-NEXT: ldp     x29, x30, [sp], #16
 ; CHECK-TAIL-NEXT: ret
-
-; CHECK-ARM64-TAIL: mov sp, x29
-; CHECK-ARM64-TAIL-NEXT: ldp     x29, x30, [sp], #16
-; CHECK-ARM64-TAIL-NEXT: ret
 }
 
 define fastcc void @func_stack8([8 x i32], i32 %stacked) {
 ; CHECK-LABEL: func_stack8:
-; CHECK: sub sp, sp, #48
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; CHECK: sub sp, sp, #32
 
-; CHECK-ARM64-LABEL: func_stack8:
-; CHECK-ARM64: stp x29, x30, [sp, #-16]!
-; CHECK-ARM64: mov x29, sp
-; CHECK-ARM64: sub sp, sp, #32
 
 ; CHECK-TAIL-LABEL: func_stack8:
-; CHECK-TAIL: sub sp, sp, #48
-
-; CHECK-ARM64-TAIL-LABEL: func_stack8:
-; CHECK-ARM64-TAIL: stp x29, x30, [sp, #-16]!
-; CHECK-ARM64-TAIL: mov x29, sp
-; CHECK-ARM64-TAIL: sub sp, sp, #32
+; CHECK-TAIL: stp x29, x30, [sp, #-16]!
+; CHECK-TAIL: mov x29, sp
+; CHECK-TAIL: sub sp, sp, #32
 
 
   call fastcc void @func_stack8([8 x i32] undef, i32 42)
 ; CHECK:  bl func_stack8
 ; CHECK-NOT: sub sp, sp,
 
-; CHECK-ARM64:  bl func_stack8
-; CHECK-ARM64-NOT: sub sp, sp,
 
 ; CHECK-TAIL: bl func_stack8
 ; CHECK-TAIL: sub sp, sp, #16
 
-; CHECK-ARM64-TAIL: bl func_stack8
-; CHECK-ARM64-TAIL: sub sp, sp, #16
-
 
   call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
 ; CHECK: bl func_stack32
 ; CHECK-NOT: sub sp, sp,
 
-; CHECK-ARM64: bl func_stack32
-; CHECK-ARM64-NOT: sub sp, sp,
 
 ; CHECK-TAIL: bl func_stack32
 ; CHECK-TAIL: sub sp, sp, #32
 
-; CHECK-ARM64-TAIL: bl func_stack32
-; CHECK-ARM64-TAIL: sub sp, sp, #32
-
 
   call fastcc void @func_stack0()
 ; CHECK: bl func_stack0
 ; CHECK-NOT: sub sp, sp
 
-; CHECK-ARM64: bl func_stack0
-; CHECK-ARM64-NOT: sub sp, sp
-
 ; CHECK-TAIL: bl func_stack0
 ; CHECK-TAIL-NOT: sub sp, sp
 
-; CHECK-ARM64-TAIL: bl func_stack0
-; CHECK-ARM64-TAIL-NOT: sub sp, sp
-
   ret void
-; CHECK: add sp, sp, #48
+; CHECK: mov sp, x29
+; CHECK-NEXT: ldp     x29, x30, [sp], #16
 ; CHECK-NEXT: ret
 
-; CHECK-ARM64: mov sp, x29
-; CHECK-ARM64-NEXT: ldp     x29, x30, [sp], #16
-; CHECK-ARM64-NEXT: ret
 
-; CHECK-TAIL: add sp, sp, #64
+; CHECK-TAIL: mov sp, x29
+; CHECK-TAIL-NEXT: ldp     x29, x30, [sp], #16
 ; CHECK-TAIL-NEXT: ret
-
-; CHECK-ARM64-TAIL: mov sp, x29
-; CHECK-ARM64-TAIL-NEXT: ldp     x29, x30, [sp], #16
-; CHECK-ARM64-TAIL-NEXT: ret
 }
 
 define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
 ; CHECK-LABEL: func_stack32:
-; CHECK: sub sp, sp, #48
-
-; CHECK-ARM64-LABEL: func_stack32:
-; CHECK-ARM64: mov x29, sp
+; CHECK: mov x29, sp
 
 ; CHECK-TAIL-LABEL: func_stack32:
-; CHECK-TAIL: sub sp, sp, #48
-
-; CHECK-ARM64-TAIL-LABEL: func_stack32:
-; CHECK-ARM64-TAIL: mov x29, sp
+; CHECK-TAIL: mov x29, sp
 
 
   call fastcc void @func_stack8([8 x i32] undef, i32 42)
 ; CHECK:  bl func_stack8
 ; CHECK-NOT: sub sp, sp,
 
-; CHECK-ARM64:  bl func_stack8
-; CHECK-ARM64-NOT: sub sp, sp,
-
 ; CHECK-TAIL: bl func_stack8
 ; CHECK-TAIL: sub sp, sp, #16
 
-; CHECK-ARM64-TAIL: bl func_stack8
-; CHECK-ARM64-TAIL: sub sp, sp, #16
-
 
   call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
 ; CHECK: bl func_stack32
 ; CHECK-NOT: sub sp, sp,
 
-; CHECK-ARM64: bl func_stack32
-; CHECK-ARM64-NOT: sub sp, sp,
 
 ; CHECK-TAIL: bl func_stack32
 ; CHECK-TAIL: sub sp, sp, #32
 
-; CHECK-ARM64-TAIL: bl func_stack32
-; CHECK-ARM64-TAIL: sub sp, sp, #32
-
 
   call fastcc void @func_stack0()
 ; CHECK: bl func_stack0
 ; CHECK-NOT: sub sp, sp
 
-; CHECK-ARM64: bl func_stack0
-; CHECK-ARM64-NOT: sub sp, sp
 
 ; CHECK-TAIL: bl func_stack0
 ; CHECK-TAIL-NOT: sub sp, sp
 
-; CHECK-ARM64-TAIL: bl func_stack0
-; CHECK-ARM64-TAIL-NOT: sub sp, sp
-
   ret void
-; CHECK: add sp, sp, #48
+; CHECK: mov sp, x29
+; CHECK-NEXT: ldp     x29, x30, [sp], #16
 ; CHECK-NEXT: ret
 
-; CHECK-ARM64: mov sp, x29
-; CHECK-ARM64-NEXT: ldp     x29, x30, [sp], #16
-; CHECK-ARM64-NEXT: ret
-
-; CHECK-TAIL: add sp, sp, #80
+; CHECK-TAIL: mov sp, x29
+; CHECK-TAIL-NEXT: ldp     x29, x30, [sp], #16
 ; CHECK-TAIL-NEXT: ret
-
-; CHECK-ARM64-TAIL: mov sp, x29
-; CHECK-ARM64-TAIL-NEXT: ldp     x29, x30, [sp], #16
-; CHECK-ARM64-TAIL-NEXT: ret
 }

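Why CHECK-TAIL still expects a sub sp after every call while plain CHECK
expects none: with -tailcallopt, fastcc callees pop their own stack-based
arguments on return, so the caller must re-reserve its call-frame before the
next call; without it, the caller owns the cleanup and the frame stays put.
A minimal sketch, assuming hypothetical functions:

    ; RUN: llc -mtriple=aarch64-none-linux-gnu -tailcallopt -o - %s | FileCheck %s
    define fastcc void @callee([8 x i32], i32 %stacked) {
      ret void
    }
    define fastcc void @caller() {
      call fastcc void @callee([8 x i32] undef, i32 42)
    ; The callee popped its 16-byte argument area, so the caller re-reserves it:
    ; CHECK: bl callee
    ; CHECK: sub sp, sp, #16
      ret void
    }
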
Modified: llvm/trunk/test/CodeGen/AArch64/fcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fcmp.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fcmp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fcmp.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 declare void @bar(i32)
 

Modified: llvm/trunk/test/CodeGen/AArch64/fcvt-fixed.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fcvt-fixed.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fcvt-fixed.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fcvt-fixed.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 -O0
 
 ; (The O0 test is to make sure FastISel still constrains its operands properly

Modified: llvm/trunk/test/CodeGen/AArch64/flags-multiuse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/flags-multiuse.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/flags-multiuse.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/flags-multiuse.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
 
 ; LLVM should be able to cope with multiple uses of the same flag-setting
 ; instruction at different points of a routine. Either by rematerializing the

Modified: llvm/trunk/test/CodeGen/AArch64/floatdp_2source.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/floatdp_2source.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/floatdp_2source.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/floatdp_2source.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu -mcpu=cyclone | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mcpu=cyclone | FileCheck %s
 
 @varfloat = global float 0.0
 @vardouble = global double 0.0

Modified: llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK
 
 @varfloat = global float 0.0
 @vardouble = global double 0.0
@@ -12,7 +12,7 @@ define void @test_csel(i32 %lhs32, i32 %
   %tst1 = icmp ugt i32 %lhs32, %rhs32
   %val1 = select i1 %tst1, float 0.0, float 1.0
   store float %val1, float* @varfloat
-; CHECK-ARM64: movi v[[FLT0:[0-9]+]].2d, #0
+; CHECK: movi v[[FLT0:[0-9]+]].2d, #0
 ; CHECK: fmov s[[FLT1:[0-9]+]], #1.0
 ; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi
 

Modified: llvm/trunk/test/CodeGen/AArch64/fp-dp3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp-dp3.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp-dp3.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp-dp3.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -fp-contract=fast | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s
 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s -check-prefix=CHECK-NOFAST
 
 declare float @llvm.fma.f32(float, float, float)

Modified: llvm/trunk/test/CodeGen/AArch64/fp128-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp128-folding.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp128-folding.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp128-folding.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
 declare void @bar(i8*, i8*, i32*)
 
 ; SelectionDAG used to try to fold some fp128 operations using the ppc128 type,

Modified: llvm/trunk/test/CodeGen/AArch64/fpimm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fpimm.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fpimm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fpimm.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
 
 @varf32 = global float 0.0
 @varf64 = global double 0.0

Modified: llvm/trunk/test/CodeGen/AArch64/func-argpassing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/func-argpassing.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/func-argpassing.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/func-argpassing.ll Sat May 24 07:50:23 2014
@@ -1,8 +1,5 @@
-
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64 %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE --check-prefix=CHECK-ARM64-BE %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 %myStruct = type { i64 , i8, i32 }
 
@@ -63,7 +60,7 @@ define void @check_byval_align(i32* byva
 
     %val0 = load volatile i32* %addr0
     ; Some weird move means x0 is used for one access
-; CHECK-ARM64: ldr [[REG32:w[0-9]+]], [sp, #28]
+; CHECK: ldr [[REG32:w[0-9]+]], [sp, #28]
     store i32 %val0, i32* @var32
 ; CHECK: str [[REG32]], [{{x[0-9]+}}, {{#?}}:lo12:var32]
 
@@ -149,7 +146,6 @@ define i32 @struct_on_stack(i8 %var0, i1
     %retval = load volatile i32* %stacked
     ret i32 %retval
 ; CHECK-LE: ldr w0, [sp, #16]
-; CHECK-BE-AARCH64: ldr w0, [sp, #20]
 }
 
 define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
@@ -159,8 +155,8 @@ define void @stacked_fpu(float %var0, do
     store float %var8, float* @varfloat
     ; Beware as above: the offset would be different on big-endian
     ; machines if the first ldr were changed to use s-registers.
-; CHECK-ARM64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
-; CHECK-ARM64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
+; CHECK: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
+; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
 
     ret void
 }
@@ -185,11 +181,10 @@ define void @check_i128_stackalign(i32 %
     ; Nothing local on the stack in current codegen, so the first stack argument is 16 away
 ; CHECK-LE: add     x[[REG:[0-9]+]], sp, #16
 ; CHECK-LE: ldr {{x[0-9]+}}, [x[[REG]], #8]
-; CHECK-BE-AARCH64: ldr {{x[0-9]+}}, [sp, #24]
 
     ; The important point is that we address sp+24 for the second dword
 
-; CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
+; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
     ret void
 }
 
@@ -209,6 +204,5 @@ define i16 @stacked_i16(i32 %val0, i32 %
                         i32 %val4, i32 %val5, i32 %val6, i32 %val7,
                         i16 %stack1) {
 ; CHECK-LABEL: stacked_i16
-; CHECK-ARM64-BE: ldrh
   ret i16 %stack1
 }

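The surviving little-endian checks pivot on AAPCS64 stack alignment: an i128
stack argument is 16-byte aligned, so with a smaller argument already at
[sp], the i128 starts at [sp, #16] and its second dword at sp+24. A rough
sketch (hypothetical function, little-endian; exact addressing may vary):

    ; RUN: llc -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s
    define i64 @i128_align(i64 %x0, i64 %x1, i64 %x2, i64 %x3,
                           i64 %x4, i64 %x5, i64 %x6, i64 %x7,
                           i32 %on_stack, i128 %aligned) {
      %lo = trunc i128 %aligned to i64
      ret i64 %lo
    ; %x0-%x7 fill the registers; %on_stack sits at [sp], the i128 at [sp, #16].
    ; CHECK: ldr {{x[0-9]+}}, [sp, #16]
    }
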
Modified: llvm/trunk/test/CodeGen/AArch64/func-calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/func-calls.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/func-calls.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/func-calls.ll Sat May 24 07:50:23 2014
@@ -1,8 +1,7 @@
-
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64-NONEON %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK-NONEON %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK-BE %s
 
 %myStruct = type { i64 , i8, i32 }
 
@@ -90,13 +89,13 @@ define void @check_stack_args() {
   ; that varstruct is passed on the stack. Rather dependent on how a
   ; memcpy gets created, but the following works for now.
 
-; CHECK-ARM64-DAG: str {{q[0-9]+}}, [sp]
-; CHECK-ARM64-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0
-; CHECK-ARM64: mov v0.16b, v[[FINAL_DOUBLE]].16b
-
-; CHECK-ARM64-NONEON-DAG: str {{q[0-9]+}}, [sp]
-; CHECK-ARM64-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0
-; CHECK-ARM64-NONEON: fmov d0, d[[FINAL_DOUBLE]]
+; CHECK-DAG: str {{q[0-9]+}}, [sp]
+; CHECK-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0
+; CHECK: mov v0.16b, v[[FINAL_DOUBLE]].16b
+
+; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp]
+; CHECK-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0
+; CHECK-NONEON: fmov d0, d[[FINAL_DOUBLE]]
 
 ; CHECK: bl struct_on_stack
 ; CHECK-NOFP-NOT: fmov
@@ -105,11 +104,11 @@ define void @check_stack_args() {
                          float -2.0, float -8.0, float 16.0, float 1.0,
                          float 64.0)
 
-; CHECK-ARM64:  movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16
-; CHECK-ARM64: str [[SIXTY_FOUR]], [sp]
+; CHECK:  movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16
+; CHECK: str [[SIXTY_FOUR]], [sp]
 
-; CHECK-ARM64-NONEON:  movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16
-; CHECK-ARM64-NONEON: str [[SIXTY_FOUR]], [sp]
+; CHECK-NONEON:  movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16
+; CHECK-NONEON: str [[SIXTY_FOUR]], [sp]
 
 ; CHECK: bl stacked_fpu
   ret void
@@ -131,8 +130,11 @@ define void @check_i128_align() {
                                    i32 42, i128 %val)
 ; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:var128]
 ; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
-; CHECK-ARM64: stp [[I128LO]], [[I128HI]], [sp, #16]
-; CHECK-ARM64-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16]
+; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16]
+
+; CHECK-NONEON: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128]
+; CHECK-NONEON: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
+; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16]
 ; CHECK: bl check_i128_stackalign
 
   call void @check_i128_regalign(i32 0, i128 42)

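Note the last hunk above: once the -mattr=-neon run gets its own CHECK-NONEON
prefix instead of sharing CHECK, it stops matching any plain CHECK line, so
expectations common to both configurations (the var128 loads and the stp)
have to be duplicated under the new prefix. Schematically (not a complete
test):

    ; RUN: llc -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s
    ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon -o - %s | FileCheck --check-prefix=CHECK-NONEON %s

    ; Shared expectations must now appear once per prefix:
    ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
    ; CHECK-NONEON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
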
Modified: llvm/trunk/test/CodeGen/AArch64/global-alignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/global-alignment.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/global-alignment.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/global-alignment.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
 
 @var32 = global [3 x i32] zeroinitializer
 @var64 = global [3 x i64] zeroinitializer

Modified: llvm/trunk/test/CodeGen/AArch64/got-abuse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/got-abuse.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/got-abuse.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/got-abuse.ll Sat May 24 07:50:23 2014
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
-; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj -o - %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj -o - %s
 
 ; LLVM gives well-defined semantics to this horrible construct (though C says
 ; it's undefined). Regardless, we shouldn't crash. The important feature here is

Modified: llvm/trunk/test/CodeGen/AArch64/illegal-float-ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/illegal-float-ops.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/illegal-float-ops.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/illegal-float-ops.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
 
 @varfloat = global float 0.0
 @vardouble = global double 0.0

Modified: llvm/trunk/test/CodeGen/AArch64/init-array.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/init-array.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/init-array.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/init-array.ll Sat May 24 07:50:23 2014
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -use-init-array -o - %s | FileCheck %s
-; RUN: llc -mtriple=arm64-none-none-eabi -verify-machineinstrs -use-init-array -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array -o - %s | FileCheck %s
 
 define internal void @_GLOBAL__I_a() section ".text.startup" {
   ret void

Modified: llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badI.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badI.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badI.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: not llc -mtriple=arm64-none-linux-gnu -o - %s
+; RUN: not llc -mtriple=aarch64-none-linux-gnu -o - %s
 
 define void @foo() {
   ; Out of range immediate for I.

Modified: llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: not llc -mtriple=arm64-none-linux-gnu -o - %s
+; RUN: not llc -mtriple=aarch64-none-linux-gnu -o - %s
 
 define void @foo() {
   ; 32-bit bitpattern ending in 1101 can't be produced.

Modified: llvm/trunk/test/CodeGen/AArch64/jump-table.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/jump-table.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/jump-table.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/jump-table.ll Sat May 24 07:50:23 2014
@@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s
-; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s
-; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s
 
 define i32 @test_jumptable(i32 %in) {
 ; CHECK: test_jumptable

Modified: llvm/trunk/test/CodeGen/AArch64/large-consts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/large-consts.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/large-consts.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/large-consts.ll Sat May 24 07:50:23 2014
@@ -1,14 +1,14 @@
-; RUN: llc -mtriple=arm64-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s --check-prefix=CHECK-ARM64
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s
 
 ; Make sure the shift amount is encoded into the instructions by LLVM because
 ; it's not the linker's job to put it there.
 
 define double @foo() {
 
-; CHECK-ARM64: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0   // encoding: [0bAAA01000,A,0b111AAAAA,0xd2]
-; CHECK-ARM64: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b110AAAAA,0xf2]
-; CHECK-ARM64: movk [[CPADDR]], #:abs_g1_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b101AAAAA,0xf2]
-; CHECK-ARM64: movk [[CPADDR]], #:abs_g0_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b100AAAAA,0xf2]
+; CHECK: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0   // encoding: [0bAAA01000,A,0b111AAAAA,0xd2]
+; CHECK: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b110AAAAA,0xf2]
+; CHECK: movk [[CPADDR]], #:abs_g1_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b101AAAAA,0xf2]
+; CHECK: movk [[CPADDR]], #:abs_g0_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b100AAAAA,0xf2]
 
   ret double 3.14159
 }

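For reference, each :abs_gN: operand selects a 16-bit slice of the symbol's
absolute address (bits 16N+15 down to 16N); movz writes its slice and zeroes
the rest, and the _nc ("no overflow check") movk variants merge the remaining
slices in place. That is why the shift amount must already be baked into each
instruction rather than left to the linker (hypothetical symbol sym):

    movz x0, #:abs_g3:sym        // bits 63..48, remaining bits zeroed
    movk x0, #:abs_g2_nc:sym     // bits 47..32 inserted, rest kept
    movk x0, #:abs_g1_nc:sym     // bits 31..16
    movk x0, #:abs_g0_nc:sym     // bits 15..0
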
Modified: llvm/trunk/test/CodeGen/AArch64/ldst-regoffset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/ldst-regoffset.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/ldst-regoffset.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/ldst-regoffset.ll Sat May 24 07:50:23 2014
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 @var_8bit = global i8 0
 @var_16bit = global i16 0

Modified: llvm/trunk/test/CodeGen/AArch64/ldst-unscaledimm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/ldst-unscaledimm.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/ldst-unscaledimm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/ldst-unscaledimm.ll Sat May 24 07:50:23 2014
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 @var_8bit = global i8 0
 @var_16bit = global i16 0

Modified: llvm/trunk/test/CodeGen/AArch64/ldst-unsignedimm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/ldst-unsignedimm.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/ldst-unsignedimm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/ldst-unsignedimm.ll Sat May 24 07:50:23 2014
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 @var_8bit = global i8 0
 @var_16bit = global i16 0

Copied: llvm/trunk/test/CodeGen/AArch64/lit.local.cfg (from r209576, llvm/trunk/test/CodeGen/ARM64/lit.local.cfg)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/lit.local.cfg?p2=llvm/trunk/test/CodeGen/AArch64/lit.local.cfg&p1=llvm/trunk/test/CodeGen/ARM64/lit.local.cfg&r1=209576&r2=209577&rev=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/lit.local.cfg (original)
+++ llvm/trunk/test/CodeGen/AArch64/lit.local.cfg Sat May 24 07:50:23 2014
@@ -3,7 +3,7 @@ import re
 config.suffixes = ['.ll']
 
 targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
+if not 'AArch64' in targets:
     config.unsupported = True
 
 # For now we don't test arm64-win32.

Modified: llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll Sat May 24 07:50:23 2014
@@ -1,7 +1,7 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -code-model=large -mcpu=cyclone | FileCheck --check-prefix=CHECK-LARGE %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -code-model=large -mcpu=cyclone | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s
 
 @varfloat = global float 0.0
 @vardouble = global double 0.0

Modified: llvm/trunk/test/CodeGen/AArch64/local_vars.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/local_vars.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/local_vars.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/local_vars.ll Sat May 24 07:50:23 2014
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP-ARM64 %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP-ARM64 %s
 
 ; Make sure a reasonably sane prologue and epilogue are
 ; generated. This test is not robust in the face of a frame-handling

Modified: llvm/trunk/test/CodeGen/AArch64/logical_shifted_reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/logical_shifted_reg.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/logical_shifted_reg.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/logical_shifted_reg.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 @var1_32 = global i32 0
 @var2_32 = global i32 0

Modified: llvm/trunk/test/CodeGen/AArch64/mature-mc-support.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/mature-mc-support.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/mature-mc-support.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/mature-mc-support.ll Sat May 24 07:50:23 2014
@@ -1,14 +1,10 @@
 ; Test that inline assembly is parsed by the MC layer when MC support is mature
 ; (even when the output is assembly).
 
-; RUN: FileCheck %s < %t1
-
-; RUN: FileCheck %s < %t2
-
-; RUN: not llc -mtriple=arm64-pc-linux < %s > /dev/null 2> %t3
+; RUN: not llc -mtriple=aarch64-pc-linux < %s > /dev/null 2> %t3
 ; RUN: FileCheck %s < %t3
 
-; RUN: not llc -mtriple=arm64-pc-linux -filetype=obj < %s > /dev/null 2> %t4
+; RUN: not llc -mtriple=aarch64-pc-linux -filetype=obj < %s > /dev/null 2> %t4
 ; RUN: FileCheck %s < %t4
 
 module asm "	.this_directive_is_very_unlikely_to_exist"

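The two deleted "FileCheck %s < %t1"/"%t2" lines were orphans: nothing in the
file still produced %t1 or %t2 (apparently leftovers from run lines removed
along with the old backend), so the checks had no input. The surviving pairs
capture llc's stderr and check the MC diagnostic, roughly (sketch, assuming
the standard "unknown directive" parser error):

    ; RUN: not llc -mtriple=aarch64-pc-linux < %s > /dev/null 2> %t
    ; RUN: FileCheck %s < %t
    module asm "	.bogus_directive"
    ; CHECK: unknown directive
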
Modified: llvm/trunk/test/CodeGen/AArch64/movw-consts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/movw-consts.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/movw-consts.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/movw-consts.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK
 
 define i64 @test0() {
 ; CHECK-LABEL: test0:
@@ -9,43 +9,43 @@ define i64 @test0() {
 
 define i64 @test1() {
 ; CHECK-LABEL: test1:
-; CHECK-ARM64: orr w0, wzr, #0x1
+; CHECK: orr w0, wzr, #0x1
   ret i64 1
 }
 
 define i64 @test2() {
 ; CHECK-LABEL: test2:
-; CHECK-ARM64: orr w0, wzr, #0xffff
+; CHECK: orr w0, wzr, #0xffff
   ret i64 65535
 }
 
 define i64 @test3() {
 ; CHECK-LABEL: test3:
-; CHECK-ARM64: orr w0, wzr, #0x10000
+; CHECK: orr w0, wzr, #0x10000
   ret i64 65536
 }
 
 define i64 @test4() {
 ; CHECK-LABEL: test4:
-; CHECK-ARM64: orr w0, wzr, #0xffff0000
+; CHECK: orr w0, wzr, #0xffff0000
   ret i64 4294901760
 }
 
 define i64 @test5() {
 ; CHECK-LABEL: test5:
-; CHECK-ARM64: orr x0, xzr, #0x100000000
+; CHECK: orr x0, xzr, #0x100000000
   ret i64 4294967296
 }
 
 define i64 @test6() {
 ; CHECK-LABEL: test6:
-; CHECK-ARM64: orr x0, xzr, #0xffff00000000
+; CHECK: orr x0, xzr, #0xffff00000000
   ret i64 281470681743360
 }
 
 define i64 @test7() {
 ; CHECK-LABEL: test7:
-; CHECK-ARM64: orr x0, xzr, #0x1000000000000
+; CHECK: orr x0, xzr, #0x1000000000000
   ret i64 281474976710656
 }
 
@@ -75,35 +75,35 @@ define i64 @test10() {
 
 define void @test11() {
 ; CHECK-LABEL: test11:
-; CHECK-ARM64: str wzr
+; CHECK: str wzr
   store i32 0, i32* @var32
   ret void
 }
 
 define void @test12() {
 ; CHECK-LABEL: test12:
-; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0x1
+; CHECK: orr {{w[0-9]+}}, wzr, #0x1
   store i32 1, i32* @var32
   ret void
 }
 
 define void @test13() {
 ; CHECK-LABEL: test13:
-; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0xffff
+; CHECK: orr {{w[0-9]+}}, wzr, #0xffff
   store i32 65535, i32* @var32
   ret void
 }
 
 define void @test14() {
 ; CHECK-LABEL: test14:
-; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0x10000
+; CHECK: orr {{w[0-9]+}}, wzr, #0x10000
   store i32 65536, i32* @var32
   ret void
 }
 
 define void @test15() {
 ; CHECK-LABEL: test15:
-; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0xffff0000
+; CHECK: orr {{w[0-9]+}}, wzr, #0xffff0000
   store i32 4294901760, i32* @var32
   ret void
 }
@@ -119,6 +119,6 @@ define i64 @test17() {
 ; CHECK-LABEL: test17:
 
   ; Mustn't MOVN w0 here.
-; CHECK-ARM64: orr x0, xzr, #0xfffffffffffffffd
+; CHECK: orr x0, xzr, #0xfffffffffffffffd
   ret i64 -3
 }

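The pattern running through this file: constants that fit AArch64's logical
(bitmask) immediate encoding are materialized with a single ORR against the
zero register, and only values outside that encoding fall back to movw-family
instructions. A sketch (hypothetical functions; register width and printed
form may differ):

    define i64 @bitmask() {
      ret i64 4294901760          ; 0xffff0000 is a valid bitmask immediate
    ; CHECK: orr w0, wzr, #0xffff0000
    }
    define i64 @plain() {
      ret i64 4660                ; 0x1234 is not, but fits one movz chunk
    ; CHECK: movz w0, #0x1234
    }
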
Modified: llvm/trunk/test/CodeGen/AArch64/movw-shift-encoding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/movw-shift-encoding.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/movw-shift-encoding.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/movw-shift-encoding.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s --check-prefix=CHECK-ARM64
+; RUN: llc -mtriple=aarch64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s
 
 @var = global i32 0
 
@@ -8,8 +8,8 @@
 define i32* @get_var() {
   ret i32* @var
 
-; CHECK-ARM64: movz    x0, #:abs_g3:var        // encoding: [0bAAA00000,A,0b111AAAAA,0xd2]
-; CHECK-ARM64: movk    x0, #:abs_g2_nc:var     // encoding: [0bAAA00000,A,0b110AAAAA,0xf2]
-; CHECK-ARM64: movk    x0, #:abs_g1_nc:var     // encoding: [0bAAA00000,A,0b101AAAAA,0xf2]
-; CHECK-ARM64: movk    x0, #:abs_g0_nc:var     // encoding: [0bAAA00000,A,0b100AAAAA,0xf2]
+; CHECK: movz    x0, #:abs_g3:var        // encoding: [0bAAA00000,A,0b111AAAAA,0xd2]
+; CHECK: movk    x0, #:abs_g2_nc:var     // encoding: [0bAAA00000,A,0b110AAAAA,0xf2]
+; CHECK: movk    x0, #:abs_g1_nc:var     // encoding: [0bAAA00000,A,0b101AAAAA,0xf2]
+; CHECK: movk    x0, #:abs_g0_nc:var     // encoding: [0bAAA00000,A,0b100AAAAA,0xf2]
 }

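In these encoding checks, -show-mc-encoding prints the four instruction bytes
lowest-address first, with 'A' marking bits left for the fixup (the imm16
field) and literal bits pinned by the compiler. The third byte is the
interesting one: it holds the hw shift field, so checking 0b111AAAAA versus
0b110AAAAA and so on is exactly the assertion that the g3/g2/g1/g0 shift
amounts were encoded by LLVM rather than left to the linker:

    // MOVZ/MOVK layout: sf opc(2) 100101 hw(2) imm16(16) Rd(5)
    // byte 2 = [opcode bit 23 = 1 | hw | top 5 bits of imm16]
    //   hw = 0b11 (lsl #48) -> 0b111AAAAA   :abs_g3:
    //   hw = 0b10 (lsl #32) -> 0b110AAAAA   :abs_g2_nc:
    //   hw = 0b01 (lsl #16) -> 0b101AAAAA   :abs_g1_nc:
    //   hw = 0b00 (lsl #0)  -> 0b100AAAAA   :abs_g0_nc:
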
Modified: llvm/trunk/test/CodeGen/AArch64/neon-bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-bitcast.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-bitcast.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-bitcast.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
 
 ; From <8 x i8>
 

Modified: llvm/trunk/test/CodeGen/AArch64/neon-bitwise-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-bitwise-instructions.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-bitwise-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-bitwise-instructions.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: and8xi8:

Modified: llvm/trunk/test/CodeGen/AArch64/neon-compare-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-compare-instructions.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-compare-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-compare-instructions.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: cmeq8xi8:

Modified: llvm/trunk/test/CodeGen/AArch64/neon-diagnostics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-diagnostics.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-diagnostics.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-diagnostics.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK: test_vfma_lane_f32:

Modified: llvm/trunk/test/CodeGen/AArch64/neon-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-extract.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-extract.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-extract.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: test_vext_s8:

Modified: llvm/trunk/test/CodeGen/AArch64/neon-fma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-fma.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-fma.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-fma.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
 ;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s

Modified: llvm/trunk/test/CodeGen/AArch64/neon-fpround_f128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-fpround_f128.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-fpround_f128.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-fpround_f128.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 define <1 x double> @test_fpround_v1f128(<1 x fp128>* %a) {
 ; CHECK-LABEL: test_fpround_v1f128:

Modified: llvm/trunk/test/CodeGen/AArch64/neon-idiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-idiv.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-idiv.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-idiv.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu < %s -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s
 
 define <4 x i32> @test1(<4 x i32> %a) {
   %rem = srem <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>

Modified: llvm/trunk/test/CodeGen/AArch64/neon-mla-mls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-mla-mls.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-mla-mls.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-mla-mls.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 
 define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {

Modified: llvm/trunk/test/CodeGen/AArch64/neon-mov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-mov.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-mov.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-mov.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
 define <8 x i8> @movi8b() {
 ; CHECK-LABEL: movi8b:
@@ -14,75 +14,75 @@ define <16 x i8> @movi16b() {
 
 define <2 x i32> @movi2s_lsl0() {
 ; CHECK-LABEL: movi2s_lsl0:
-; CHECK-ARM64: movi {{d[0-9]+}}, #0x0000ff000000ff
+; CHECK: movi {{d[0-9]+}}, #0x0000ff000000ff
    ret <2 x i32> < i32 255, i32 255 >
 }
 
 define <2 x i32> @movi2s_lsl8() {
 ; CHECK-LABEL: movi2s_lsl8:
-; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ff000000ff00
+; CHECK: movi {{d[0-9]+}}, #0x00ff000000ff00
    ret <2 x i32> < i32 65280, i32 65280 >
 }
 
 define <2 x i32> @movi2s_lsl16() {
 ; CHECK-LABEL: movi2s_lsl16:
-; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff0000
+; CHECK: movi {{d[0-9]+}}, #0xff000000ff0000
    ret <2 x i32> < i32 16711680, i32 16711680 >
 
 }
 
 define <2 x i32> @movi2s_lsl24() {
 ; CHECK-LABEL: movi2s_lsl24:
-; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff000000
+; CHECK: movi {{d[0-9]+}}, #0xff000000ff000000
    ret <2 x i32> < i32 4278190080, i32 4278190080 >
 }
 
 define <4 x i32> @movi4s_lsl0() {
 ; CHECK-LABEL: movi4s_lsl0:
-; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x0000ff000000ff
+; CHECK: movi {{v[0-9]+}}.2d, #0x0000ff000000ff
    ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 >
 }
 
 define <4 x i32> @movi4s_lsl8() {
 ; CHECK-LABEL: movi4s_lsl8:
-; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x00ff000000ff00
+; CHECK: movi {{v[0-9]+}}.2d, #0x00ff000000ff00
    ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 >
 }
 
 define <4 x i32> @movi4s_lsl16() {
 ; CHECK-LABEL: movi4s_lsl16:
-; CHECK-ARM64:  movi {{v[0-9]+}}.2d, #0xff000000ff0000
+; CHECK:  movi {{v[0-9]+}}.2d, #0xff000000ff0000
    ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 >
 
 }
 
 define <4 x i32> @movi4s_lsl24() {
 ; CHECK-LABEL: movi4s_lsl24:
-; CHECK-ARM64:  movi {{v[0-9]+}}.2d, #0xff000000ff000000
+; CHECK:  movi {{v[0-9]+}}.2d, #0xff000000ff000000
    ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 >
 }
 
 define <4 x i16> @movi4h_lsl0() {
 ; CHECK-LABEL: movi4h_lsl0:
-; CHECK-ARM64:  movi {{d[0-9]+}}, #0xff00ff00ff00ff
+; CHECK:  movi {{d[0-9]+}}, #0xff00ff00ff00ff
    ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 >
 }
 
 define <4 x i16> @movi4h_lsl8() {
 ; CHECK-LABEL: movi4h_lsl8:
-; CHECK-ARM64: movi d0, #0xff00ff00ff00ff00
+; CHECK: movi d0, #0xff00ff00ff00ff00
    ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 >
 }
 
 define <8 x i16> @movi8h_lsl0() {
 ; CHECK-LABEL: movi8h_lsl0:
-; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff
+; CHECK: movi v0.2d, #0xff00ff00ff00ff
    ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 >
 }
 
 define <8 x i16> @movi8h_lsl8() {
 ; CHECK-LABEL: movi8h_lsl8:
-; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff00
+; CHECK: movi v0.2d, #0xff00ff00ff00ff00
    ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
 }
 
@@ -164,26 +164,26 @@ define <8 x i16> @mvni8h_lsl8() {
 
 define <2 x i32> @movi2s_msl8(<2 x i32> %a) {
 ; CHECK-LABEL: movi2s_msl8:
-; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ffff0000ffff
+; CHECK: movi {{d[0-9]+}}, #0x00ffff0000ffff
 	ret <2 x i32> < i32 65535, i32 65535 >
 }
 
 define <2 x i32> @movi2s_msl16() {
 ; CHECK-LABEL: movi2s_msl16:
-; CHECK-ARM64:  movi d0, #0xffffff00ffffff
+; CHECK:  movi d0, #0xffffff00ffffff
    ret <2 x i32> < i32 16777215, i32 16777215 >
 }
 
 
 define <4 x i32> @movi4s_msl8() {
 ; CHECK-LABEL: movi4s_msl8:
-; CHECK-ARM64:  movi v0.2d, #0x00ffff0000ffff
+; CHECK:  movi v0.2d, #0x00ffff0000ffff
    ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 >
 }
 
 define <4 x i32> @movi4s_msl16() {
 ; CHECK-LABEL: movi4s_msl16:
-; CHECK-ARM64:  movi v0.2d, #0xffffff00ffffff
+; CHECK:  movi v0.2d, #0xffffff00ffffff
    ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 >
 }
 

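The 64-bit movi form in these checks uses the Advanced SIMD byte-mask
immediate: an 8-bit immediate in which each bit expands to a full 0xff or
0x00 byte, which is why per-lane patterns like <2 x i32> <255, 255> are
encodable in a single movi. The mapping for the first two cases above (the
printed form trims some leading zeroes):

    ; imm8 = 0b00010001  ->  0x000000ff000000ff   (<2 x i32> <255, 255>)
    ; imm8 = 0b00100010  ->  0x0000ff000000ff00   (<2 x i32> <65280, 65280>)
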
Modified: llvm/trunk/test/CodeGen/AArch64/neon-or-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-or-combine.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-or-combine.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-or-combine.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 ; Check that the DAGCombiner does not crash with an assertion failure
 ; when performing a target specific combine to simplify a 'or' dag node

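In the next file (neon-perm.ll), note why the vuzp/vzip tests on two-element
vectors all check zip1/zip2: with only two lanes, the unzip and zip
permutations coincide (masks <0,2> and <1,3>), so the backend canonicalizes
both to ZIP. For example:

    define <2 x i32> @uzp_is_zip(<2 x i32> %a, <2 x i32> %b) {
      %s = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
      ret <2 x i32> %s
    ; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    }
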
Modified: llvm/trunk/test/CodeGen/AArch64/neon-perm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-perm.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-perm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-perm.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
 %struct.int8x8x2_t = type { [2 x <8 x i8>] }
 %struct.int16x4x2_t = type { [2 x <4 x i16>] }
@@ -53,7 +53,7 @@ entry:
 
 define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vuzp1_s32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i32> %shuffle.i
@@ -69,7 +69,7 @@ entry:
 
 define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vuzp1q_s64:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i64> %shuffle.i
@@ -109,7 +109,7 @@ entry:
 
 define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vuzp1_u32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i32> %shuffle.i
@@ -125,7 +125,7 @@ entry:
 
 define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vuzp1q_u64:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i64> %shuffle.i
@@ -133,7 +133,7 @@ entry:
 
 define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vuzp1_f32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x float> %shuffle.i
@@ -149,7 +149,7 @@ entry:
 
 define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vuzp1q_f64:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x double> %shuffle.i
@@ -221,7 +221,7 @@ entry:
 
 define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vuzp2_s32:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i32> %shuffle.i
@@ -237,7 +237,7 @@ entry:
 
 define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vuzp2q_s64:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i64> %shuffle.i
@@ -277,7 +277,7 @@ entry:
 
 define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vuzp2_u32:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i32> %shuffle.i
@@ -293,7 +293,7 @@ entry:
 
 define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vuzp2q_u64:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i64> %shuffle.i
@@ -301,7 +301,7 @@ entry:
 
 define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vuzp2_f32:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x float> %shuffle.i
@@ -317,7 +317,7 @@ entry:
 
 define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vuzp2q_f64:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x double> %shuffle.i
@@ -389,7 +389,7 @@ entry:
 
 define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vzip1_s32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i32> %shuffle.i
@@ -405,7 +405,7 @@ entry:
 
 define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vzip1q_s64:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i64> %shuffle.i
@@ -445,7 +445,7 @@ entry:
 
 define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vzip1_u32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i32> %shuffle.i
@@ -461,7 +461,7 @@ entry:
 
 define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vzip1q_u64:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i64> %shuffle.i
@@ -469,7 +469,7 @@ entry:
 
 define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vzip1_f32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x float> %shuffle.i
@@ -485,7 +485,7 @@ entry:
 
 define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vzip1q_f64:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x double> %shuffle.i
@@ -557,7 +557,7 @@ entry:
 
 define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vzip2_s32:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i32> %shuffle.i
@@ -573,7 +573,7 @@ entry:
 
 define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vzip2q_s64:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i64> %shuffle.i
@@ -613,7 +613,7 @@ entry:
 
 define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vzip2_u32:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i32> %shuffle.i
@@ -629,7 +629,7 @@ entry:
 
 define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vzip2q_u64:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i64> %shuffle.i
@@ -637,7 +637,7 @@ entry:
 
 define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vzip2_f32:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x float> %shuffle.i
@@ -653,7 +653,7 @@ entry:
 
 define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vzip2q_f64:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x double> %shuffle.i
@@ -725,7 +725,7 @@ entry:
 
 define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vtrn1_s32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i32> %shuffle.i
@@ -741,7 +741,7 @@ entry:
 
 define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vtrn1q_s64:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i64> %shuffle.i
@@ -781,7 +781,7 @@ entry:
 
 define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vtrn1_u32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i32> %shuffle.i
@@ -797,7 +797,7 @@ entry:
 
 define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vtrn1q_u64:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i64> %shuffle.i
@@ -805,7 +805,7 @@ entry:
 
 define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vtrn1_f32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x float> %shuffle.i
@@ -821,7 +821,7 @@ entry:
 
 define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vtrn1q_f64:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x double> %shuffle.i
@@ -893,7 +893,7 @@ entry:
 
 define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vtrn2_s32:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i32> %shuffle.i
@@ -909,7 +909,7 @@ entry:
 
 define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vtrn2q_s64:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i64> %shuffle.i
@@ -949,7 +949,7 @@ entry:
 
 define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vtrn2_u32:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i32> %shuffle.i
@@ -965,7 +965,7 @@ entry:
 
 define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vtrn2q_u64:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i64> %shuffle.i
@@ -973,7 +973,7 @@ entry:
 
 define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vtrn2_f32:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x float> %shuffle.i
@@ -989,7 +989,7 @@ entry:
 
 define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vtrn2q_f64:
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x double> %shuffle.i
@@ -2494,8 +2494,8 @@ entry:
 
 define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vuzp_s32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
   %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -2530,8 +2530,8 @@ entry:
 
 define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vuzp_u32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
   %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -2542,8 +2542,8 @@ entry:
 
 define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vuzp_f32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %vuzp.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
   %vuzp1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
@@ -2710,8 +2710,8 @@ entry:
 
 define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vzip_s32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
   %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -2746,8 +2746,8 @@ entry:
 
 define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vzip_u32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
   %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -2758,8 +2758,8 @@ entry:
 
 define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vzip_f32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %vzip.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
   %vzip1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
@@ -2926,8 +2926,8 @@ entry:
 
 define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vtrn_s32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
   %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -2962,8 +2962,8 @@ entry:
 
 define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vtrn_u32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
   %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -2974,8 +2974,8 @@ entry:
 
 define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vtrn_f32:
-; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 entry:
   %vtrn.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
   %vtrn1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
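
A note on the hunks above: for two-element vectors, uzp1, zip1 and trn1 (and
likewise uzp2, zip2 and trn2) all compute the same permutation, which is why
the vuzp*, vzip* and vtrn* tests in this file expect plain zip1/zip2. A
minimal sketch, with a hypothetical function name:

; With only two lanes, "even lanes" (uzp1), "low interleave" (zip1) and
; "low transpose" (trn1) all select <a[0], b[0]>, so the backend
; canonicalizes the shuffle to zip1.
define <2 x i32> @zip_uzp_trn_equiv(<2 x i32> %a, <2 x i32> %b) {
; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %s = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i32> %s
}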

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 declare float @llvm.fma.f32(float, float, float)
 declare double @llvm.fma.f64(double, double, double)

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll Sat May 24 07:50:23 2014
@@ -1,9 +1,9 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK
 
 
 define float @test_dup_sv2S(<2 x float> %v) {
  ; CHECK-LABEL: test_dup_sv2S
- ; CHECK-ARM64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+ ; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
  %tmp1 = extractelement <2 x float> %v, i32 1
  ret float  %tmp1
 }
@@ -37,14 +37,14 @@ define double @test_dup_dvD(<1 x double>
 
 define double @test_dup_dv2D(<2 x double> %v) {
  ; CHECK-LABEL: test_dup_dv2D
- ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+ ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
  %tmp1 = extractelement <2 x double> %v, i32 1
  ret double  %tmp1
 }
 
 define double @test_dup_dv2D_0(<2 x double> %v) {
  ; CHECK-LABEL: test_dup_dv2D_0
- ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+ ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
  ; CHECK: ret
  %tmp1 = extractelement <2 x double> %v, i32 1
  ret double  %tmp1
@@ -88,7 +88,7 @@ define <1 x i32> @test_vector_dup_sv2S(<
 
 define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) {
  ; CHECK-LABEL: test_vector_dup_dv2D
- ; CHECK-ARM64: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8
+ ; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8
  %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> <i32 1> 
  ret <1 x i64> %shuffle.i
 }

Modified: llvm/trunk/test/CodeGen/AArch64/neon-shift-left-long.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-shift-left-long.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-shift-left-long.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-shift-left-long.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) {
 ; CHECK: test_sshll_v8i8:

Modified: llvm/trunk/test/CodeGen/AArch64/neon-truncStore-extLoad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-truncStore-extLoad.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-truncStore-extLoad.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-truncStore-extLoad.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 ; A vector TruncStore cannot be selected directly.
 ; Test that the trunc IR and the vector store IR are selected correctly.
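
For context, a minimal sketch of the pattern this file exercises (function
name hypothetical): the trunc and the vector store must be selected as two
separate operations, since there is no combined vector truncating-store.

define void @truncstore_sketch(<4 x i32> %v, <4 x i16>* %p) {
; The trunc is selected on its own (typically as an xtn), and a plain
; vector store follows; no single truncstore instruction is matched.
  %t = trunc <4 x i32> %v to <4 x i16>
  store <4 x i16> %t, <4 x i16>* %p
  ret void
}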

Modified: llvm/trunk/test/CodeGen/AArch64/pic-eh-stubs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/pic-eh-stubs.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/pic-eh-stubs.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/pic-eh-stubs.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
 ; RUN: llc -mtriple=arm64_be-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
 
 ; Make sure exception-handling PIC code can be linked correctly. An alternative

Modified: llvm/trunk/test/CodeGen/AArch64/regress-f128csel-flags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/regress-f128csel-flags.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/regress-f128csel-flags.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/regress-f128csel-flags.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
 
 ; We used to not mark NZCV as being used in the continuation basic-block
 ; when lowering a 128-bit "select" to branches. This meant a subsequent use

Modified: llvm/trunk/test/CodeGen/AArch64/regress-fp128-livein.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/regress-fp128-livein.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/regress-fp128-livein.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/regress-fp128-livein.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s
 
 ; Regression test for NZCV reg live-in not being added to fp128csel IfTrue BB,
 ; causing a crash during live range calc.

Modified: llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix CHECK-ARM64
+; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s
 
 ; When generating DAG selection tables, TableGen used to only flag an
 ; instruction as needing a chain on its own account if it had a built-in pattern
@@ -12,7 +12,7 @@
 declare void @bar(i8*)
 
 define i64 @test_chains() {
-; CHECK-ARM64-LABEL: test_chains:
+; CHECK-LABEL: test_chains:
 
   %locvar = alloca i8
 
@@ -25,13 +25,13 @@ define i64 @test_chains() {
   %inc.4 = trunc i64 %inc.3 to i8
   store i8 %inc.4, i8* %locvar
 
-; CHECK-ARM64: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]]
-; CHECK-ARM64: add {{w[0-9]+}}, {{w[0-9]+}}, #1
-; CHECK-ARM64: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]]
-; CHECK-ARM64: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]]
+; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]]
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1
+; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]]
+; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]]
 
   %ret.1 = load i8* %locvar
   %ret.2 = zext i8 %ret.1 to i64
   ret i64 %ret.2
-; CHECK-ARM64: ret
+; CHECK: ret
 }

Modified: llvm/trunk/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s
 @var = global i32 0
 
 declare void @bar()

Modified: llvm/trunk/test/CodeGen/AArch64/setcc-takes-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/setcc-takes-i32.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/setcc-takes-i32.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/setcc-takes-i32.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -o - %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s
 
 ; Most important point here is that the promotion of the i1 works
 ; correctly. Previously LLVM thought that i64 was the appropriate SetCC output,

Modified: llvm/trunk/test/CodeGen/AArch64/sibling-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/sibling-call.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/sibling-call.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/sibling-call.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -arm64-load-store-opt=0 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -aarch64-load-store-opt=0 | FileCheck %s
 
 declare void @callee_stack0()
 declare void @callee_stack8([8 x i32], i64)

Modified: llvm/trunk/test/CodeGen/AArch64/sincos-expansion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/sincos-expansion.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/sincos-expansion.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/sincos-expansion.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
 
 define float @test_sincos_f32(float %f) {
   %sin = call float @sinf(float %f) readnone

Modified: llvm/trunk/test/CodeGen/AArch64/sincospow-vector-expansion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/sincospow-vector-expansion.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/sincospow-vector-expansion.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/sincospow-vector-expansion.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -o - %s -verify-machineinstrs -mtriple=arm64-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc -o - %s -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+neon | FileCheck %s
 
 
 define <2 x float> @test_cos_v2f64(<2 x double> %v1) {

Modified: llvm/trunk/test/CodeGen/AArch64/tail-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/tail-call.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/tail-call.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/tail-call.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck --check-prefix=CHECK-ARM64 %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
 
 declare fastcc void @callee_stack0()
 declare fastcc void @callee_stack8([8 x i32], i64)
@@ -8,91 +8,59 @@ define fastcc void @caller_to0_from0() n
 ; CHECK-LABEL: caller_to0_from0:
 ; CHECK-NEXT: // BB
 
-; CHECK-ARM64-LABEL: caller_to0_from0:
-; CHECK-ARM64-NEXT: // BB
-
   tail call fastcc void @callee_stack0()
   ret void
 
 ; CHECK-NEXT: b callee_stack0
-
-; CHECK-ARM64-NEXT: b callee_stack0
 }
 
 define fastcc void @caller_to0_from8([8 x i32], i64) {
 ; CHECK-LABEL: caller_to0_from8:
 
-; CHECK-ARM64-LABEL: caller_to0_from8:
-
   tail call fastcc void @callee_stack0()
   ret void
 
 ; CHECK: add sp, sp, #16
 ; CHECK-NEXT: b callee_stack0
-
-; CHECK-ARM64: add sp, sp, #16
-; CHECK-ARM64-NEXT: b callee_stack0
 }
 
 define fastcc void @caller_to8_from0() {
 ; CHECK-LABEL: caller_to8_from0:
 ; CHECK: sub sp, sp, #32
 
-; CHECK-ARM64-LABEL: caller_to8_from0:
-; CHECK-ARM64: sub sp, sp, #32
-
 ; Key point is that the "42" should go #16 below incoming stack
 ; pointer (we didn't have arg space to reuse).
   tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
   ret void
 
-; CHECK: str {{x[0-9]+}}, [sp, #16]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK: str {{x[0-9]+}}, [sp, #16]!
 ; CHECK-NEXT: b callee_stack8
-
-; CHECK-ARM64: str {{x[0-9]+}}, [sp, #16]!
-; CHECK-ARM64-NEXT: b callee_stack8
 }
 
 define fastcc void @caller_to8_from8([8 x i32], i64 %a) {
 ; CHECK-LABEL: caller_to8_from8:
 ; CHECK: sub sp, sp, #16
 
-; CHECK-ARM64-LABEL: caller_to8_from8:
-; CHECK-ARM64: sub sp, sp, #16
-
 ; Key point is that the "%a" should go where at SP on entry.
   tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
   ret void
 
-; CHECK: str {{x[0-9]+}}, [sp, #16]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK: str {{x[0-9]+}}, [sp, #16]!
 ; CHECK-NEXT: b callee_stack8
-
-; CHECK-ARM64: str {{x[0-9]+}}, [sp, #16]!
-; CHECK-ARM64-NEXT: b callee_stack8
 }
 
 define fastcc void @caller_to16_from8([8 x i32], i64 %a) {
 ; CHECK-LABEL: caller_to16_from8:
 ; CHECK: sub sp, sp, #16
 
-; CHECK-ARM64-LABEL: caller_to16_from8:
-; CHECK-ARM64: sub sp, sp, #16
-
 ; The important point is that the call reuses the "dead" argument space
 ; above %a on the stack. If it tries to go below incoming-SP then the
 ; callee will not deallocate the space, even in fastcc.
   tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2)
 
-; CHECK: str {{x[0-9]+}}, [sp, #24]
-; CHECK: str {{x[0-9]+}}, [sp, #16]
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
 ; CHECK-NEXT: add sp, sp, #16
 ; CHECK-NEXT: b callee_stack16
-
-; CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK-ARM64-NEXT: add sp, sp, #16
-; CHECK-ARM64-NEXT: b callee_stack16
   ret void
 }
 
@@ -101,19 +69,12 @@ define fastcc void @caller_to8_from24([8
 ; CHECK-LABEL: caller_to8_from24:
 ; CHECK: sub sp, sp, #16
 
-; CHECK-ARM64-LABEL: caller_to8_from24:
-; CHECK-ARM64: sub sp, sp, #16
-
 ; Key point is that the "%a" should go where at #16 above SP on entry.
   tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
   ret void
 
-; CHECK: str {{x[0-9]+}}, [sp, #32]
-; CHECK-NEXT: add sp, sp, #32
+; CHECK: str {{x[0-9]+}}, [sp, #32]!
 ; CHECK-NEXT: b callee_stack8
-
-; CHECK-ARM64: str {{x[0-9]+}}, [sp, #32]!
-; CHECK-ARM64-NEXT: b callee_stack8
 }
 
 
@@ -121,24 +82,13 @@ define fastcc void @caller_to16_from16([
 ; CHECK-LABEL: caller_to16_from16:
 ; CHECK: sub sp, sp, #16
 
-; CHECK-ARM64-LABEL: caller_to16_from16:
-; CHECK-ARM64: sub sp, sp, #16
-
 ; Here we want to make sure that both loads happen before the stores:
 ; otherwise either %a or %b will be wrongly clobbered.
   tail call fastcc void @callee_stack16([8 x i32] undef, i64 %b, i64 %a)
   ret void
 
-; CHECK: ldr x0,
-; CHECK: ldr x1,
-; CHECK: str x1,
-; CHECK: str x0,
-
+; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
 ; CHECK-NEXT: add sp, sp, #16
 ; CHECK-NEXT: b callee_stack16
-
-; CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK-ARM64-NEXT: add sp, sp, #16
-; CHECK-ARM64-NEXT: b callee_stack16
 }

Modified: llvm/trunk/test/CodeGen/AArch64/zero-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/zero-reg.ll?rev=209577&r1=209576&r2=209577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/zero-reg.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/zero-reg.ll Sat May 24 07:50:23 2014
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
 
 @var32 = global i32 0
 @var64 = global i64 0

Removed: llvm/trunk/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll (removed)
@@ -1,47 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin
-
-; Can't copy or spill / restore CPSR.
-; rdar://9105206
-
-define fastcc void @t() ssp align 2 {
-entry:
-  br i1 undef, label %bb3.i, label %bb2.i
-
-bb2.i:                                            ; preds = %entry
-  br label %bb3.i
-
-bb3.i:                                            ; preds = %bb2.i, %entry
-  br i1 undef, label %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71, label %bb.i69
-
-bb.i69:                                           ; preds = %bb3.i
-  br label %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71
-
-_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71: ; preds = %bb.i69, %bb3.i
-  %0 = select i1 undef, float 0.000000e+00, float undef
-  %1 = fdiv float %0, undef
-  %2 = fcmp ult float %1, 0xBF847AE140000000
-  %storemerge9 = select i1 %2, float %1, float 0.000000e+00
-  store float %storemerge9, float* undef, align 4
-  br i1 undef, label %bb42, label %bb47
-
-bb42:                                             ; preds = %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71
-  br i1 undef, label %bb46, label %bb53
-
-bb46:                                             ; preds = %bb42
-  br label %bb48
-
-bb47:                                             ; preds = %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71
-  br label %bb48
-
-bb48:                                             ; preds = %bb47, %bb46
-  br i1 undef, label %bb1.i14, label %bb.i13
-
-bb.i13:                                           ; preds = %bb48
-  br label %bb1.i14
-
-bb1.i14:                                          ; preds = %bb.i13, %bb48
-  br label %bb53
-
-bb53:                                             ; preds = %bb1.i14, %bb42
-  ret void
-}

Removed: llvm/trunk/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll (removed)
@@ -1,45 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin
-
-; rdar://9146594
-
-define void @drt_vsprintf() nounwind ssp {
-entry:
-  %do_tab_convert = alloca i32, align 4
-  br i1 undef, label %if.then24, label %if.else295, !dbg !13
-
-if.then24:                                        ; preds = %entry
-  unreachable
-
-if.else295:                                       ; preds = %entry
-  call void @llvm.dbg.declare(metadata !{i32* %do_tab_convert}, metadata !16), !dbg !18
-  store i32 0, i32* %do_tab_convert, align 4, !dbg !19
-  unreachable
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-!llvm.dbg.gv = !{!0}
-!llvm.dbg.sp = !{!1, !7, !10, !11, !12}
-
-!0 = metadata !{i32 589876, i32 0, metadata !1, metadata !"vsplive", metadata !"vsplive", metadata !"", metadata !2, i32 617, metadata !6, i32 1, i32 1, null, null} ; [ DW_TAG_variable ]
-!1 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"drt_vsprintf", metadata !"drt_vsprintf", metadata !"", i32 616, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !20} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589841, metadata !20, i32 12, metadata !"clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 589845, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 589860, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"putc_mem", metadata !"putc_mem", metadata !"", i32 30, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 589845, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !9, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!9 = metadata !{null}
-!10 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"print_double", metadata !"print_double", metadata !"", i32 203, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"print_number", metadata !"print_number", metadata !"", i32 75, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!12 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"get_flags", metadata !"get_flags", metadata !"", i32 508, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 653, i32 5, metadata !14, null}
-!14 = metadata !{i32 589835, metadata !20, metadata !15, i32 652, i32 35, i32 2} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{i32 589835, metadata !20, metadata !1, i32 616, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 590080, metadata !17, metadata !"do_tab_convert", metadata !2, i32 853, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!17 = metadata !{i32 589835, metadata !20, metadata !14, i32 850, i32 12, i32 33} ; [ DW_TAG_lexical_block ]
-!18 = metadata !{i32 853, i32 11, metadata !17, null}
-!19 = metadata !{i32 853, i32 29, metadata !17, null}
-!20 = metadata !{metadata !"print.i", metadata !"/Volumes/Ebi/echeng/radars/r9146594"}
-!21 = metadata !{i32 0}

Removed: llvm/trunk/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll (removed)
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
-define void @foo(i64 %val) {
-; CHECK: foo
-;   The stack frame store is not 64-bit aligned. Make sure we use an
-;   instruction that can handle that.
-; CHECK: stur x0, [sp, #20]
-  %a = alloca [49 x i32], align 4
-  %p32 = getelementptr inbounds [49 x i32]* %a, i64 0, i64 2
-  %p = bitcast i32* %p32 to i64*
-  store i64 %val, i64* %p, align 8
-  ret void
-}

Removed: llvm/trunk/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll (removed)
@@ -1,26 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-iOS5.0
-
-; CPSR is not allocatable, so the fast register allocator wouldn't mark it killed.
-; rdar://9313272
-
-define hidden void @t() nounwind {
-entry:
-  %cmp = icmp eq i32* null, undef
-  %frombool = zext i1 %cmp to i8
-  store i8 %frombool, i8* undef, align 1
-  %tmp4 = load i8* undef, align 1
-  %tobool = trunc i8 %tmp4 to i1
-  br i1 %tobool, label %land.lhs.true, label %if.end
-
-land.lhs.true:                                    ; preds = %entry
-  unreachable
-
-if.end:                                           ; preds = %entry
-  br i1 undef, label %land.lhs.true14, label %if.end33
-
-land.lhs.true14:                                  ; preds = %if.end
-  unreachable
-
-if.end33:                                         ; preds = %if.end
-  unreachable
-}

Removed: llvm/trunk/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll (removed)
@@ -1,31 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
-
-; Can't fold the increment by 1<<12 into a post-increment load
-; rdar://10301335
-
-@test_data = common global i32 0, align 4
-
-define void @t() nounwind ssp {
-; CHECK-LABEL: t:
-entry:
-  br label %for.body
-
-for.body:
-; CHECK: for.body
-; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}]
-; CHECK: add x[[REG:[0-9]+]],
-; CHECK:                      x[[REG]], #1, lsl  #12
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %0 = shl nsw i64 %indvars.iv, 12
-  %add = add nsw i64 %0, 34628173824
-  %1 = inttoptr i64 %add to i32*
-  %2 = load volatile i32* %1, align 4096
-  store volatile i32 %2, i32* @test_data, align 4
-  %indvars.iv.next = add i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, 200
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
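
Background for the comment at the top of this file (a hedged sketch, register
names illustrative): the post-index immediate on AArch64 loads is a signed
9-bit byte offset, so a stride of 1<<12 (4096 bytes) cannot be folded into the
load and is instead materialized as a shifted-immediate add, which is what the
CHECK lines match:

;   ldr w1, [x0]             ; post-index offsets must fit in [-256, 255]
;   add x0, x0, #1, lsl #12  ; the +4096 stride becomes a separate shifted add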

Removed: llvm/trunk/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll (removed)
@@ -1,40 +0,0 @@
-; RUN: llc < %s -march=arm64
-
-; The target lowering for integer comparisons was replacing some DAG nodes
-; during operation legalization, which resulted in dangling pointers,
-; cycles in DAGs, and eventually crashes.  This is the testcase for
-; one of those crashes. (rdar://10653656)
-
-define void @test(i1 zeroext %IsArrow) nounwind ssp align 2 {
-entry:
-  br i1 undef, label %return, label %lor.lhs.false
-
-lor.lhs.false:
-  br i1 undef, label %return, label %if.end
-
-if.end:
-  %tmp.i = load i64* undef, align 8
-  %and.i.i.i = and i64 %tmp.i, -16
-  br i1 %IsArrow, label %if.else_crit_edge, label %if.end32
-
-if.else_crit_edge:
-  br i1 undef, label %if.end32, label %return
-
-if.end32:
-  %0 = icmp ult i32 undef, 3
-  %1 = zext i64 %tmp.i to i320
-  %.pn.v = select i1 %0, i320 128, i320 64
-  %.pn = shl i320 %1, %.pn.v
-  %ins346392 = or i320 %.pn, 0
-  store i320 %ins346392, i320* undef, align 8
-  br i1 undef, label %sw.bb.i.i, label %exit
-
-sw.bb.i.i:
-  unreachable
-
-exit:
-  unreachable
-
-return:
-  ret void
-}

Removed: llvm/trunk/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll (removed)
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
-
-define i32 @foo(<4 x i32> %a, i32 %n) nounwind {
-; CHECK-LABEL: foo:
-; CHECK: fmov w0, s0
-; CHECK-NEXT: ret
-  %b = bitcast <4 x i32> %a to i128
-  %c = trunc i128 %b to i32
-  ret i32 %c
-}
-
-define i64 @bar(<2 x i64> %a, i64 %n) nounwind {
-; CHECK-LABEL: bar:
-; CHECK: fmov x0, d0
-; CHECK-NEXT: ret
-  %b = bitcast <2 x i64> %a to i128
-  %c = trunc i128 %b to i64
-  ret i64 %c
-}
-

Removed: llvm/trunk/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll (removed)
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march arm64 -mcpu=cyclone | FileCheck %s
-; <rdar://problem/11294426>
-
-@b = private unnamed_addr constant [3 x i32] [i32 1768775988, i32 1685481784, i32 1836253201], align 4
-
-; The important thing for this test is that we need an unaligned load of `l_b'
-; ("ldr w2, [x1, #8]" in this case).
-
-; CHECK:      adrp x[[PAGE:[0-9]+]], {{l_b@PAGE|.Lb}}
-; CHECK: add  x[[ADDR:[0-9]+]], x[[PAGE]], {{l_b@PAGEOFF|:lo12:.Lb}}
-; CHECK-NEXT: ldr  [[VAL:w[0-9]+]], [x[[ADDR]], #8]
-; CHECK-NEXT: str  [[VAL]], [x0, #8]
-; CHECK-NEXT: ldr  [[VAL2:x[0-9]+]], [x[[ADDR]]]
-; CHECK-NEXT: str  [[VAL2]], [x0]
-
-define void @foo(i8* %a) {
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([3 x i32]* @b to i8*), i64 12, i32 4, i1 false)
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind

Removed: llvm/trunk/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll (removed)
@@ -1,22 +0,0 @@
-; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic < %s | FileCheck %s --check-prefix=CHECK-LINUX
-; <rdar://problem/11392109>
-
-define hidden void @t() optsize ssp {
-entry:
-  store i64 zext (i32 ptrtoint (i64 (i32)* @x to i32) to i64), i64* undef, align 8
-; CHECK:             adrp    x{{[0-9]+}}, _x@GOTPAGE
-; CHECK:        ldr     x{{[0-9]+}}, [x{{[0-9]+}}, _x@GOTPAGEOFF]
-; CHECK-NEXT:        and     x{{[0-9]+}}, x{{[0-9]+}}, #0xffffffff
-; CHECK-NEXT:        str     x{{[0-9]+}}, [x{{[0-9]+}}]
-  unreachable
-}
-
-declare i64 @x(i32) optsize
-
-; Worth checking the Linux code is sensible too: the only way to access
-; the GOT is via a 64-bit load. Just loading wN is unacceptable
-; (there's no ELF relocation to do that).
-
-; CHECK-LINUX: adrp {{x[0-9]+}}, :got:x
-; CHECK-LINUX: ldr {{x[0-9]+}}, [{{x[0-9]+}}, :got_lo12:x]
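
A short sketch of the sequence those CHECK-LINUX lines describe (symbol name
hypothetical): the address of the GOT slot is formed page-wise, and the slot
itself is read with a 64-bit load, since ELF only defines the :got_lo12:
relocation for a 64-bit load:

;   adrp x0, :got:sym             ; page containing sym's GOT entry
;   ldr  x0, [x0, :got_lo12:sym]  ; 64-bit load of the entry; a 32-bit (wN)
;                                 ; load has no corresponding ELF relocation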

Removed: llvm/trunk/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll (removed)
@@ -1,50 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios -verify-machineinstrs | FileCheck %s
-
-; LdStOpt bug created illegal instruction:
-;   %D1<def>, %D2<def> = LDPSi %X0, 1
-; rdar://11512047
-
-%0 = type opaque
-%struct.CGRect = type { %struct.CGPoint, %struct.CGSize }
-%struct.CGPoint = type { double, double }
-%struct.CGSize = type { double, double }
-
-@"OBJC_IVAR_$_UIScreen._bounds" = external hidden global i64, section "__DATA, __objc_ivar", align 8
-
-define hidden %struct.CGRect @t(%0* nocapture %self, i8* nocapture %_cmd) nounwind readonly optsize ssp {
-entry:
-; CHECK-LABEL: t:
-; CHECK: ldp d{{[0-9]+}}, d{{[0-9]+}}
-  %ivar = load i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
-  %0 = bitcast %0* %self to i8*
-  %add.ptr = getelementptr inbounds i8* %0, i64 %ivar
-  %add.ptr10.0 = bitcast i8* %add.ptr to double*
-  %tmp11 = load double* %add.ptr10.0, align 8
-  %add.ptr.sum = add i64 %ivar, 8
-  %add.ptr10.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum
-  %1 = bitcast i8* %add.ptr10.1 to double*
-  %tmp12 = load double* %1, align 8
-  %add.ptr.sum17 = add i64 %ivar, 16
-  %add.ptr4.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum17
-  %add.ptr4.1.0 = bitcast i8* %add.ptr4.1 to double*
-  %tmp = load double* %add.ptr4.1.0, align 8
-  %add.ptr4.1.sum = add i64 %ivar, 24
-  %add.ptr4.1.1 = getelementptr inbounds i8* %0, i64 %add.ptr4.1.sum
-  %2 = bitcast i8* %add.ptr4.1.1 to double*
-  %tmp5 = load double* %2, align 8
-  %insert14 = insertvalue %struct.CGPoint undef, double %tmp11, 0
-  %insert16 = insertvalue %struct.CGPoint %insert14, double %tmp12, 1
-  %insert = insertvalue %struct.CGRect undef, %struct.CGPoint %insert16, 0
-  %insert7 = insertvalue %struct.CGSize undef, double %tmp, 0
-  %insert9 = insertvalue %struct.CGSize %insert7, double %tmp5, 1
-  %insert3 = insertvalue %struct.CGRect %insert, %struct.CGSize %insert9, 1
-  ret %struct.CGRect %insert3
-}
-
-!llvm.module.flags = !{!0, !1, !2, !3}
-
-!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
-!4 = metadata !{}

Removed: llvm/trunk/test/CodeGen/ARM64/2012-06-06-FPToUI.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-06-06-FPToUI.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2012-06-06-FPToUI.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2012-06-06-FPToUI.ll (removed)
@@ -1,67 +0,0 @@
-; RUN: llc -march=arm64 -O0 < %s | FileCheck %s
-; RUN: llc -march=arm64 -O3 < %s | FileCheck %s
-
-@.str = private unnamed_addr constant [9 x i8] c"%lf %lu\0A\00", align 1
-@.str1 = private unnamed_addr constant [8 x i8] c"%lf %u\0A\00", align 1
-@.str2 = private unnamed_addr constant [8 x i8] c"%f %lu\0A\00", align 1
-@.str3 = private unnamed_addr constant [7 x i8] c"%f %u\0A\00", align 1
-
-define void @testDouble(double %d) ssp {
-; CHECK-LABEL: testDouble:
-; CHECK:  fcvtzu x{{[0-9]+}}, d{{[0-9]+}}
-; CHECK:  fcvtzu w{{[0-9]+}}, d{{[0-9]+}}
-entry:
-  %d.addr = alloca double, align 8
-  store double %d, double* %d.addr, align 8
-  %0 = load double* %d.addr, align 8
-  %1 = load double* %d.addr, align 8
-  %conv = fptoui double %1 to i64
-  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), double %0, i64 %conv)
-  %2 = load double* %d.addr, align 8
-  %3 = load double* %d.addr, align 8
-  %conv1 = fptoui double %3 to i32
-  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str1, i32 0, i32 0), double %2, i32 %conv1)
-  ret void
-}
-
-declare i32 @printf(i8*, ...)
-
-define void @testFloat(float %f) ssp {
-; CHECK-LABEL: testFloat:
-; CHECK:  fcvtzu x{{[0-9]+}}, s{{[0-9]+}}
-; CHECK:  fcvtzu w{{[0-9]+}}, s{{[0-9]+}}
-entry:
-  %f.addr = alloca float, align 4
-  store float %f, float* %f.addr, align 4
-  %0 = load float* %f.addr, align 4
-  %conv = fpext float %0 to double
-  %1 = load float* %f.addr, align 4
-  %conv1 = fptoui float %1 to i64
-  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str2, i32 0, i32 0), double %conv, i64 %conv1)
-  %2 = load float* %f.addr, align 4
-  %conv2 = fpext float %2 to double
-  %3 = load float* %f.addr, align 4
-  %conv3 = fptoui float %3 to i32
-  %call4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str3, i32 0, i32 0), double %conv2, i32 %conv3)
-  ret void
-}
-
-define i32 @main(i32 %argc, i8** %argv) ssp {
-entry:
-  %retval = alloca i32, align 4
-  %argc.addr = alloca i32, align 4
-  %argv.addr = alloca i8**, align 8
-  store i32 0, i32* %retval
-  store i32 %argc, i32* %argc.addr, align 4
-  store i8** %argv, i8*** %argv.addr, align 8
-  call void @testDouble(double 1.159198e+01)
-  call void @testFloat(float 0x40272F1800000000)
-  ret i32 0
-}
-
-!llvm.module.flags = !{!0, !1, !2, !3}
-
-!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}

Removed: llvm/trunk/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll (removed)
@@ -1,56 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios
-; rdar://11849816
-
-@shlib_path_substitutions = external hidden unnamed_addr global i8**, align 8
-
-declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
-
-declare noalias i8* @xmalloc(i64) optsize
-
-declare i64 @strlen(i8* nocapture) nounwind readonly optsize
-
-declare i8* @__strcpy_chk(i8*, i8*, i64) nounwind optsize
-
-declare i8* @__strcat_chk(i8*, i8*, i64) nounwind optsize
-
-declare noalias i8* @xstrdup(i8*) optsize
-
-define i8* @dyld_fix_path(i8* %path) nounwind optsize ssp {
-entry:
-  br i1 undef, label %if.end56, label %for.cond
-
-for.cond:                                         ; preds = %entry
-  br i1 undef, label %for.cond10, label %for.body
-
-for.body:                                         ; preds = %for.cond
-  unreachable
-
-for.cond10:                                       ; preds = %for.cond
-  br i1 undef, label %if.end56, label %for.body14
-
-for.body14:                                       ; preds = %for.cond10
-  %call22 = tail call i64 @strlen(i8* undef) nounwind optsize
-  %sext = shl i64 %call22, 32
-  %conv30 = ashr exact i64 %sext, 32
-  %add29 = sub i64 0, %conv30
-  %sub = add i64 %add29, 0
-  %add31 = shl i64 %sub, 32
-  %sext59 = add i64 %add31, 4294967296
-  %conv33 = ashr exact i64 %sext59, 32
-  %call34 = tail call noalias i8* @xmalloc(i64 %conv33) nounwind optsize
-  br i1 undef, label %cond.false45, label %cond.true43
-
-cond.true43:                                      ; preds = %for.body14
-  unreachable
-
-cond.false45:                                     ; preds = %for.body14
-  %add.ptr = getelementptr inbounds i8* %path, i64 %conv30
-  unreachable
-
-if.end56:                                         ; preds = %for.cond10, %entry
-  ret i8* null
-}
-
-declare i32 @strncmp(i8* nocapture, i8* nocapture, i64) nounwind readonly optsize
-
-declare i8* @strcpy(i8*, i8* nocapture) nounwind

Removed: llvm/trunk/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll (removed)
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -fp-contract=fast | FileCheck %s --check-prefix=FAST
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
-target triple = "arm64-apple-ios7.0.0"
-
-;FAST-LABEL: _Z9example25v:
-;FAST: fcmgt.4s
-;FAST: ret
-
-;CHECK-LABEL: _Z9example25v:
-;CHECK: fcmgt.4s
-;CHECK: ret
-
-define <4 x i32> @_Z9example25v( <4 x float> %N0,  <4 x float> %N1) {
-  %A = fcmp olt <4 x float> %N0, %N1
-  %B = zext <4 x i1> %A to <4 x i32>
-  ret <4 x i32> %B
-}

Removed: llvm/trunk/test/CodeGen/ARM64/2013-01-23-frem-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2013-01-23-frem-crash.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2013-01-23-frem-crash.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2013-01-23-frem-crash.ll (removed)
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=arm64
-; Make sure we do not crash on this test.
-
-define void @autogen_SD13158() {
-entry:
-  %B26 = frem float 0.000000e+00, undef
-  br i1 undef, label %CF, label %CF77
-
-CF:                                               ; preds = %CF, %CF76
-  store float %B26, float* undef
-  br i1 undef, label %CF, label %CF77
-
-CF77:                                             ; preds = %CF
-  ret void
-}

Removed: llvm/trunk/test/CodeGen/ARM64/2013-01-23-sext-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2013-01-23-sext-crash.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2013-01-23-sext-crash.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2013-01-23-sext-crash.ll (removed)
@@ -1,37 +0,0 @@
-; RUN: llc < %s -march=arm64
-
-; Make sure we are not crashing on this test.
-
-define void @autogen_SD12881() {
-BB:
-  %B17 = ashr <4 x i32> zeroinitializer, zeroinitializer
-  br label %CF
-
-CF:                                               ; preds = %CF83, %CF, %BB
-  br i1 undef, label %CF, label %CF83
-
-CF83:                                             ; preds = %CF
-  %FC70 = sitofp <4 x i32> %B17 to <4 x double>
-  br label %CF
-}
-
-
-define void @autogen_SD12881_2() {
-BB:
-  %B17 = ashr <4 x i32> zeroinitializer, zeroinitializer
-  br label %CF
-
-CF:                                               ; preds = %CF83, %CF, %BB
-  br i1 undef, label %CF, label %CF83
-
-CF83:                                             ; preds = %CF
-  %FC70 = uitofp <4 x i32> %B17 to <4 x double>
-  br label %CF
-}
-
-define void @_Z12my_example2bv() nounwind noinline ssp {
-entry:
-  %0 = fptosi <2 x double> undef to <2 x i32>
-  store <2 x i32> %0, <2 x i32>* undef, align 8
-  ret void
-}
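
The underlying hazard in these reduced cases is illegal-type handling:
<4 x i32> to <4 x double> does not fit in one register, so type
legalization splits it into two <2 x i32> halves before converting. A
minimal sketch of one already-legal half, assuming the usual
extend-then-convert lowering (an assumption, not a CHECK from the
removed test):

define <2 x double> @half_sitofp(<2 x i32> %v) {
  ; Expect roughly: sshll v0.2d, v0.2s, #0 followed by scvtf v0.2d, v0.2d
  %r = sitofp <2 x i32> %v to <2 x double>
  ret <2 x double> %r
}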

Removed: llvm/trunk/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll (removed)
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-;CHECK-LABEL: Shuff:
-;CHECK: tbl.8b
-;CHECK: ret
-define <8 x i8 > @Shuff(<8 x i8> %in, <8 x i8>* %out) nounwind ssp {
-  %value = shufflevector <8 x i8> %in, <8 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-  ret <8 x i8> %value
-}
-
-
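
Why one tbl.8b covers the shuffle above: TBL indexes into the
concatenated source bytes and writes zero for any out-of-range index, so
the lanes taken from zeroinitializer need no second source register. One
plausible index vector (an illustration, not captured output from the
removed test):

  ; bytes 0-3 out of range => 0; bytes 4-7 taken from %in
  ;   ldr    d1, [{{x[0-9]+}}, :lo12:.LCPI0_0] ; index vector:
  ;                                            ; ff ff ff ff 04 05 06 07
  ;   tbl.8b v0, { v0 }, v1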

Removed: llvm/trunk/test/CodeGen/ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll (removed)
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=arm64
-
-; This test case exercises an infinite loop bug in the DAG combiner.
-; It would just keep performing the following replacements endlessly:
-; (1)  Replacing.3 0x2c509f0: v4i32 = any_extend 0x2c4cd08 [ORD=4]
-;      With: 0x2c4d128: v4i32 = sign_extend 0x2c4cd08 [ORD=4]
-;
-; (2)  Replacing.2 0x2c4d128: v4i32 = sign_extend 0x2c4cd08 [ORD=4]
-;      With: 0x2c509f0: v4i32 = any_extend 0x2c4cd08 [ORD=4]
-; As the (2) combine from SIGN_EXTEND to ANY_EXTEND is
-; an optimization that replaces unused bits with undefined bits, we remove
-; the (1) combine (it doesn't make sense to replace undefined bits
-; with sign bits).
-
-define <4 x i32> @infiniteLoop(<4 x i32> %in0, <4 x i16> %in1) {
-entry:
-  %cmp.i = icmp sge <4 x i16> %in1, <i16 32767, i16 32767, i16 -1, i16 -32768>
-  %sext.i = sext <4 x i1> %cmp.i to <4 x i32>
-  %mul.i = mul <4 x i32> %in0, %sext.i
-  %sext = shl <4 x i32> %mul.i, <i32 16, i32 16, i32 16, i32 16>
-  %vmovl.i.i = ashr <4 x i32> %sext, <i32 16, i32 16, i32 16, i32 16>
-  ret <4 x i32> %vmovl.i.i
-}
\ No newline at end of file
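
Sketched in DAG terms, the cycle described in the comment above is:

  ; (1) any_extend t  --> sign_extend t   (fold the extend to sign bits)
  ; (2) sign_extend t --> any_extend t    (the extended bits are unused)
  ;
  ; With both combines enabled, any node matching either pattern flips
  ; back and forth forever; the fix drops (1), since replacing undefined
  ; bits with sign bits gains nothing.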

Removed: llvm/trunk/test/CodeGen/ARM64/2014-04-28-sqshl-uqshl-i64Contant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2014-04-28-sqshl-uqshl-i64Contant.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2014-04-28-sqshl-uqshl-i64Contant.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2014-04-28-sqshl-uqshl-i64Contant.ll (removed)
@@ -1,19 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -march=arm64 | FileCheck %s
-
-; Check that sqshl/uqshl with a constant shift amount can be selected.
-define i64 @test_vqshld_s64_i(i64 %a) {
-; CHECK-LABEL: test_vqshld_s64_i:
-; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #36
-  %1 = tail call i64 @llvm.arm64.neon.sqshl.i64(i64 %a, i64 36)
-  ret i64 %1
-}
-
-define i64 @test_vqshld_u64_i(i64 %a) {
-; CHECK-LABEL: test_vqshld_u64_i:
-; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #36
-  %1 = tail call i64 @llvm.arm64.neon.uqshl.i64(i64 %a, i64 36)
-  ret i64 %1
-}
-
-declare i64 @llvm.arm64.neon.uqshl.i64(i64, i64)
-declare i64 @llvm.arm64.neon.sqshl.i64(i64, i64)
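
The immediate forms above matter because a non-constant amount has to
take the register encoding instead. A minimal contrasting sketch,
assuming the amount is first moved into an FP register (test_vqshld_s64_r
is a hypothetical name, not part of the removed file):

define i64 @test_vqshld_s64_r(i64 %a, i64 %amt) {
  ; Expect roughly: fmov d1, x1 then sqshl d0, d0, d1
  %1 = tail call i64 @llvm.arm64.neon.sqshl.i64(i64 %a, i64 %amt)
  ret i64 %1
}

declare i64 @llvm.arm64.neon.sqshl.i64(i64, i64)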

Removed: llvm/trunk/test/CodeGen/ARM64/2014-04-29-EXT-undef-mask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/2014-04-29-EXT-undef-mask.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/2014-04-29-EXT-undef-mask.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/2014-04-29-EXT-undef-mask.ll (removed)
@@ -1,23 +0,0 @@
-; RUN: llc < %s -O0 -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-; The following three test cases test shufflevectors whose masks begin with UNDEF lanes.
-define <8 x i16> @test_vext_undef_traverse(<8 x i16> %in) {
-;CHECK-LABEL: test_vext_undef_traverse:
-;CHECK: {{ext.16b.*v0, #4}}
-  %vext = shufflevector <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0>, <8 x i16> %in, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9>
-  ret <8 x i16> %vext
-}
-
-define <8 x i16> @test_vext_undef_traverse2(<8 x i16> %in) {
-;CHECK-LABEL: test_vext_undef_traverse2:
-;CHECK: {{ext.16b.*v0, #6}}
-  %vext = shufflevector <8 x i16> %in, <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2>
-  ret <8 x i16> %vext
-}
-
-define <8 x i8> @test_vext_undef_traverse3(<8 x i8> %in) {
-;CHECK-LABEL: test_vext_undef_traverse3:
-;CHECK: {{ext.8b.*v0, #6}}
-  %vext = shufflevector <8 x i8> %in, <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 5>
-  ret <8 x i8> %vext
-}
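
Background on the EXT matching: ext concatenates the two sources and
extracts a contiguous byte run starting at the immediate offset, so a
fully-defined mask must be one contiguous slice. When the leading mask
lanes are undef, the combiner is free to choose any offset that satisfies
the defined tail, which is what these tests pin down. A fully-defined
sketch of the base pattern:

define <8 x i16> @plain_ext(<8 x i16> %a, <8 x i16> %b) {
  ; Mask <2,3,4,5,6,7,8,9> is the contiguous run a[2..7],b[0..1]; expect
  ; ext.16b v0, v0, v1, #4 (4 bytes = 2 i16 lanes).
  %r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
  ret <8 x i16> %r
}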

Removed: llvm/trunk/test/CodeGen/ARM64/AdvSIMD-Scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/AdvSIMD-Scalar.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/AdvSIMD-Scalar.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/AdvSIMD-Scalar.ll (removed)
@@ -1,67 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=generic -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s -check-prefix=GENERIC
-
-define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: bar:
-; CHECK: add.2d	v[[REG:[0-9]+]], v0, v1
-; CHECK: add	d[[REG3:[0-9]+]], d[[REG]], d1
-; CHECK: sub	d[[REG2:[0-9]+]], d[[REG]], d1
-; GENERIC-LABEL: bar:
-; GENERIC: add	v[[REG:[0-9]+]].2d, v0.2d, v1.2d
-; GENERIC: add	d[[REG3:[0-9]+]], d[[REG]], d1
-; GENERIC: sub	d[[REG2:[0-9]+]], d[[REG]], d1
-  %add = add <2 x i64> %a, %b
-  %vgetq_lane = extractelement <2 x i64> %add, i32 0
-  %vgetq_lane2 = extractelement <2 x i64> %b, i32 0
-  %add3 = add i64 %vgetq_lane, %vgetq_lane2
-  %sub = sub i64 %vgetq_lane, %vgetq_lane2
-  %vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0
-  %vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1
-  ret <2 x i64> %vecinit8
-}
-
-define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: subdd_su64:
-; CHECK: sub d0, d1, d0
-; CHECK-NEXT: ret
-; GENERIC-LABEL: subdd_su64:
-; GENERIC: sub d0, d1, d0
-; GENERIC-NEXT: ret
-  %vecext = extractelement <2 x i64> %a, i32 0
-  %vecext1 = extractelement <2 x i64> %b, i32 0
-  %sub.i = sub nsw i64 %vecext1, %vecext
-  %retval = bitcast i64 %sub.i to double
-  ret double %retval
-}
-
-define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: vaddd_su64:
-; CHECK: add d0, d1, d0
-; CHECK-NEXT: ret
-; GENERIC-LABEL: vaddd_su64:
-; GENERIC: add d0, d1, d0
-; GENERIC-NEXT: ret
-  %vecext = extractelement <2 x i64> %a, i32 0
-  %vecext1 = extractelement <2 x i64> %b, i32 0
-  %add.i = add nsw i64 %vecext1, %vecext
-  %retval = bitcast i64 %add.i to double
-  ret double %retval
-}
-
-; The sub MI doesn't access the dsub register.
-define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: add_sub_su64:
-; CHECK: add d0, d1, d0
-; CHECK: sub d0, {{d[0-9]+}}, d0
-; CHECK-NEXT: ret
-; GENERIC-LABEL: add_sub_su64:
-; GENERIC: add d0, d1, d0
-; GENERIC: sub d0, {{d[0-9]+}}, d0
-; GENERIC-NEXT: ret
-  %vecext = extractelement <2 x i64> %a, i32 0
-  %vecext1 = extractelement <2 x i64> %b, i32 0
-  %add.i = add i64 %vecext1, %vecext
-  %sub.i = sub i64 0, %add.i
-  %retval = bitcast i64 %sub.i to double
-  ret double %retval
-}
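
What -arm64-simd-scalar buys above: an integer add/sub whose operands and
result only ever live in FP/SIMD registers is rewritten to the AdvSIMD
scalar form, avoiding cross-bank moves. Without the rewrite, vaddd_su64
would be expected to look roughly like this (a sketch, not captured
output):

  ; fmov x8, d1
  ; fmov x9, d0
  ; add  x8, x8, x9
  ; fmov d0, x8
  ; versus the single "add d0, d1, d0" checked above.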

Removed: llvm/trunk/test/CodeGen/ARM64/aapcs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aapcs.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aapcs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aapcs.ll (removed)
@@ -1,103 +0,0 @@
-; RUN: llc -mtriple=arm64-linux-gnu -enable-misched=false < %s | FileCheck %s
-
- at var = global i32 0, align 4
-
-define i128 @test_i128_align(i32, i128 %arg, i32 %after) {
-  store i32 %after, i32* @var, align 4
-; CHECK: str w4, [{{x[0-9]+}}, :lo12:var]
-
-  ret i128 %arg
-; CHECK: mov x0, x2
-; CHECK: mov x1, x3
-}
-
- at var64 = global i64 0, align 8
-
-  ; Check stack slots are 64-bit at all times.
-define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
-                                i32 %int, i64 %long) {
-  ; Part of last store. Blasted scheduler.
-; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32]
-
-  %ext_bool = zext i1 %bool to i64
-  store volatile i64 %ext_bool, i64* @var64, align 8
-; CHECK: ldrb w[[EXT:[0-9]+]], [sp]
-; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
-; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]
-
-  %ext_char = zext i8 %char to i64
-  store volatile i64 %ext_char, i64* @var64, align 8
-; CHECK: ldrb w[[EXT:[0-9]+]], [sp, #8]
-; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
-  %ext_short = zext i16 %short to i64
-  store volatile i64 %ext_short, i64* @var64, align 8
-; CHECK: ldrh w[[EXT:[0-9]+]], [sp, #16]
-; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
-  %ext_int = zext i32 %int to i64
-  store volatile i64 %ext_int, i64* @var64, align 8
-; CHECK: ldr{{b?}} w[[EXT:[0-9]+]], [sp, #24]
-; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
-  store volatile i64 %long, i64* @var64, align 8
-; CHECK: str [[LONG]], [{{x[0-9]+}}, :lo12:var64]
-
-  ret void
-}
-
-; Make sure the callee does extensions (in the absence of zext/sext
-; keyword on args) while we're here.
-
-define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) {
-  %ext_bool = zext i1 %bool to i64
-  store volatile i64 %ext_bool, i64* @var64
-; CHECK: and [[EXT:x[0-9]+]], x0, #0x1
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
-  %ext_char = sext i8 %char to i64
-  store volatile i64 %ext_char, i64* @var64
-; CHECK: sxtb [[EXT:x[0-9]+]], w1
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
-  %ext_short = zext i16 %short to i64
-  store volatile i64 %ext_short, i64* @var64
-; CHECK: and [[EXT:x[0-9]+]], x2, #0xffff
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
-  %ext_int = zext i32 %int to i64
-  store volatile i64 %ext_int, i64* @var64
-; CHECK: ubfx [[EXT:x[0-9]+]], x3, #0, #32
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
-  ret void
-}
-
-declare void @variadic(i32 %a, ...)
-
-  ; Under AAPCS variadic functions have the same calling convention as
-  ; others. The extra arguments should go in registers rather than on the stack.
-define void @test_variadic() {
-  call void(i32, ...)* @variadic(i32 0, i64 1, double 2.0)
-; CHECK: fmov d0, #2.0
-; CHECK: orr w1, wzr, #0x1
-; CHECK: bl variadic
-  ret void
-}
-
-; We weren't marking x7 as used after deciding that the i128 didn't fit into
-; registers and putting the first half on the stack, so the *second* half went
-; into x7. Yuck!
-define i128 @test_i128_shadow([7 x i64] %x0_x6, i128 %sp) {
-; CHECK-LABEL: test_i128_shadow:
-; CHECK: ldp x0, x1, [sp]
-
-  ret i128 %sp
-}
-
-; This test checks that an fp128 argument is correctly handled on the stack.
-define fp128 @test_fp128([8 x float] %arg0, fp128 %arg1) {
-; CHECK-LABEL: test_fp128:
-; CHECK: ldr {{q[0-9]+}}, [sp]
-  ret fp128 %arg1
-}
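
One detail worth spelling out from test_i128_align: AAPCS64 passes an
i128 in an even/odd register pair, so after the leading i32 in w0, x1 is
skipped, the i128 lands in x2/x3, and %after arrives in w4. With no
leading argument there is no hole; a minimal sketch assuming the same
lowering (a hypothetical test reusing @var from above):

define i128 @test_i128_noalign(i128 %arg, i32 %after) {
  store i32 %after, i32* @var, align 4
  ; x0/x1 already hold the i128: expect "str w2" and no mov x0/mov x1.
  ret i128 %arg
}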

Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-large-frame.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-large-frame.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-large-frame.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-large-frame.ll (removed)
@@ -1,69 +0,0 @@
-; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s
-declare void @use_addr(i8*)
-
- at addr = global i8* null
-
-define void @test_bigframe() {
-; CHECK-LABEL: test_bigframe:
-; CHECK: .cfi_startproc
-
-  %var1 = alloca i8, i32 20000000
-  %var2 = alloca i8, i32 16
-  %var3 = alloca i8, i32 20000000
-
-; CHECK: sub sp, sp, #4095, lsl #12
-; CHECK: sub sp, sp, #4095, lsl #12
-; CHECK: sub sp, sp, #1575, lsl #12
-; CHECK: sub sp, sp, #2576
-; CHECK: .cfi_def_cfa_offset 40000032
-
-
-; CHECK: add [[TMP:x[0-9]+]], sp, #4095, lsl #12
-; CHECK: add [[TMP1:x[0-9]+]], [[TMP]], #787, lsl #12
-; CHECK: add {{x[0-9]+}}, [[TMP1]], #3344
-  store volatile i8* %var1, i8** @addr
-
-  %var1plus2 = getelementptr i8* %var1, i32 2
-  store volatile i8* %var1plus2, i8** @addr
-
-; CHECK: add [[TMP:x[0-9]+]], sp, #4095, lsl #12
-; CHECK: add [[TMP1:x[0-9]+]], [[TMP]], #787, lsl #12
-; CHECK: add {{x[0-9]+}}, [[TMP1]], #3328
-  store volatile i8* %var2, i8** @addr
-
-  %var2plus2 = getelementptr i8* %var2, i32 2
-  store volatile i8* %var2plus2, i8** @addr
-
-  store volatile i8* %var3, i8** @addr
-
-  %var3plus2 = getelementptr i8* %var3, i32 2
-  store volatile i8* %var3plus2, i8** @addr
-
-; CHECK: add sp, sp, #4095, lsl #12
-; CHECK: add sp, sp, #4095, lsl #12
-; CHECK: add sp, sp, #1575, lsl #12
-; CHECK: add sp, sp, #2576
-; CHECK: .cfi_endproc
-  ret void
-}
-
-define void @test_mediumframe() {
-; CHECK-LABEL: test_mediumframe:
-  %var1 = alloca i8, i32 1000000
-  %var2 = alloca i8, i32 16
-  %var3 = alloca i8, i32 1000000
-; CHECK: sub sp, sp, #488, lsl #12
-; CHECK-NEXT: sub sp, sp, #1168
-
-  store volatile i8* %var1, i8** @addr
-; CHECK: add     [[VAR1ADDR:x[0-9]+]], sp, #244, lsl #12
-; CHECK: add     [[VAR1ADDR]], [[VAR1ADDR]], #592
-
-; CHECK: add [[VAR2ADDR:x[0-9]+]], sp, #244, lsl #12
-; CHECK: add [[VAR2ADDR]], [[VAR2ADDR]], #576
-
-  store volatile i8* %var2, i8** @addr
-; CHECK: add     sp, sp, #488, lsl #12
-; CHECK: add     sp, sp, #1168
-  ret void
-}
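
The magic numbers in test_bigframe decompose neatly: the add/sub
immediate field is 12 bits (0-4095), optionally shifted left by 12, so
the 40000016 bytes of locals must be split into chunks:

  20000000 + 16 + 20000000           = 40000016
  (4095 + 4095 + 1575) * 4096 + 2576 = 9765 * 4096 + 2576 = 40000016

The remaining 16 bytes in ".cfi_def_cfa_offset 40000032" are presumably
the saved fp/lr pair, since the run uses -disable-fp-elim.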

Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-2velem-high.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-2velem-high.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-2velem-high.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-2velem-high.ll (removed)
@@ -1,341 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-
-declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
-
-define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) {
-; CHECK-LABEL: test_vmull_high_n_s16:
-; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
-; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
-  %vmull15.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  ret <4 x i32> %vmull15.i.i
-}
-
-define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) {
-; CHECK-LABEL: test_vmull_high_n_s32:
-; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
-; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
-  %vmull9.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  ret <2 x i64> %vmull9.i.i
-}
-
-define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) {
-; CHECK-LABEL: test_vmull_high_n_u16:
-; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
-; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
-  %vmull15.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  ret <4 x i32> %vmull15.i.i
-}
-
-define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) {
-; CHECK-LABEL: test_vmull_high_n_u32:
-; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
-; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
-  %vmull9.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  ret <2 x i64> %vmull9.i.i
-}
-
-define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) {
-; CHECK-LABEL: test_vqdmull_high_n_s16:
-; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
-; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
-  %vqdmull15.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  ret <4 x i32> %vqdmull15.i.i
-}
-
-define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) {
-; CHECK-LABEL: test_vqdmull_high_n_s32:
-; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
-; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
-  %vqdmull9.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  ret <2 x i64> %vqdmull9.i.i
-}
-
-define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK-LABEL: test_vmlal_high_n_s16:
-; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
-; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
-  ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK-LABEL: test_vmlal_high_n_s32:
-; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
-; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
-  ret <2 x i64> %add.i.i
-}
-
-define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK-LABEL: test_vmlal_high_n_u16:
-; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
-; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
-  ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK-LABEL: test_vmlal_high_n_u32:
-; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
-; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
-  ret <2 x i64> %add.i.i
-}
-
-define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK-LABEL: test_vqdmlal_high_n_s16:
-; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vqdmlal15.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %vqdmlal17.i.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
-  ret <4 x i32> %vqdmlal17.i.i
-}
-
-define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK-LABEL: test_vqdmlal_high_n_s32:
-; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vqdmlal9.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %vqdmlal11.i.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
-  ret <2 x i64> %vqdmlal11.i.i
-}
-
-define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK-LABEL: test_vmlsl_high_n_s16:
-; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
-  ret <4 x i32> %sub.i.i
-}
-
-define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK-LABEL: test_vmlsl_high_n_s32:
-; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
-  ret <2 x i64> %sub.i.i
-}
-
-define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK-LABEL: test_vmlsl_high_n_u16:
-; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
-  ret <4 x i32> %sub.i.i
-}
-
-define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK-LABEL: test_vmlsl_high_n_u32:
-; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
-  ret <2 x i64> %sub.i.i
-}
-
-define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK-LABEL: test_vqdmlsl_high_n_s16:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
-  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
-  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
-  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
-  %vqdmlsl15.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
-  %vqdmlsl17.i.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
-  ret <4 x i32> %vqdmlsl17.i.i
-}
-
-define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK-LABEL: test_vqdmlsl_high_n_s32:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
-  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
-  %vqdmlsl9.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
-  %vqdmlsl11.i.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
-  ret <2 x i64> %vqdmlsl11.i.i
-}
-
-define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) {
-; CHECK-LABEL: test_vmul_n_f32:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-entry:
-  %vecinit.i = insertelement <2 x float> undef, float %b, i32 0
-  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1
-  %mul.i = fmul <2 x float> %vecinit1.i, %a
-  ret <2 x float> %mul.i
-}
-
-define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) {
-; CHECK-LABEL: test_vmulq_n_f32:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-entry:
-  %vecinit.i = insertelement <4 x float> undef, float %b, i32 0
-  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1
-  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2
-  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3
-  %mul.i = fmul <4 x float> %vecinit3.i, %a
-  ret <4 x float> %mul.i
-}
-
-define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) {
-; CHECK-LABEL: test_vmulq_n_f64:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-entry:
-  %vecinit.i = insertelement <2 x double> undef, double %b, i32 0
-  %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1
-  %mul.i = fmul <2 x double> %vecinit1.i, %a
-  ret <2 x double> %mul.i
-}
-
-define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
-; CHECK-LABEL: test_vfma_n_f32:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
-  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
-; CHECK-LABEL: test_vfmaq_n_f32:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
-  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
-  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
-  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
-; CHECK-LABEL: test_vfms_n_f32:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
-  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
-  %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
-  %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
-  ret <2 x float> %1
-}
-
-define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
-; CHECK-LABEL: test_vfmsq_n_f32:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
-  %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
-  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
-  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
-  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
-  %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
-  %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
-  ret <4 x float> %1
-}
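
The common shape in all the _high_n tests above: the scalar is splatted
with one dup and the high half of the wide source feeds the *2 form of
the instruction, so no explicit half extraction is needed. For contrast,
the low-half variant would use the plain 64-bit form; a minimal sketch
assuming the same intrinsic (test_vmull_n_s16 is illustrative, not from
the removed file):

define <4 x i32> @test_vmull_n_s16(<4 x i16> %a, i16 %b) {
entry:
  ; Expect roughly: dup v1.4h, w0 then smull v0.4s, v0.4h, v1.4h
  %vecinit = insertelement <4 x i16> undef, i16 %b, i32 0
  %splat = shufflevector <4 x i16> %vecinit, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %splat)
  ret <4 x i32> %vmull
}

declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>)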

Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-2velem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-2velem.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-2velem.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-2velem.ll (removed)
@@ -1,2853 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-declare <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
-
-declare <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float>, <4 x float>)
-
-declare <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float>, <2 x float>)
-
-declare <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
-
-declare <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
-
-declare <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
-
-declare <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)
-
-declare <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
-
-declare <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmla_lane_s16:
-; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i16> %shuffle, %b
-  %add = add <4 x i16> %mul, %a
-  ret <4 x i16> %add
-}
-
-define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlaq_lane_s16:
-; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <8 x i16> %shuffle, %b
-  %add = add <8 x i16> %mul, %a
-  ret <8 x i16> %add
-}
-
-define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmla_lane_s32:
-; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %mul = mul <2 x i32> %shuffle, %b
-  %add = add <2 x i32> %mul, %a
-  ret <2 x i32> %add
-}
-
-define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlaq_lane_s32:
-; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %mul = mul <4 x i32> %shuffle, %b
-  %add = add <4 x i32> %mul, %a
-  ret <4 x i32> %add
-}
-
-define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmla_laneq_s16:
-; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <4 x i16> %shuffle, %b
-  %add = add <4 x i16> %mul, %a
-  ret <4 x i16> %add
-}
-
-define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlaq_laneq_s16:
-; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <8 x i16> %shuffle, %b
-  %add = add <8 x i16> %mul, %a
-  ret <8 x i16> %add
-}
-
-define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmla_laneq_s32:
-; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %mul = mul <2 x i32> %shuffle, %b
-  %add = add <2 x i32> %mul, %a
-  ret <2 x i32> %add
-}
-
-define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlaq_laneq_s32:
-; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i32> %shuffle, %b
-  %add = add <4 x i32> %mul, %a
-  ret <4 x i32> %add
-}
-
-define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmls_lane_s16:
-; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i16> %shuffle, %b
-  %sub = sub <4 x i16> %a, %mul
-  ret <4 x i16> %sub
-}
-
-define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsq_lane_s16:
-; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <8 x i16> %shuffle, %b
-  %sub = sub <8 x i16> %a, %mul
-  ret <8 x i16> %sub
-}
-
-define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmls_lane_s32:
-; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %mul = mul <2 x i32> %shuffle, %b
-  %sub = sub <2 x i32> %a, %mul
-  ret <2 x i32> %sub
-}
-
-define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsq_lane_s32:
-; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %mul = mul <4 x i32> %shuffle, %b
-  %sub = sub <4 x i32> %a, %mul
-  ret <4 x i32> %sub
-}
-
-define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmls_laneq_s16:
-; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <4 x i16> %shuffle, %b
-  %sub = sub <4 x i16> %a, %mul
-  ret <4 x i16> %sub
-}
-
-define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsq_laneq_s16:
-; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <8 x i16> %shuffle, %b
-  %sub = sub <8 x i16> %a, %mul
-  ret <8 x i16> %sub
-}
-
-define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmls_laneq_s32:
-; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %mul = mul <2 x i32> %shuffle, %b
-  %sub = sub <2 x i32> %a, %mul
-  ret <2 x i32> %sub
-}
-
-define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsq_laneq_s32:
-; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i32> %shuffle, %b
-  %sub = sub <4 x i32> %a, %mul
-  ret <4 x i32> %sub
-}
-
-define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmul_lane_s16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmulq_lane_s16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmul_lane_s32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmulq_lane_s32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmul_lane_u16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmulq_lane_u16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmul_lane_u32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmulq_lane_u32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmul_laneq_s16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmulq_laneq_s16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmul_laneq_s32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmulq_laneq_s32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmul_laneq_u16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmulq_laneq_u16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmul_laneq_u32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmulq_laneq_u32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK-LABEL: test_vfma_lane_f32:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-
-define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK-LABEL: test_vfmaq_lane_f32:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-
-define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfma_laneq_f32:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfmaq_laneq_f32:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK-LABEL: test_vfms_lane_f32:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK-LABEL: test_vfmsq_lane_f32:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfms_laneq_f32:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfmsq_laneq_f32:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
-; CHECK-LABEL: test_vfmaq_lane_f64:
-; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
-
-define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK-LABEL: test_vfmaq_laneq_f64:
-; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
-; CHECK-LABEL: test_vfmsq_lane_f64:
-; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <1 x double> <double -0.000000e+00>, %v
-  %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK-LABEL: test_vfmsq_laneq_f64:
-; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
-  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfmas_laneq_f32
-; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %extract = extractelement <4 x float> %v, i32 3
-  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
-  ret float %0
-}
-
-declare float @llvm.fma.f32(float, float, float)
-
-define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
-; CHECK-LABEL: test_vfmsd_lane_f64
-; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: ret
-entry:
-  %extract.rhs = extractelement <1 x double> %v, i32 0
-  %extract = fsub double -0.000000e+00, %extract.rhs
-  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
-  ret double %0
-}
-
-declare double @llvm.fma.f64(double, double, double)
-
-define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfmss_laneq_f32
-; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %extract.rhs = extractelement <4 x float> %v, i32 3
-  %extract = fsub float -0.000000e+00, %extract.rhs
-  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
-  ret float %0
-}
-
-define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
-; CHECK-LABEL: test_vfmsd_laneq_f64
-; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
-  %extract.rhs = extractelement <2 x double> %v, i32 1
-  %extract = fsub double -0.000000e+00, %extract.rhs
-  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
-  ret double %0
-}
-
-define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlal_lane_s16:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlal_lane_s32:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_laneq_s16:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_laneq_s32:
-; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_s16:
-; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_s32:
-; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_s16:
-; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_s32:
-; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_lane_s16:
-; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_lane_s32:
-; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_s16:
-; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_s32:
-; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_s16:
-; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_s32:
-; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_s16:
-; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_s32:
-; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlal_lane_u16:
-; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlal_lane_u32:
-; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_laneq_u16:
-; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_laneq_u32:
-; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_u16:
-; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_u32:
-; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_u16:
-; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_u32:
-; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_lane_u16:
-; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_lane_u32:
-; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_u16:
-; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_u32:
-; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_u16:
-; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_u32:
-; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_u16:
-; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_u32:
-; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmull_lane_s16:
-; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmull_lane_s32:
-; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmull_lane_u16:
-; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmull_lane_u32:
-; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_lane_s16:
-; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_lane_s32:
-; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_lane_u16:
-; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_lane_u32:
-; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_laneq_s16:
-; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_laneq_s32:
-; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_laneq_u16:
-; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_laneq_u32:
-; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_s16:
-; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_s32:
-; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_u16:
-; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_u32:
-; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlal_lane_s16:
-; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
-  ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlal_lane_s32:
-; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
-  ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlal_high_lane_s16:
-; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
-  ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlal_high_lane_s32:
-; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
-  ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlsl_lane_s16:
-; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
-  ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlsl_lane_s32:
-; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
-  ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
-  ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
-  ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_lane_s16:
-; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_lane_s32:
-; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_laneq_s16:
-; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_laneq_s32:
-; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_high_lane_s16:
-; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_high_lane_s32:
-; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_high_laneq_s16:
-; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_high_laneq_s32:
-; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmulh_lane_s16:
-; CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqdmulh2.i = tail call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i16> %vqdmulh2.i
-}
-
-define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmulhq_lane_s16:
-; CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %vqdmulh2.i = tail call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
-  ret <8 x i16> %vqdmulh2.i
-}
-
-define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmulh_lane_s32:
-; CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqdmulh2.i = tail call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i32> %vqdmulh2.i
-}
-
-define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmulhq_lane_s32:
-; CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %vqdmulh2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
-  ret <4 x i32> %vqdmulh2.i
-}
-
-define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqrdmulh_lane_s16:
-; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vqrdmulh2.i = tail call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i16> %vqrdmulh2.i
-}
-
-define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqrdmulhq_lane_s16:
-; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %vqrdmulh2.i = tail call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
-  ret <8 x i16> %vqrdmulh2.i
-}
-
-define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqrdmulh_lane_s32:
-; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  %vqrdmulh2.i = tail call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i32> %vqrdmulh2.i
-}
-
-define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqrdmulhq_lane_s32:
-; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %vqrdmulh2.i = tail call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
-  ret <4 x i32> %vqrdmulh2.i
-}
-
-define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
-; CHECK-LABEL: test_vmul_lane_f32:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
-  %mul = fmul <2 x float> %shuffle, %a
-  ret <2 x float> %mul
-}
-
-define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
-; CHECK-LABEL: test_vmul_lane_f64:
-; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: ret
-entry:
-  %0 = bitcast <1 x double> %a to <8 x i8>
-  %1 = bitcast <8 x i8> %0 to double
-  %extract = extractelement <1 x double> %v, i32 0
-  %2 = fmul double %1, %extract
-  %3 = insertelement <1 x double> undef, double %2, i32 0
-  ret <1 x double> %3
-}
-
-define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
-; CHECK-LABEL: test_vmulq_lane_f32:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %mul = fmul <4 x float> %shuffle, %a
-  ret <4 x float> %mul
-}
-
-define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
-; CHECK-LABEL: test_vmulq_lane_f64:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
-  %mul = fmul <2 x double> %shuffle, %a
-  ret <2 x double> %mul
-}
-
-define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
-; CHECK-LABEL: test_vmul_laneq_f32:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
-  %mul = fmul <2 x float> %shuffle, %a
-  ret <2 x float> %mul
-}
-
-define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
-; CHECK-LABEL: test_vmul_laneq_f64:
-; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
-  %0 = bitcast <1 x double> %a to <8 x i8>
-  %1 = bitcast <8 x i8> %0 to double
-  %extract = extractelement <2 x double> %v, i32 1
-  %2 = fmul double %1, %extract
-  %3 = insertelement <1 x double> undef, double %2, i32 0
-  ret <1 x double> %3
-}
-
-define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
-; CHECK-LABEL: test_vmulq_laneq_f32:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %mul = fmul <4 x float> %shuffle, %a
-  ret <4 x float> %mul
-}
-
-define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
-; CHECK-LABEL: test_vmulq_laneq_f64:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-  %mul = fmul <2 x double> %shuffle, %a
-  ret <2 x double> %mul
-}
-
-define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
-; CHECK-LABEL: test_vmulx_lane_f32:
-; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
-  %vmulx2.i = tail call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
-  ret <2 x float> %vmulx2.i
-}
-
-define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
-; CHECK-LABEL: test_vmulxq_lane_f32:
-; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %vmulx2.i = tail call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
-  ret <4 x float> %vmulx2.i
-}
-
-define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
-; CHECK-LABEL: test_vmulxq_lane_f64:
-; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
-  %vmulx2.i = tail call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
-  ret <2 x double> %vmulx2.i
-}
-
-define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
-; CHECK-LABEL: test_vmulx_laneq_f32:
-; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
-  %vmulx2.i = tail call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
-  ret <2 x float> %vmulx2.i
-}
-
-define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
-; CHECK-LABEL: test_vmulxq_laneq_f32:
-; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %vmulx2.i = tail call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
-  ret <4 x float> %vmulx2.i
-}
-
-define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
-; CHECK-LABEL: test_vmulxq_laneq_f64:
-; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-  %vmulx2.i = tail call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
-  ret <2 x double> %vmulx2.i
-}
-
-define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmla_lane_s16_0:
-; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %b
-  %add = add <4 x i16> %mul, %a
-  ret <4 x i16> %add
-}
-
-define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlaq_lane_s16_0:
-; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %b
-  %add = add <8 x i16> %mul, %a
-  ret <8 x i16> %add
-}
-
-define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmla_lane_s32_0:
-; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %b
-  %add = add <2 x i32> %mul, %a
-  ret <2 x i32> %add
-}
-
-define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlaq_lane_s32_0:
-; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %b
-  %add = add <4 x i32> %mul, %a
-  ret <4 x i32> %add
-}
-
-define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmla_laneq_s16_0:
-; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %b
-  %add = add <4 x i16> %mul, %a
-  ret <4 x i16> %add
-}
-
-define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlaq_laneq_s16_0:
-; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %b
-  %add = add <8 x i16> %mul, %a
-  ret <8 x i16> %add
-}
-
-define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmla_laneq_s32_0:
-; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %b
-  %add = add <2 x i32> %mul, %a
-  ret <2 x i32> %add
-}
-
-define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlaq_laneq_s32_0:
-; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %b
-  %add = add <4 x i32> %mul, %a
-  ret <4 x i32> %add
-}
-
-define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmls_lane_s16_0:
-; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %b
-  %sub = sub <4 x i16> %a, %mul
-  ret <4 x i16> %sub
-}
-
-define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsq_lane_s16_0:
-; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %b
-  %sub = sub <8 x i16> %a, %mul
-  ret <8 x i16> %sub
-}
-
-define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmls_lane_s32_0:
-; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %b
-  %sub = sub <2 x i32> %a, %mul
-  ret <2 x i32> %sub
-}
-
-define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsq_lane_s32_0:
-; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %b
-  %sub = sub <4 x i32> %a, %mul
-  ret <4 x i32> %sub
-}
-
-define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmls_laneq_s16_0:
-; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %b
-  %sub = sub <4 x i16> %a, %mul
-  ret <4 x i16> %sub
-}
-
-define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsq_laneq_s16_0:
-; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %b
-  %sub = sub <8 x i16> %a, %mul
-  ret <8 x i16> %sub
-}
-
-define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmls_laneq_s32_0:
-; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %b
-  %sub = sub <2 x i32> %a, %mul
-  ret <2 x i32> %sub
-}
-
-define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsq_laneq_s32_0:
-; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %b
-  %sub = sub <4 x i32> %a, %mul
-  ret <4 x i32> %sub
-}
-
-define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmul_lane_s16_0:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmulq_lane_s16_0:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmul_lane_s32_0:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmulq_lane_s32_0:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmul_lane_u16_0:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmulq_lane_u16_0:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmul_lane_u32_0:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmulq_lane_u32_0:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmul_laneq_s16_0:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmulq_laneq_s16_0:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmul_laneq_s32_0:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmulq_laneq_s32_0:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmul_laneq_u16_0:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i16> %shuffle, %a
-  ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmulq_laneq_u16_0:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
-  %mul = mul <8 x i16> %shuffle, %a
-  ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmul_laneq_u32_0:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %mul = mul <2 x i32> %shuffle, %a
-  ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmulq_laneq_u32_0:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
-  %mul = mul <4 x i32> %shuffle, %a
-  ret <4 x i32> %mul
-}
-
-define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK-LABEL: test_vfma_lane_f32_0:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK-LABEL: test_vfmaq_lane_f32_0:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfma_laneq_f32_0:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfmaq_laneq_f32_0:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK-LABEL: test_vfms_lane_f32_0:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK-LABEL: test_vfmsq_lane_f32_0:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfms_laneq_f32_0:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfmsq_laneq_f32_0:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK-LABEL: test_vfmaq_laneq_f64_0:
-; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK-LABEL: test_vfmsq_laneq_f64_0:
-; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
-  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
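
The lane-0 FMA tests above all share one shape: splat a lane of %v with a zeroinitializer shufflevector, then feed the splat into @llvm.fma.*; the fmls variants first negate %v by subtracting it from -0.0. A minimal standalone sketch of that shape, outside the diff (function name illustrative; assuming something like llc -mtriple=arm64-none-linux-gnu -mattr=+neon):

declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)

; Illustrative sketch, not part of the diff: splat lane 0 of %v and fuse.
; Expected to select "fmla v0.2s, v1.2s, v2.s[0]" rather than dup + fmla.
define <2 x float> @fma_lane0_sketch(<2 x float> %acc, <2 x float> %b, <2 x float> %v) {
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %r = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %acc)
  ret <2 x float> %r
}
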
-define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlal_lane_s16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlal_lane_s32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_laneq_s16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_laneq_s32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_s16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_s32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_s16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_s32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_lane_s16_0:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_lane_s32_0:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_s16_0:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_s32_0:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_s16_0:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_s32_0:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlal_lane_u16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlal_lane_u32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_laneq_u16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_laneq_u32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_u16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_u32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_lane_u16_0:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_lane_u32_0:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_u16_0:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_u32_0:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %sub = sub <4 x i32> %a, %vmull2.i
-  ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_u32_0:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %sub = sub <2 x i64> %a, %vmull2.i
-  ret <2 x i64> %sub
-}
-
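
None of the vmlal/vmlsl tests above use a dedicated accumulate intrinsic: each widens with smull/umull and then applies a plain add or sub, and the backend is expected to fold the pair into the smlal/umlal/smlsl/umlsl lane forms (mlal2/mlsl2 for the high-half variants). A standalone sketch, written with the post-rename llvm.aarch64.neon.* spelling this commit introduces (function name illustrative):

declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)

; Illustrative sketch: widen, then accumulate with a plain add.
; Expected to select "smlal v0.4s, v1.4h, v2.h[0]", not smull + add.
define <4 x i32> @smlal_lane0_sketch(<4 x i32> %acc, <4 x i16> %b, <4 x i16> %v) {
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %acc2 = add <4 x i32> %wide, %acc
  ret <4 x i32> %acc2
}
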
-define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmull_lane_s16_0:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmull_lane_s32_0:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmull_lane_u16_0:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmull_lane_u32_0:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_lane_s16_0:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_lane_s32_0:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_lane_u16_0:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_lane_u32_0:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_laneq_s16_0:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_laneq_s32_0:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_laneq_u16_0:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_laneq_u32_0:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_s16_0:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_s32_0:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_u16_0:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_u32_0:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vmull2.i
-}
-
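
The _high variants above grab the top half of the 128-bit source with an explicit shufflevector (<i32 4..7> for i16 lanes, <i32 2, i32 3> for i32 lanes) before the widening multiply, and the extract is expected to fold into the *2 instruction. A standalone sketch under the same assumptions:

declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)

; Illustrative sketch: high half of %a times lane 0 of %v.
; Expected to select "smull2 v0.4s, v1.8h, v2.h[0]".
define <4 x i32> @smull2_lane0_sketch(<8 x i16> %a, <4 x i16> %v) {
  %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %r = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  ret <4 x i32> %r
}
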
-define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlal_lane_s16_0:
-; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
-  ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlal_lane_s32_0:
-; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
-  ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
-; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
-  ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
-; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
-  ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
-; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
-  ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
-; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
-  ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
-; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
-  ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
-; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
-  ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_lane_s16_0:
-; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_lane_s32_0:
-; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_laneq_s16_0:
-; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_laneq_s32_0:
-; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
-; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
-; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_high_laneq_s16_0:
-; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_high_laneq_s32_0:
-; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  ret <2 x i64> %vqdmull2.i
-}
-
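
The saturating forms chain two intrinsics instead of using a plain add/sub: sqdmull followed by sqadd (or sqsub), which is expected to fuse into sqdmlal (or sqdmlsl). A standalone sketch, post-rename spelling, illustrative name:

declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)

; Illustrative sketch: saturating doubling widen, then saturating accumulate.
; Expected to select "sqdmlal v0.4s, v1.4h, v2.h[0]".
define <4 x i32> @sqdmlal_lane0_sketch(<4 x i32> %acc, <4 x i16> %b, <4 x i16> %v) {
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %dbl = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %sat = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %dbl)
  ret <4 x i32> %sat
}
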
-define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmulh_lane_s16_0:
-; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqdmulh2.i = tail call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i16> %vqdmulh2.i
-}
-
-define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
-; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
-  %vqdmulh2.i = tail call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
-  ret <8 x i16> %vqdmulh2.i
-}
-
-define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmulh_lane_s32_0:
-; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqdmulh2.i = tail call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i32> %vqdmulh2.i
-}
-
-define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
-; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
-  %vqdmulh2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
-  ret <4 x i32> %vqdmulh2.i
-}
-
-define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
-; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vqrdmulh2.i = tail call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
-  ret <4 x i16> %vqrdmulh2.i
-}
-
-define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
-; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
-  %vqrdmulh2.i = tail call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
-  ret <8 x i16> %vqrdmulh2.i
-}
-
-define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
-; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vqrdmulh2.i = tail call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
-  ret <2 x i32> %vqrdmulh2.i
-}
-
-define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
-; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
-  %vqrdmulh2.i = tail call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
-  ret <4 x i32> %vqrdmulh2.i
-}
-
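
Unlike the widening multiplies, sqdmulh/sqrdmulh keep the element width of their inputs, so the q-register tests splat a 64-bit source straight into the wider vector (e.g. a <4 x i16> splatted through an <8 x i32> mask). A standalone sketch under the same assumptions:

declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)

; Illustrative sketch: lane 0 of the 64-bit %v splatted across 8 lanes.
; Expected to select "sqdmulh v0.8h, v1.8h, v2.h[0]".
define <8 x i16> @sqdmulhq_lane0_sketch(<8 x i16> %a, <4 x i16> %v) {
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %r = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %splat)
  ret <8 x i16> %r
}
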
-define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
-; CHECK-LABEL: test_vmul_lane_f32_0:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
-  %mul = fmul <2 x float> %shuffle, %a
-  ret <2 x float> %mul
-}
-
-define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
-; CHECK-LABEL: test_vmulq_lane_f32_0:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
-  %mul = fmul <4 x float> %shuffle, %a
-  ret <4 x float> %mul
-}
-
-define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
-; CHECK-LABEL: test_vmul_laneq_f32_0:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
-  %mul = fmul <2 x float> %shuffle, %a
-  ret <2 x float> %mul
-}
-
-define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
-; CHECK-LABEL: test_vmul_laneq_f64_0:
-; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %0 = bitcast <1 x double> %a to <8 x i8>
-  %1 = bitcast <8 x i8> %0 to double
-  %extract = extractelement <2 x double> %v, i32 0
-  %2 = fmul double %1, %extract
-  %3 = insertelement <1 x double> undef, double %2, i32 0
-  ret <1 x double> %3
-}
-
-define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
-; CHECK-LABEL: test_vmulq_laneq_f32_0:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
-  %mul = fmul <4 x float> %shuffle, %a
-  ret <4 x float> %mul
-}
-
-define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
-; CHECK-LABEL: test_vmulq_laneq_f64_0:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
-  %mul = fmul <2 x double> %shuffle, %a
-  ret <2 x double> %mul
-}
-
-define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
-; CHECK-LABEL: test_vmulx_lane_f32_0:
-; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
-  %vmulx2.i = tail call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
-  ret <2 x float> %vmulx2.i
-}
-
-define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
-; CHECK-LABEL: test_vmulxq_lane_f32_0:
-; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
-  %vmulx2.i = tail call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
-  ret <4 x float> %vmulx2.i
-}
-
-define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
-; CHECK-LABEL: test_vmulxq_lane_f64_0:
-; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
-  %vmulx2.i = tail call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
-  ret <2 x double> %vmulx2.i
-}
-
-define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
-; CHECK-LABEL: test_vmulx_laneq_f32_0:
-; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
-  %vmulx2.i = tail call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
-  ret <2 x float> %vmulx2.i
-}
-
-define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
-; CHECK-LABEL: test_vmulxq_laneq_f32_0:
-; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
-  %vmulx2.i = tail call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
-  ret <4 x float> %vmulx2.i
-}
-
-define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
-; CHECK-LABEL: test_vmulxq_laneq_f64_0:
-; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
-  %vmulx2.i = tail call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
-  ret <2 x double> %vmulx2.i
-}
-
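
Plain by-lane fmul needs no intrinsic at all (a splat shuffle feeding fmul suffices), while the extended multiply goes through llvm.arm64.neon.fmulx here and llvm.aarch64.neon.fmulx in the moved copy. A standalone sketch of the fmulx shape (illustrative name):

declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)

; Illustrative sketch: extended multiply by lane 0.
; Expected to select "fmulx v0.2s, v1.2s, v2.s[0]".
define <2 x float> @fmulx_lane0_sketch(<2 x float> %a, <2 x float> %v) {
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %r = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %splat)
  ret <2 x float> %r
}
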

Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-3vdiff.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-3vdiff.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-3vdiff.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-3vdiff.ll (removed)
@@ -1,1829 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-declare <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
-
-declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8>, <8 x i8>)
-
-declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8>, <8 x i8>)
-
-declare <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
-
-declare <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
-
-declare <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
-
-declare <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
-
-declare <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
-
-declare <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
-
-declare <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>)
-
-declare <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>)
-
-declare <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>)
-
-declare <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>)
-
-declare <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>)
-
-declare <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>)
-
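
This declaration block enumerates every llvm.arm64.neon.* intrinsic the file depends on, including the rounding add/sub-and-narrow pair (raddhn/rsubhn) and the absolute-difference family (sabd/uabd); in the moved copy each is spelled llvm.aarch64.neon.* instead. One signature lifted straight from the list, as a standalone sketch (post-rename spelling, illustrative name):

declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>)

; Illustrative sketch: each result element is the rounded high half of the sum.
; Expected to select "raddhn v0.8b, v1.8h, v2.8h".
define <8 x i8> @raddhn_sketch(<8 x i16> %a, <8 x i16> %b) {
  %r = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i8> %r
}
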
-define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vaddl_s8:
-; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
-  %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
-  %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vaddl_s16:
-; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
-  %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
-  %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vaddl_s32:
-; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
-  %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
-  %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vaddl_u8:
-; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
-  %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
-  %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vaddl_u16:
-; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
-  %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
-  %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vaddl_u32:
-; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
-  %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
-  %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vaddl_high_s8:
-; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
-  %add.i = add <8 x i16> %0, %1
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddl_high_s16:
-; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
-  %add.i = add <4 x i32> %0, %1
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddl_high_s32:
-; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
-  %add.i = add <2 x i64> %0, %1
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vaddl_high_u8:
-; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
-  %add.i = add <8 x i16> %0, %1
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddl_high_u16:
-; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
-  %add.i = add <4 x i32> %0, %1
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddl_high_u32:
-; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
-  %add.i = add <2 x i64> %0, %1
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vaddw_s8:
-; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
-  %add.i = add <8 x i16> %vmovl.i.i, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vaddw_s16:
-; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
-  %add.i = add <4 x i32> %vmovl.i.i, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vaddw_s32:
-; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
-  %add.i = add <2 x i64> %vmovl.i.i, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vaddw_u8:
-; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
-  %add.i = add <8 x i16> %vmovl.i.i, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vaddw_u16:
-; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
-  %add.i = add <4 x i32> %vmovl.i.i, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vaddw_u32:
-; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
-  %add.i = add <2 x i64> %vmovl.i.i, %a
-  ret <2 x i64> %add.i
-}
-
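
The vaddw tests extend only one operand; the other is already wide, so the expected selection is saddw/uaddw rather than the two-extension saddl/uaddl above. A minimal standalone sketch (illustrative name):

; Illustrative sketch: extend %b only, then add.
; Expected to select "uaddw v0.8h, v1.8h, v2.8b".
define <8 x i16> @uaddw_sketch(<8 x i16> %a, <8 x i8> %b) {
  %wide = zext <8 x i8> %b to <8 x i16>
  %r = add <8 x i16> %a, %wide
  ret <8 x i16> %r
}
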
-define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vaddw_high_s8:
-; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %add.i = add <8 x i16> %0, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddw_high_s16:
-; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %add.i = add <4 x i32> %0, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddw_high_s32:
-; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %add.i = add <2 x i64> %0, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vaddw_high_u8:
-; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %add.i = add <8 x i16> %0, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddw_high_u16:
-; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %add.i = add <4 x i32> %0, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddw_high_u32:
-; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %add.i = add <2 x i64> %0, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vsubl_s8:
-; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
-  %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
-  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vsubl_s16:
-; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
-  %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
-  %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vsubl_s32:
-; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
-  %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
-  %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vsubl_u8:
-; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
-  %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
-  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vsubl_u16:
-; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
-  %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
-  %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vsubl_u32:
-; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
-  %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
-  %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vsubl_high_s8:
-; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
-  %sub.i = sub <8 x i16> %0, %1
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubl_high_s16:
-; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
-  %sub.i = sub <4 x i32> %0, %1
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubl_high_s32:
-; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
-  %sub.i = sub <2 x i64> %0, %1
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vsubl_high_u8:
-; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
-  %sub.i = sub <8 x i16> %0, %1
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubl_high_u16:
-; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
-  %sub.i = sub <4 x i32> %0, %1
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubl_high_u32:
-; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
-  %sub.i = sub <2 x i64> %0, %1
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vsubw_s8:
-; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
-  %sub.i = sub <8 x i16> %a, %vmovl.i.i
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vsubw_s16:
-; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
-  %sub.i = sub <4 x i32> %a, %vmovl.i.i
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vsubw_s32:
-; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
-  %sub.i = sub <2 x i64> %a, %vmovl.i.i
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vsubw_u8:
-; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
-entry:
-  %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
-  %sub.i = sub <8 x i16> %a, %vmovl.i.i
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vsubw_u16:
-; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
-entry:
-  %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
-  %sub.i = sub <4 x i32> %a, %vmovl.i.i
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vsubw_u32:
-; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
-entry:
-  %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
-  %sub.i = sub <2 x i64> %a, %vmovl.i.i
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vsubw_high_s8:
-; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %sub.i = sub <8 x i16> %a, %0
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubw_high_s16:
-; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %sub.i = sub <4 x i32> %a, %0
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubw_high_s32:
-; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %sub.i = sub <2 x i64> %a, %0
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vsubw_high_u8:
-; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
-  %sub.i = sub <8 x i16> %a, %0
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubw_high_u16:
-; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
-  %sub.i = sub <4 x i32> %a, %0
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubw_high_u32:
-; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
-  %sub.i = sub <2 x i64> %a, %0
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddhn_s16:
-; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vaddhn.i = add <8 x i16> %a, %b
-  %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
-  ret <8 x i8> %vaddhn2.i
-}
-
-define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddhn_s32:
-; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vaddhn.i = add <4 x i32> %a, %b
-  %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
-  %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
-  ret <4 x i16> %vaddhn2.i
-}
-
-define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vaddhn_s64:
-; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vaddhn.i = add <2 x i64> %a, %b
-  %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
-  %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
-  ret <2 x i32> %vaddhn2.i
-}
-
-define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddhn_u16:
-; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vaddhn.i = add <8 x i16> %a, %b
-  %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
-  ret <8 x i8> %vaddhn2.i
-}
-
-define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddhn_u32:
-; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vaddhn.i = add <4 x i32> %a, %b
-  %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
-  %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
-  ret <4 x i16> %vaddhn2.i
-}
-
-define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vaddhn_u64:
-; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vaddhn.i = add <2 x i64> %a, %b
-  %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
-  %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
-  ret <2 x i32> %vaddhn2.i
-}
-
-define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddhn_high_s16:
-; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vaddhn.i.i = add <8 x i16> %a, %b
-  %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddhn_high_s32:
-; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vaddhn.i.i = add <4 x i32> %a, %b
-  %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
-  %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vaddhn_high_s64:
-; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vaddhn.i.i = add <2 x i64> %a, %b
-  %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
-  %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddhn_high_u16:
-; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vaddhn.i.i = add <8 x i16> %a, %b
-  %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddhn_high_u32:
-; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vaddhn.i.i = add <4 x i32> %a, %b
-  %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
-  %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vaddhn_high_u64:
-; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vaddhn.i.i = add <2 x i64> %a, %b
-  %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
-  %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vraddhn_s16:
-; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vraddhn2.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  ret <8 x i8> %vraddhn2.i
-}
-
-define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vraddhn_s32:
-; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i16> %vraddhn2.i
-}
-
-define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vraddhn_s64:
-; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vraddhn2.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i32> %vraddhn2.i
-}
-
-define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vraddhn_u16:
-; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vraddhn2.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  ret <8 x i8> %vraddhn2.i
-}
-
-define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vraddhn_u32:
-; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i16> %vraddhn2.i
-}
-
-define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vraddhn_u64:
-; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vraddhn2.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i32> %vraddhn2.i
-}
-
-define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vraddhn_high_s16:
-; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vraddhn2.i.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vraddhn_high_s32:
-; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vraddhn2.i.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vraddhn_high_s64:
-; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vraddhn2.i.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vraddhn_high_u16:
-; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vraddhn2.i.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vraddhn_high_u32:
-; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vraddhn2.i.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vraddhn_high_u64:
-; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vraddhn2.i.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubhn_s16:
-; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vsubhn.i = sub <8 x i16> %a, %b
-  %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
-  ret <8 x i8> %vsubhn2.i
-}
-
-define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubhn_s32:
-; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vsubhn.i = sub <4 x i32> %a, %b
-  %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
-  %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
-  ret <4 x i16> %vsubhn2.i
-}
-
-define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsubhn_s64:
-; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vsubhn.i = sub <2 x i64> %a, %b
-  %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
-  %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
-  ret <2 x i32> %vsubhn2.i
-}
-
-define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubhn_u16:
-; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vsubhn.i = sub <8 x i16> %a, %b
-  %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
-  ret <8 x i8> %vsubhn2.i
-}
-
-define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubhn_u32:
-; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vsubhn.i = sub <4 x i32> %a, %b
-  %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
-  %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
-  ret <4 x i16> %vsubhn2.i
-}
-
-define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsubhn_u64:
-; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vsubhn.i = sub <2 x i64> %a, %b
-  %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
-  %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
-  ret <2 x i32> %vsubhn2.i
-}
-
-define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubhn_high_s16:
-; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vsubhn.i.i = sub <8 x i16> %a, %b
-  %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubhn_high_s32:
-; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vsubhn.i.i = sub <4 x i32> %a, %b
-  %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
-  %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsubhn_high_s64:
-; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vsubhn.i.i = sub <2 x i64> %a, %b
-  %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
-  %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubhn_high_u16:
-; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vsubhn.i.i = sub <8 x i16> %a, %b
-  %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-  %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubhn_high_u32:
-; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vsubhn.i.i = sub <4 x i32> %a, %b
-  %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
-  %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsubhn_high_u64:
-; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vsubhn.i.i = sub <2 x i64> %a, %b
-  %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
-  %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vrsubhn_s16:
-; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  ret <8 x i8> %vrsubhn2.i
-}
-
-define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vrsubhn_s32:
-; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vrsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i16> %vrsubhn2.i
-}
-
-define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vrsubhn_s64:
-; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vrsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i32> %vrsubhn2.i
-}
-
-define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vrsubhn_u16:
-; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  ret <8 x i8> %vrsubhn2.i
-}
-
-define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vrsubhn_u32:
-; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vrsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i16> %vrsubhn2.i
-}
-
-define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vrsubhn_u64:
-; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vrsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i32> %vrsubhn2.i
-}
-
-define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vrsubhn_high_s16:
-; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vrsubhn_high_s32:
-; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vrsubhn_high_s64:
-; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vrsubhn_high_u16:
-; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-  %0 = bitcast <8 x i8> %r to <1 x i64>
-  %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
-  ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vrsubhn_high_u32:
-; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-  %0 = bitcast <4 x i16> %r to <1 x i64>
-  %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vrsubhn_high_u64:
-; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-  %0 = bitcast <2 x i32> %r to <1 x i64>
-  %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
-  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
-  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
-  ret <4 x i32> %2
-}
-
-define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vabdl_s8:
-; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vabd.i.i = tail call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
-  %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
-  ret <8 x i16> %vmovl.i.i
-}
-
-define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vabdl_s16:
-; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vabd2.i.i = tail call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
-  %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
-  ret <4 x i32> %vmovl.i.i
-}
-
-define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vabdl_s32:
-; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vabd2.i.i = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
-  %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
-  ret <2 x i64> %vmovl.i.i
-}
-
-define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vabdl_u8:
-; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vabd.i.i = tail call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
-  %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
-  ret <8 x i16> %vmovl.i.i
-}
-
-define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vabdl_u16:
-; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vabd2.i.i = tail call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
-  %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
-  ret <4 x i32> %vmovl.i.i
-}
-
-define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vabdl_u32:
-; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vabd2.i.i = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
-  %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
-  ret <2 x i64> %vmovl.i.i
-}
-
-define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK-LABEL: test_vabal_s8:
-; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vabd.i.i.i = tail call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
-  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
-  %add.i = add <8 x i16> %vmovl.i.i.i, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK-LABEL: test_vabal_s16:
-; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vabd2.i.i.i = tail call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
-  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
-  %add.i = add <4 x i32> %vmovl.i.i.i, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK-LABEL: test_vabal_s32:
-; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vabd2.i.i.i = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
-  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
-  %add.i = add <2 x i64> %vmovl.i.i.i, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK-LABEL: test_vabal_u8:
-; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vabd.i.i.i = tail call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
-  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
-  %add.i = add <8 x i16> %vmovl.i.i.i, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK-LABEL: test_vabal_u16:
-; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vabd2.i.i.i = tail call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
-  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
-  %add.i = add <4 x i32> %vmovl.i.i.i, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK-LABEL: test_vabal_u32:
-; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vabd2.i.i.i = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
-  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
-  %add.i = add <2 x i64> %vmovl.i.i.i, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vabdl_high_s8:
-; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vabd.i.i.i = tail call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
-  ret <8 x i16> %vmovl.i.i.i
-}
-
-define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vabdl_high_s16:
-; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vabd2.i.i.i = tail call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
-  ret <4 x i32> %vmovl.i.i.i
-}
-
-define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vabdl_high_s32:
-; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vabd2.i.i.i = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
-  ret <2 x i64> %vmovl.i.i.i
-}
-
-define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vabdl_high_u8:
-; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vabd.i.i.i = tail call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
-  ret <8 x i16> %vmovl.i.i.i
-}
-
-define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vabdl_high_u16:
-; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vabd2.i.i.i = tail call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
-  ret <4 x i32> %vmovl.i.i.i
-}
-
-define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vabdl_high_u32:
-; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vabd2.i.i.i = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
-  ret <2 x i64> %vmovl.i.i.i
-}
-
-define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK-LABEL: test_vabal_high_s8:
-; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
-  %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
-  ret <8 x i16> %add.i.i
-}
-
-define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK-LABEL: test_vabal_high_s16:
-; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
-  %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
-  ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK-LABEL: test_vabal_high_s32:
-; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
-  %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
-  ret <2 x i64> %add.i.i
-}
-
-define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK-LABEL: test_vabal_high_u8:
-; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
-  %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
-  ret <8 x i16> %add.i.i
-}
-
-define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK-LABEL: test_vabal_high_u16:
-; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
-  %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
-  ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK-LABEL: test_vabal_high_u32:
-; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
-  %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
-  ret <2 x i64> %add.i.i
-}
-
-define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vmull_s8:
-; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
-  ret <8 x i16> %vmull.i
-}
-
-define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vmull_s16:
-; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vmull_s32:
-; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
-  ret <2 x i64> %vmull2.i
-}
-
-define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vmull_u8:
-; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
-  ret <8 x i16> %vmull.i
-}
-
-define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vmull_u16:
-; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
-  ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vmull_u32:
-; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
-  ret <2 x i64> %vmull2.i
-}
-
-define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vmull_high_s8:
-; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  ret <8 x i16> %vmull.i.i
-}
-
-define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vmull_high_s16:
-; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  ret <4 x i32> %vmull2.i.i
-}
-
-define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vmull_high_s32:
-; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  ret <2 x i64> %vmull2.i.i
-}
-
-define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vmull_high_u8:
-; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  ret <8 x i16> %vmull.i.i
-}
-
-define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vmull_high_u16:
-; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  ret <4 x i32> %vmull2.i.i
-}
-
-define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vmull_high_u32:
-; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  ret <2 x i64> %vmull2.i.i
-}
-
-define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK-LABEL: test_vmlal_s8:
-; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
-  %add.i = add <8 x i16> %vmull.i.i, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK-LABEL: test_vmlal_s16:
-; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
-  %add.i = add <4 x i32> %vmull2.i.i, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK-LABEL: test_vmlal_s32:
-; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
-  %add.i = add <2 x i64> %vmull2.i.i, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK-LABEL: test_vmlal_u8:
-; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
-  %add.i = add <8 x i16> %vmull.i.i, %a
-  ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK-LABEL: test_vmlal_u16:
-; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
-  %add.i = add <4 x i32> %vmull2.i.i, %a
-  ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK-LABEL: test_vmlal_u32:
-; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
-  %add.i = add <2 x i64> %vmull2.i.i, %a
-  ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK-LABEL: test_vmlal_high_s8:
-; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %add.i.i = add <8 x i16> %vmull.i.i.i, %a
-  ret <8 x i16> %add.i.i
-}
-
-define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK-LABEL: test_vmlal_high_s16:
-; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
-  ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK-LABEL: test_vmlal_high_s32:
-; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
-  ret <2 x i64> %add.i.i
-}
-
-define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK-LABEL: test_vmlal_high_u8:
-; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %add.i.i = add <8 x i16> %vmull.i.i.i, %a
-  ret <8 x i16> %add.i.i
-}
-
-define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK-LABEL: test_vmlal_high_u16:
-; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
-  ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK-LABEL: test_vmlal_high_u32:
-; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
-  ret <2 x i64> %add.i.i
-}
-
-define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK-LABEL: test_vmlsl_s8:
-; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
-  %sub.i = sub <8 x i16> %a, %vmull.i.i
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK-LABEL: test_vmlsl_s16:
-; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
-  %sub.i = sub <4 x i32> %a, %vmull2.i.i
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK-LABEL: test_vmlsl_s32:
-; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
-  %sub.i = sub <2 x i64> %a, %vmull2.i.i
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK-LABEL: test_vmlsl_u8:
-; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
-  %sub.i = sub <8 x i16> %a, %vmull.i.i
-  ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK-LABEL: test_vmlsl_u16:
-; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
-  %sub.i = sub <4 x i32> %a, %vmull2.i.i
-  ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK-LABEL: test_vmlsl_u32:
-; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
-  %sub.i = sub <2 x i64> %a, %vmull2.i.i
-  ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK-LABEL: test_vmlsl_high_s8:
-; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
-  ret <8 x i16> %sub.i.i
-}
-
-define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK-LABEL: test_vmlsl_high_s16:
-; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
-  ret <4 x i32> %sub.i.i
-}
-
-define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK-LABEL: test_vmlsl_high_s32:
-; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
-  ret <2 x i64> %sub.i.i
-}
-
-define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK-LABEL: test_vmlsl_high_u8:
-; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
-  ret <8 x i16> %sub.i.i
-}
-
-define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK-LABEL: test_vmlsl_high_u16:
-; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
-  ret <4 x i32> %sub.i.i
-}
-
-define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK-LABEL: test_vmlsl_high_u32:
-; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
-  ret <2 x i64> %sub.i.i
-}
-
-define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vqdmull_s16:
-; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
-  ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vqdmull_s32:
-; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
-  ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK-LABEL: test_vqdmlal_s16:
-; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
-  %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
-  ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK-LABEL: test_vqdmlal_s32:
-; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
-  %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
-  ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK-LABEL: test_vqdmlsl_s16:
-; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
-  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
-  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
-  ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK-LABEL: test_vqdmlsl_s32:
-; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
-  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
-  ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vqdmull_high_s16:
-; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vqdmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  ret <4 x i32> %vqdmull2.i.i
-}
-
-define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vqdmull_high_s32:
-; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vqdmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  ret <2 x i64> %vqdmull2.i.i
-}
-
-define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK-LABEL: test_vqdmlal_high_s16:
-; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vqdmlal2.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %vqdmlal4.i.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i)
-  ret <4 x i32> %vqdmlal4.i.i
-}
-
-define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK-LABEL: test_vqdmlal_high_s32:
-; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vqdmlal2.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %vqdmlal4.i.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i)
-  ret <2 x i64> %vqdmlal4.i.i
-}
-
-define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK-LABEL: test_vqdmlsl_high_s16:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
-  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %vqdmlsl2.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
-  %vqdmlsl4.i.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i)
-  ret <4 x i32> %vqdmlsl4.i.i
-}
-
-define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK-LABEL: test_vqdmlsl_high_s32:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
-  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %vqdmlsl2.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
-  %vqdmlsl4.i.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i)
-  ret <2 x i64> %vqdmlsl4.i.i
-}
-
-define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vmull_p8:
-; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
-  %vmull.i = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
-  ret <8 x i16> %vmull.i
-}
-
-define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vmull_high_p8:
-; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
-  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
-  ret <8 x i16> %vmull.i.i
-}
-
-define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
-; CHECK-LABEL: test_vmull_p64
-; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
-entry:
-  %vmull2.i = tail call <16 x i8> @llvm.arm64.neon.pmull64(i64 %a, i64 %b)
-  %vmull3.i = bitcast <16 x i8> %vmull2.i to i128
-  ret i128 %vmull3.i
-}
-
-define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
-; CHECK-LABEL: test_vmull_high_p64
-; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %0 = extractelement <2 x i64> %a, i32 1
-  %1 = extractelement <2 x i64> %b, i32 1
-  %vmull2.i.i = tail call <16 x i8> @llvm.arm64.neon.pmull64(i64 %0, i64 %1) #1
-  %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128
-  ret i128 %vmull3.i.i
-}
-
-declare <16 x i8> @llvm.arm64.neon.pmull64(i64, i64) #5
-
-

Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-aba-abd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-aba-abd.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-aba-abd.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-aba-abd.ll (removed)
@@ -1,236 +0,0 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uabd_v8i8:
-  %abd = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uabd v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %abd
-}
-
-define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uaba_v8i8:
-  %abd = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-  %aba = add <8 x i8> %lhs, %abd
-; CHECK: uaba v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %aba
-}
-
-define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sabd_v8i8:
-  %abd = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sabd v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %abd
-}
-
-define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_saba_v8i8:
-  %abd = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-  %aba = add <8 x i8> %lhs, %abd
-; CHECK: saba v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %aba
-}
-
-declare <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uabd_v16i8:
-  %abd = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uabd v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %abd
-}
-
-define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uaba_v16i8:
-  %abd = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-  %aba = add <16 x i8> %lhs, %abd
-; CHECK: uaba v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %aba
-}
-
-define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sabd_v16i8:
-  %abd = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sabd v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %abd
-}
-
-define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_saba_v16i8:
-  %abd = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-  %aba = add <16 x i8> %lhs, %abd
-; CHECK: saba v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %aba
-}
-
-declare <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uabd_v4i16:
-  %abd = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uabd v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %abd
-}
-
-define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uaba_v4i16:
-  %abd = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-  %aba = add <4 x i16> %lhs, %abd
-; CHECK: uaba v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %aba
-}
-
-define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sabd_v4i16:
-  %abd = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sabd v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %abd
-}
-
-define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_saba_v4i16:
-  %abd = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-  %aba = add <4 x i16> %lhs, %abd
-; CHECK: saba v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %aba
-}
-
-declare <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uabd_v8i16:
-  %abd = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uabd v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %abd
-}
-
-define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uaba_v8i16:
-  %abd = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-  %aba = add <8 x i16> %lhs, %abd
-; CHECK: uaba v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %aba
-}
-
-define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sabd_v8i16:
-  %abd = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sabd v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %abd
-}
-
-define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_saba_v8i16:
-  %abd = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-  %aba = add <8 x i16> %lhs, %abd
-; CHECK: saba v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %aba
-}
-
-declare <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uabd_v2i32:
-  %abd = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uabd v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %abd
-}
-
-define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uaba_v2i32:
-  %abd = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-  %aba = add <2 x i32> %lhs, %abd
-; CHECK: uaba v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %aba
-}
-
-define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sabd_v2i32:
-  %abd = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sabd v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %abd
-}
-
-define <2 x i32> @test_sabd_v2i32_const() {
-; CHECK: test_sabd_v2i32_const:
-; CHECK: movi     d1, #0x00ffffffff0000
-; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
-  %1 = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(
-    <2 x i32> <i32 -2147483648, i32 2147450880>,
-    <2 x i32> <i32 -65536, i32 65535>)
-  ret <2 x i32> %1
-}
-
-define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_saba_v2i32:
-  %abd = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-  %aba = add <2 x i32> %lhs, %abd
-; CHECK: saba v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %aba
-}
-
-declare <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uabd_v4i32:
-  %abd = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uabd v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %abd
-}
-
-define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uaba_v4i32:
-  %abd = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-  %aba = add <4 x i32> %lhs, %abd
-; CHECK: uaba v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %aba
-}
-
-define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sabd_v4i32:
-  %abd = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sabd v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %abd
-}
-
-define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_saba_v4i32:
-  %abd = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-  %aba = add <4 x i32> %lhs, %abd
-; CHECK: saba v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %aba
-}
-
-declare <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float>, <2 x float>)
-
-define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fabd_v2f32:
-  %abd = call <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fabd v0.2s, v0.2s, v1.2s
-  ret <2 x float> %abd
-}
-
-declare <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float>, <4 x float>)
-
-define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fabd_v4f32:
-  %abd = call <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fabd v0.4s, v0.4s, v1.4s
-  ret <4 x float> %abd
-}
-
-declare <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double>, <2 x double>)
-
-define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fabd_v2f64:
-  %abd = call <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fabd v0.2d, v0.2d, v1.2d
-  ret <2 x double> %abd
-}

Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-across.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-across.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-across.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-across.ll (removed)
@@ -1,460 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-declare float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float>)
-
-declare float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float>)
-
-declare float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float>)
-
-declare float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float>)
-
-declare i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32>)
-
-declare i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16>)
-
-declare i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8>)
-
-declare i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16>)
-
-declare i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8>)
-
-declare i32 @llvm.arm64.neon.uminv.i32.v4i32(<4 x i32>)
-
-declare i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16>)
-
-declare i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>)
-
-declare i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32>)
-
-declare i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16>)
-
-declare i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8>)
-
-declare i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16>)
-
-declare i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8>)
-
-declare i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16>)
-
-declare i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8>)
-
-declare i32 @llvm.arm64.neon.umaxv.i32.v4i32(<4 x i32>)
-
-declare i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16>)
-
-declare i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>)
-
-declare i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32>)
-
-declare i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16>)
-
-declare i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8>)
-
-declare i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16>)
-
-declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>)
-
-declare i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16>)
-
-declare i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8>)
-
-declare i64 @llvm.arm64.neon.uaddlv.i64.v4i32(<4 x i32>)
-
-declare i32 @llvm.arm64.neon.uaddlv.i32.v8i16(<8 x i16>)
-
-declare i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8>)
-
-declare i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32>)
-
-declare i32 @llvm.arm64.neon.saddlv.i32.v8i16(<8 x i16>)
-
-declare i32 @llvm.arm64.neon.saddlv.i32.v16i8(<16 x i8>)
-
-declare i32 @llvm.arm64.neon.uaddlv.i32.v4i16(<4 x i16>)
-
-declare i32 @llvm.arm64.neon.uaddlv.i32.v8i8(<8 x i8>)
-
-declare i32 @llvm.arm64.neon.saddlv.i32.v4i16(<4 x i16>)
-
-declare i32 @llvm.arm64.neon.saddlv.i32.v8i8(<8 x i8>)
-
-define i16 @test_vaddlv_s8(<8 x i8> %a) {
-; CHECK: test_vaddlv_s8:
-; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %saddlvv.i = tail call i32 @llvm.arm64.neon.saddlv.i32.v8i8(<8 x i8> %a)
-  %0 = trunc i32 %saddlvv.i to i16
-  ret i16 %0
-}
-
-define i32 @test_vaddlv_s16(<4 x i16> %a) {
-; CHECK: test_vaddlv_s16:
-; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %saddlvv.i = tail call i32 @llvm.arm64.neon.saddlv.i32.v4i16(<4 x i16> %a)
-  ret i32 %saddlvv.i
-}
-
-define i16 @test_vaddlv_u8(<8 x i8> %a) {
-; CHECK: test_vaddlv_u8:
-; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %uaddlvv.i = tail call i32 @llvm.arm64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
-  %0 = trunc i32 %uaddlvv.i to i16
-  ret i16 %0
-}
-
-define i32 @test_vaddlv_u16(<4 x i16> %a) {
-; CHECK: test_vaddlv_u16:
-; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %uaddlvv.i = tail call i32 @llvm.arm64.neon.uaddlv.i32.v4i16(<4 x i16> %a)
-  ret i32 %uaddlvv.i
-}
-
-define i16 @test_vaddlvq_s8(<16 x i8> %a) {
-; CHECK: test_vaddlvq_s8:
-; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %saddlvv.i = tail call i32 @llvm.arm64.neon.saddlv.i32.v16i8(<16 x i8> %a)
-  %0 = trunc i32 %saddlvv.i to i16
-  ret i16 %0
-}
-
-define i32 @test_vaddlvq_s16(<8 x i16> %a) {
-; CHECK: test_vaddlvq_s16:
-; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %saddlvv.i = tail call i32 @llvm.arm64.neon.saddlv.i32.v8i16(<8 x i16> %a)
-  ret i32 %saddlvv.i
-}
-
-define i64 @test_vaddlvq_s32(<4 x i32> %a) {
-; CHECK: test_vaddlvq_s32:
-; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %saddlvv.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32> %a)
-  ret i64 %saddlvv.i
-}
-
-define i16 @test_vaddlvq_u8(<16 x i8> %a) {
-; CHECK: test_vaddlvq_u8:
-; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %uaddlvv.i = tail call i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8> %a)
-  %0 = trunc i32 %uaddlvv.i to i16
-  ret i16 %0
-}
-
-define i32 @test_vaddlvq_u16(<8 x i16> %a) {
-; CHECK: test_vaddlvq_u16:
-; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %uaddlvv.i = tail call i32 @llvm.arm64.neon.uaddlv.i32.v8i16(<8 x i16> %a)
-  ret i32 %uaddlvv.i
-}
-
-define i64 @test_vaddlvq_u32(<4 x i32> %a) {
-; CHECK: test_vaddlvq_u32:
-; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %uaddlvv.i = tail call i64 @llvm.arm64.neon.uaddlv.i64.v4i32(<4 x i32> %a)
-  ret i64 %uaddlvv.i
-}
-
-define i8 @test_vmaxv_s8(<8 x i8> %a) {
-; CHECK: test_vmaxv_s8:
-; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8> %a)
-  %0 = trunc i32 %smaxv.i to i8
-  ret i8 %0
-}
-
-define i16 @test_vmaxv_s16(<4 x i16> %a) {
-; CHECK: test_vmaxv_s16:
-; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16> %a)
-  %0 = trunc i32 %smaxv.i to i16
-  ret i16 %0
-}
-
-define i8 @test_vmaxv_u8(<8 x i8> %a) {
-; CHECK: test_vmaxv_u8:
-; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %a)
-  %0 = trunc i32 %umaxv.i to i8
-  ret i8 %0
-}
-
-define i16 @test_vmaxv_u16(<4 x i16> %a) {
-; CHECK: test_vmaxv_u16:
-; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16> %a)
-  %0 = trunc i32 %umaxv.i to i16
-  ret i16 %0
-}
-
-define i8 @test_vmaxvq_s8(<16 x i8> %a) {
-; CHECK: test_vmaxvq_s8:
-; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8> %a)
-  %0 = trunc i32 %smaxv.i to i8
-  ret i8 %0
-}
-
-define i16 @test_vmaxvq_s16(<8 x i16> %a) {
-; CHECK: test_vmaxvq_s16:
-; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16> %a)
-  %0 = trunc i32 %smaxv.i to i16
-  ret i16 %0
-}
-
-define i32 @test_vmaxvq_s32(<4 x i32> %a) {
-; CHECK: test_vmaxvq_s32:
-; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32> %a)
-  ret i32 %smaxv.i
-}
-
-define i8 @test_vmaxvq_u8(<16 x i8> %a) {
-; CHECK: test_vmaxvq_u8:
-; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8> %a)
-  %0 = trunc i32 %umaxv.i to i8
-  ret i8 %0
-}
-
-define i16 @test_vmaxvq_u16(<8 x i16> %a) {
-; CHECK: test_vmaxvq_u16:
-; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16> %a)
-  %0 = trunc i32 %umaxv.i to i16
-  ret i16 %0
-}
-
-define i32 @test_vmaxvq_u32(<4 x i32> %a) {
-; CHECK: test_vmaxvq_u32:
-; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v4i32(<4 x i32> %a)
-  ret i32 %umaxv.i
-}
-
-define i8 @test_vminv_s8(<8 x i8> %a) {
-; CHECK: test_vminv_s8:
-; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8> %a)
-  %0 = trunc i32 %sminv.i to i8
-  ret i8 %0
-}
-
-define i16 @test_vminv_s16(<4 x i16> %a) {
-; CHECK: test_vminv_s16:
-; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16> %a)
-  %0 = trunc i32 %sminv.i to i16
-  ret i16 %0
-}
-
-define i8 @test_vminv_u8(<8 x i8> %a) {
-; CHECK: test_vminv_u8:
-; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %a)
-  %0 = trunc i32 %uminv.i to i8
-  ret i8 %0
-}
-
-define i16 @test_vminv_u16(<4 x i16> %a) {
-; CHECK: test_vminv_u16:
-; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16> %a)
-  %0 = trunc i32 %uminv.i to i16
-  ret i16 %0
-}
-
-define i8 @test_vminvq_s8(<16 x i8> %a) {
-; CHECK: test_vminvq_s8:
-; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8> %a)
-  %0 = trunc i32 %sminv.i to i8
-  ret i8 %0
-}
-
-define i16 @test_vminvq_s16(<8 x i16> %a) {
-; CHECK: test_vminvq_s16:
-; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16> %a)
-  %0 = trunc i32 %sminv.i to i16
-  ret i16 %0
-}
-
-define i32 @test_vminvq_s32(<4 x i32> %a) {
-; CHECK: test_vminvq_s32:
-; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32> %a)
-  ret i32 %sminv.i
-}
-
-define i8 @test_vminvq_u8(<16 x i8> %a) {
-; CHECK: test_vminvq_u8:
-; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8> %a)
-  %0 = trunc i32 %uminv.i to i8
-  ret i8 %0
-}
-
-define i16 @test_vminvq_u16(<8 x i16> %a) {
-; CHECK: test_vminvq_u16:
-; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16> %a)
-  %0 = trunc i32 %uminv.i to i16
-  ret i16 %0
-}
-
-define i32 @test_vminvq_u32(<4 x i32> %a) {
-; CHECK: test_vminvq_u32:
-; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v4i32(<4 x i32> %a)
-  ret i32 %uminv.i
-}
-
-define i8 @test_vaddv_s8(<8 x i8> %a) {
-; CHECK: test_vaddv_s8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8> %a)
-  %0 = trunc i32 %vaddv.i to i8
-  ret i8 %0
-}
-
-define i16 @test_vaddv_s16(<4 x i16> %a) {
-; CHECK: test_vaddv_s16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16> %a)
-  %0 = trunc i32 %vaddv.i to i16
-  ret i16 %0
-}
-
-define i8 @test_vaddv_u8(<8 x i8> %a) {
-; CHECK: test_vaddv_u8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
-  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8> %a)
-  %0 = trunc i32 %vaddv.i to i8
-  ret i8 %0
-}
-
-define i16 @test_vaddv_u16(<4 x i16> %a) {
-; CHECK: test_vaddv_u16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
-  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16> %a)
-  %0 = trunc i32 %vaddv.i to i16
-  ret i16 %0
-}
-
-define i8 @test_vaddvq_s8(<16 x i8> %a) {
-; CHECK: test_vaddvq_s8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8> %a)
-  %0 = trunc i32 %vaddv.i to i8
-  ret i8 %0
-}
-
-define i16 @test_vaddvq_s16(<8 x i16> %a) {
-; CHECK: test_vaddvq_s16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16> %a)
-  %0 = trunc i32 %vaddv.i to i16
-  ret i16 %0
-}
-
-define i32 @test_vaddvq_s32(<4 x i32> %a) {
-; CHECK: test_vaddvq_s32:
-; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32> %a)
-  ret i32 %vaddv.i
-}
-
-define i8 @test_vaddvq_u8(<16 x i8> %a) {
-; CHECK: test_vaddvq_u8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
-  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8> %a)
-  %0 = trunc i32 %vaddv.i to i8
-  ret i8 %0
-}
-
-define i16 @test_vaddvq_u16(<8 x i16> %a) {
-; CHECK: test_vaddvq_u16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
-  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16> %a)
-  %0 = trunc i32 %vaddv.i to i16
-  ret i16 %0
-}
-
-define i32 @test_vaddvq_u32(<4 x i32> %a) {
-; CHECK: test_vaddvq_u32:
-; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32> %a)
-  ret i32 %vaddv.i
-}
-
-define float @test_vmaxvq_f32(<4 x float> %a) {
-; CHECK: test_vmaxvq_f32:
-; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %0 = call float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float> %a)
-  ret float %0
-}
-
-define float @test_vminvq_f32(<4 x float> %a) {
-; CHECK: test_vminvq_f32:
-; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %0 = call float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float> %a)
-  ret float %0
-}
-
-define float @test_vmaxnmvq_f32(<4 x float> %a) {
-; CHECK: test_vmaxnmvq_f32:
-; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %0 = call float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float> %a)
-  ret float %0
-}
-
-define float @test_vminnmvq_f32(<4 x float> %a) {
-; CHECK: test_vminnmvq_f32:
-; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
-  %0 = call float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float> %a)
-  ret float %0
-}
-

Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-add-pairwise.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-add-pairwise.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-add-pairwise.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-add-pairwise.ll (removed)
@@ -1,100 +0,0 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_addp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: test_addp_v8i8:
-  %tmp1 = call <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: addp v0.8b, v0.8b, v1.8b
-  ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_addp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_addp_v16i8:
-  %tmp1 = call <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: addp v0.16b, v0.16b, v1.16b
-  ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_addp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_addp_v4i16:
-  %tmp1 = call <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: addp v0.4h, v0.4h, v1.4h
-  ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_addp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_addp_v8i16:
-  %tmp1 = call <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: addp v0.8h, v0.8h, v1.8h
-  ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_addp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_addp_v2i32:
-  %tmp1 = call <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: addp v0.2s, v0.2s, v1.2s
-  ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_addp_v4i32:
-  %tmp1 = call <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: addp v0.4s, v0.4s, v1.4s
-  ret <4 x i32> %tmp1
-}
-
-
-declare <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_addp_v2i64:
-        %val = call <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: addp v0.2d, v0.2d, v1.2d
-        ret <2 x i64> %val
-}
-
-declare <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_faddp_v2f32:
-        %val = call <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: faddp v0.2s, v0.2s, v1.2s
-        ret <2 x float> %val
-}
-
-define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_faddp_v4f32:
-        %val = call <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: faddp v0.4s, v0.4s, v1.4s
-        ret <4 x float> %val
-}
-
-define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_faddp_v2f64:
-        %val = call <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: faddp v0.2d, v0.2d, v1.2d
-        ret <2 x double> %val
-}
-
-define i32 @test_vaddv.v2i32(<2 x i32> %a) {
-; CHECK-LABEL: test_vaddv.v2i32
-; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %1 = tail call i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32> %a)
-  ret i32 %1
-}
-
-declare i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32>)

Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-add-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-add-sub.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-add-sub.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-add-sub.ll (removed)
@@ -1,237 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -arm64-simd-scalar| FileCheck %s
-
-define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-	%tmp3 = add <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @add16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-	%tmp3 = add <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @add4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-	%tmp3 = add <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @add8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-	%tmp3 = add <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @add2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = add <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @add4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = add <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @add2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-	%tmp3 = add <2 x i64> %A, %B;
-	ret <2 x i64> %tmp3
-}
-
-define <2 x float> @add2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = fadd <2 x float> %A, %B;
-	ret <2 x float> %tmp3
-}
-
-define <4 x float> @add4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = fadd <4 x float> %A, %B;
-	ret <4 x float> %tmp3
-}
-define <2 x double> @add2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-	%tmp3 = fadd <2 x double> %A, %B;
-	ret <2 x double> %tmp3
-}
-
-define <8 x i8> @sub8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-	%tmp3 = sub <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @sub16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-	%tmp3 = sub <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @sub4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-	%tmp3 = sub <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @sub8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-	%tmp3 = sub <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @sub2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = sub <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @sub4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = sub <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sub2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-	%tmp3 = sub <2 x i64> %A, %B;
-	ret <2 x i64> %tmp3
-}
-
-define <2 x float> @sub2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = fsub <2 x float> %A, %B;
-	ret <2 x float> %tmp3
-}
-
-define <4 x float> @sub4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = fsub <4 x float> %A, %B;
-	ret <4 x float> %tmp3
-}
-define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-	%tmp3 = fsub <2 x double> %A, %B;
-	ret <2 x double> %tmp3
-}
-
-define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vadd_f64
-; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fadd <1 x double> %a, %b
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmul_f64
-; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fmul <1 x double> %a, %b
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vdiv_f64
-; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fdiv <1 x double> %a, %b
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vmla_f64
-; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fmul <1 x double> %b, %c
-  %2 = fadd <1 x double> %1, %a
-  ret <1 x double> %2
-}
-
-define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vmls_f64
-; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fmul <1 x double> %b, %c
-  %2 = fsub <1 x double> %a, %1
-  ret <1 x double> %2
-}
-
-define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vfms_f64
-; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fsub <1 x double> <double -0.000000e+00>, %b
-  %2 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %1, <1 x double> %c, <1 x double> %a)
-  ret <1 x double> %2
-}
-
-define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vfma_f64
-; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vsub_f64
-; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fsub <1 x double> %a, %b
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vabd_f64
-; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.arm64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmax_f64
-; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.arm64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmin_f64
-; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.arm64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmaxnm_f64
-; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.arm64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vminnm_f64
-; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.arm64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vabs_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vabs_f64
-; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vneg_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vneg_f64
-; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = fsub <1 x double> <double -0.000000e+00>, %a
-  ret <1 x double> %1
-}
-
-declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
-declare <1 x double> @llvm.arm64.neon.fminnm.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.arm64.neon.fmaxnm.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.arm64.neon.fmin.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.arm64.neon.fmax.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.arm64.neon.fabd.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)

Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-copy.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-copy.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-copy.ll (removed)
@@ -1,1445 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-
-define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
-; CHECK-LABEL: ins16bw:
-; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
-  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
-  ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
-; CHECK-LABEL: ins8hw:
-; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
-  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
-  ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
-; CHECK-LABEL: ins4sw:
-; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
-  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
-  ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
-; CHECK-LABEL: ins2dw:
-; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
-  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
-  ret <2 x i64> %tmp3
-}
-
-define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
-; CHECK-LABEL: ins8bw:
-; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
-  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
-  ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
-; CHECK-LABEL: ins4hw:
-; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
-  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
-  ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
-; CHECK-LABEL: ins2sw:
-; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
-  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
-  ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
-; CHECK-LABEL: ins16b16:
-; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
-  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
-  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
-  ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
-; CHECK-LABEL: ins8h8:
-; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
-  ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
-; CHECK-LABEL: ins4s4:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
-  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
-  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
-  ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
-; CHECK-LABEL: ins2d2:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
-  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
-  ret <2 x i64> %tmp4
-}
-
-define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
-; CHECK-LABEL: ins4f4:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
-  %tmp3 = extractelement <4 x float> %tmp1, i32 2
-  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
-  ret <4 x float> %tmp4
-}
-
-define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
-; CHECK-LABEL: ins2df2:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <2 x double> %tmp1, i32 0
-  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
-  ret <2 x double> %tmp4
-}
-
-define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
-; CHECK-LABEL: ins8b16:
-; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
-  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
-  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
-  ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
-; CHECK-LABEL: ins4h8:
-; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
-  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
-  ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
-; CHECK-LABEL: ins2s4:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
-  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
-  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
-  ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
-; CHECK-LABEL: ins1d2:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
-  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
-  ret <2 x i64> %tmp4
-}
-
-define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
-; CHECK-LABEL: ins2f4:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
-  %tmp3 = extractelement <2 x float> %tmp1, i32 1
-  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
-  ret <4 x float> %tmp4
-}
-
-define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
-; CHECK-LABEL: ins1f2:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <1 x double> %tmp1, i32 0
-  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
-  ret <2 x double> %tmp4
-}
-
-define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
-; CHECK-LABEL: ins16b8:
-; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
-  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
-  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
-  ret <8 x i8> %tmp4
-}
-
-define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
-; CHECK-LABEL: ins8h4:
-; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
-  ret <4 x i16> %tmp4
-}
-
-define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
-; CHECK-LABEL: ins4s2:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
-  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
-  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
-  ret <2 x i32> %tmp4
-}
-
-define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
-; CHECK-LABEL: ins2d1:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
-  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
-  ret <1 x i64> %tmp4
-}
-
-define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
-; CHECK-LABEL: ins4f2:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
-  %tmp3 = extractelement <4 x float> %tmp1, i32 2
-  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
-  ret <2 x float> %tmp4
-}
-
-define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
-; CHECK-LABEL: ins2f1:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
-  %tmp3 = extractelement <2 x double> %tmp1, i32 1
-  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
-  ret <1 x double> %tmp4
-}
-
-define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
-; CHECK-LABEL: ins8b8:
-; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
-  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
-  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
-  ret <8 x i8> %tmp4
-}
-
-define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
-; CHECK-LABEL: ins4h4:
-; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
-  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
-  ret <4 x i16> %tmp4
-}
-
-define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
-; CHECK-LABEL: ins2s2:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
-  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
-  ret <2 x i32> %tmp4
-}
-
-define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
-; CHECK-LABEL: ins1d1:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
-  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
-  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
-  ret <1 x i64> %tmp4
-}
-
-define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
-; CHECK-LABEL: ins2f2:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-  %tmp3 = extractelement <2 x float> %tmp1, i32 0
-  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
-  ret <2 x float> %tmp4
-}
-
-define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
-; CHECK-LABEL: ins1df1:
-; CHECK-NOT: ins {{v[0-9]+}}
-  %tmp3 = extractelement <1 x double> %tmp1, i32 0
-  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
-  ret <1 x double> %tmp4
-}
-
-define i32 @umovw16b(<16 x i8> %tmp1) {
-; CHECK-LABEL: umovw16b:
-; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
-  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
-  %tmp4 = zext i8 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i32 @umovw8h(<8 x i16> %tmp1) {
-; CHECK-LABEL: umovw8h:
-; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = zext i16 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i32 @umovw4s(<4 x i32> %tmp1) {
-; CHECK-LABEL: umovw4s:
-; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
-  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
-  ret i32 %tmp3
-}
-
-define i64 @umovx2d(<2 x i64> %tmp1) {
-; CHECK-LABEL: umovx2d:
-; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
-  %tmp3 = extractelement <2 x i64> %tmp1, i32 1
-  ret i64 %tmp3
-}
-
-define i32 @umovw8b(<8 x i8> %tmp1) {
-; CHECK-LABEL: umovw8b:
-; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
-  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
-  %tmp4 = zext i8 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i32 @umovw4h(<4 x i16> %tmp1) {
-; CHECK-LABEL: umovw4h:
-; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
-  %tmp4 = zext i16 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i32 @umovw2s(<2 x i32> %tmp1) {
-; CHECK-LABEL: umovw2s:
-; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
-  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
-  ret i32 %tmp3
-}
-
-define i64 @umovx1d(<1 x i64> %tmp1) {
-; CHECK-LABEL: umovx1d:
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
-  ret i64 %tmp3
-}
-
-define i32 @smovw16b(<16 x i8> %tmp1) {
-; CHECK-LABEL: smovw16b:
-; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
-  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
-  %tmp4 = sext i8 %tmp3 to i32
-  %tmp5 = add i32 %tmp4, %tmp4
-  ret i32 %tmp5
-}
-
-define i32 @smovw8h(<8 x i16> %tmp1) {
-; CHECK-LABEL: smovw8h:
-; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  %tmp5 = add i32 %tmp4, %tmp4
-  ret i32 %tmp5
-}
-
-define i32 @smovx16b(<16 x i8> %tmp1) {
-; CHECK-LABEL: smovx16b:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
-  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
-  %tmp4 = sext i8 %tmp3 to i32
-  %tmp5 = add i32 %tmp4, %tmp4
-  ret i32 %tmp5
-}
-
-define i32 @smovx8h(<8 x i16> %tmp1) {
-; CHECK-LABEL: smovx8h:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i64 @smovx4s(<4 x i32> %tmp1) {
-; CHECK-LABEL: smovx4s:
-; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
-  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
-  %tmp4 = sext i32 %tmp3 to i64
-  ret i64 %tmp4
-}
-
-define i32 @smovw8b(<8 x i8> %tmp1) {
-; CHECK-LABEL: smovw8b:
-; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
-  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
-  %tmp4 = sext i8 %tmp3 to i32
-  %tmp5 = add i32 %tmp4, %tmp4
-  ret i32 %tmp5
-}
-
-define i32 @smovw4h(<4 x i16> %tmp1) {
-; CHECK-LABEL: smovw4h:
-; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  %tmp5 = add i32 %tmp4, %tmp4
-  ret i32 %tmp5
-}
-
-define i32 @smovx8b(<8 x i8> %tmp1) {
-; CHECK-LABEL: smovx8b:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
-  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
-  %tmp4 = sext i8 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i32 @smovx4h(<4 x i16> %tmp1) {
-; CHECK-LABEL: smovx4h:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
-  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  ret i32 %tmp4
-}
-
-define i64 @smovx2s(<2 x i32> %tmp1) {
-; CHECK-LABEL: smovx2s:
-; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
-  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
-  %tmp4 = sext i32 %tmp3 to i64
-  ret i64 %tmp4
-}
-
-define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
-; CHECK-LABEL: test_vcopy_lane_s8:
-; CHECK: ins  {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
-  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
-  ret <8 x i8> %vset_lane
-}
-
-define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
-; CHECK-LABEL: test_vcopyq_laneq_s8:
-; CHECK: ins  {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
-  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
-  ret <16 x i8> %vset_lane
-}
-
-define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
-; CHECK-LABEL: test_vcopy_lane_swap_s8:
-; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
-  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
-  ret <8 x i8> %vset_lane
-}
-
-define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
-; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
-; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
-  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-  ret <16 x i8> %vset_lane
-}
-
-define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
-; CHECK-LABEL: test_vdup_n_u8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
-  %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
-  %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
-  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
-  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
-  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
-  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
-  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
-  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
-  ret <8 x i8> %vecinit7.i
-}
-
-define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
-; CHECK-LABEL: test_vdup_n_u16:
-; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
-  %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
-  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
-  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
-  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
-  ret <4 x i16> %vecinit3.i
-}
-
-define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
-; CHECK-LABEL: test_vdup_n_u32:
-; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
-  %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
-  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
-  ret <2 x i32> %vecinit1.i
-}
-
-define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
-; CHECK-LABEL: test_vdup_n_u64:
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-  %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
-  ret <1 x i64> %vecinit.i
-}
-
-define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
-; CHECK-LABEL: test_vdupq_n_u8:
-; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
-  %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
-  %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
-  %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
-  %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
-  %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
-  %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
-  %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
-  %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
-  %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
-  %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
-  %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
-  %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
-  %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
-  %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
-  %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
-  %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
-  ret <16 x i8> %vecinit15.i
-}
-
-define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
-; CHECK-LABEL: test_vdupq_n_u16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
-  %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
-  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
-  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
-  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
-  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
-  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
-  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
-  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
-  ret <8 x i16> %vecinit7.i
-}
-
-define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
-; CHECK-LABEL: test_vdupq_n_u32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
-  %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
-  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
-  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
-  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
-  ret <4 x i32> %vecinit3.i
-}
-
-define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
-; CHECK-LABEL: test_vdupq_n_u64:
-; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
-  %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
-  %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
-  ret <2 x i64> %vecinit1.i
-}
-
-define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
-; CHECK-LABEL: test_vdup_lane_s8:
-; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
-  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  ret <8 x i8> %shuffle
-}
-
-define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
-; CHECK-LABEL: test_vdup_lane_s16:
-; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
-  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
-  ret <4 x i16> %shuffle
-}
-
-define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
-; CHECK-LABEL: test_vdup_lane_s32:
-; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-  ret <2 x i32> %shuffle
-}
-
-define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
-; CHECK-LABEL: test_vdupq_lane_s8:
-; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
-  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  ret <16 x i8> %shuffle
-}
-
-define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
-; CHECK-LABEL: test_vdupq_lane_s16:
-; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
-  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-  ret <8 x i16> %shuffle
-}
-
-define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
-; CHECK-LABEL: test_vdupq_lane_s32:
-; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  ret <4 x i32> %shuffle
-}
-
-define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
-; CHECK-LABEL: test_vdupq_lane_s64:
-; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
-  ret <2 x i64> %shuffle
-}
-
-define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
-; CHECK-LABEL: test_vdup_laneq_s8:
-; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
-  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  ret <8 x i8> %shuffle
-}
-
-define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
-; CHECK-LABEL: test_vdup_laneq_s16:
-; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
-  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
-  ret <4 x i16> %shuffle
-}
-
-define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
-; CHECK-LABEL: test_vdup_laneq_s32:
-; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
-  ret <2 x i32> %shuffle
-}
-
-define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
-; CHECK-LABEL: test_vdupq_laneq_s8:
-; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
-  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  ret <16 x i8> %shuffle
-}
-
-define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
-; CHECK-LABEL: test_vdupq_laneq_s16:
-; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
-  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-  ret <8 x i16> %shuffle
-}
-
-define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
-; CHECK-LABEL: test_vdupq_laneq_s32:
-; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  ret <4 x i32> %shuffle
-}
-
-define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
-; CHECK-LABEL: test_vdupq_laneq_s64:
-; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
-  ret <2 x i64> %shuffle
-}
-
-define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
-; CHECK-LABEL: test_bitcastv8i8toi64:
-   %res = bitcast <8 x i8> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-   ret i64 %res
-}
-
-define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
-; CHECK-LABEL: test_bitcastv4i16toi64:
-   %res = bitcast <4 x i16> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-   ret i64 %res
-}
-
-define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
-; CHECK-LABEL: test_bitcastv2i32toi64:
-   %res = bitcast <2 x i32> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-   ret i64 %res
-}
-
-define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
-; CHECK-LABEL: test_bitcastv2f32toi64:
-   %res = bitcast <2 x float> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-   ret i64 %res
-}
-
-define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
-; CHECK-LABEL: test_bitcastv1i64toi64:
-   %res = bitcast <1 x i64> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-   ret i64 %res
-}
-
-define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
-; CHECK-LABEL: test_bitcastv1f64toi64:
-   %res = bitcast <1 x double> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
-   ret i64 %res
-}
-
-define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov8i8:
-   %res = bitcast i64 %in to <8 x i8>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-   ret <8 x i8> %res
-}
-
-define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov4i16:
-   %res = bitcast i64 %in to <4 x i16>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-   ret <4 x i16> %res
-}
-
-define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov2i32:
-   %res = bitcast i64 %in to <2 x i32>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-   ret <2 x i32> %res
-}
-
-define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov2f32:
-   %res = bitcast i64 %in to <2 x float>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-   ret <2 x float> %res
-}
-
-define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov1i64:
-   %res = bitcast i64 %in to <1 x i64>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-   ret <1 x i64> %res
-}
-
-define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov1f64:
-   %res = bitcast i64 %in to <1 x double>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-   ret <1 x double> %res
-}
-
-define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
-; CHECK-LABEL: test_bitcastv8i8tov1f64:
-; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
-  %sub.i = sub <8 x i8> zeroinitializer, %a
-  %1 = bitcast <8 x i8> %sub.i to <1 x double>
-  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
-  ret <1 x i64> %vcvt.i
-}
-
-define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
-; CHECK-LABEL: test_bitcastv4i16tov1f64:
-; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
-  %sub.i = sub <4 x i16> zeroinitializer, %a
-  %1 = bitcast <4 x i16> %sub.i to <1 x double>
-  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
-  ret <1 x i64> %vcvt.i
-}
-
-define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
-; CHECK-LABEL: test_bitcastv2i32tov1f64:
-; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
-  %sub.i = sub <2 x i32> zeroinitializer, %a
-  %1 = bitcast <2 x i32> %sub.i to <1 x double>
-  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
-  ret <1 x i64> %vcvt.i
-}
-
-define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1i64tov1f64:
-; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
-  %sub.i = sub <1 x i64> zeroinitializer, %a
-  %1 = bitcast <1 x i64> %sub.i to <1 x double>
-  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
-  ret <1 x i64> %vcvt.i
-}
-
-define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
-; CHECK-LABEL: test_bitcastv2f32tov1f64:
-; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
-  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
-  %1 = bitcast <2 x float> %sub.i to <1 x double>
-  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
-  ret <1 x i64> %vcvt.i
-}
-
-define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov8i8:
-; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
-; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
-  %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
-  %sub.i = sub <8 x i8> zeroinitializer, %1
-  ret <8 x i8> %sub.i
-}
-
-define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov4i16:
-; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
-; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
-  %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
-  %sub.i = sub <4 x i16> zeroinitializer, %1
-  ret <4 x i16> %sub.i
-}
-
-define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov2i32:
-; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
-; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
-  %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
-  %sub.i = sub <2 x i32> zeroinitializer, %1
-  ret <2 x i32> %sub.i
-}
-
-define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov1i64:
-; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
-; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
-  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
-  %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
-  %sub.i = sub <1 x i64> zeroinitializer, %1
-  ret <1 x i64> %sub.i
-}
-
-define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov2f32:
-; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
-; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
-  %1 = bitcast <1 x double> %vcvt.i to <2 x float>
-  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
-  ret <2 x float> %sub.i
-}
-
-; Test insert element into an undef vector
-define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
-; CHECK-LABEL: scalar_to_vector.v8i8:
-; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
-  %b = insertelement <8 x i8> undef, i8 %a, i32 0
-  ret <8 x i8> %b
-}
-
-define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
-; CHECK-LABEL: scalar_to_vector.v16i8:
-; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
-  %b = insertelement <16 x i8> undef, i8 %a, i32 0
-  ret <16 x i8> %b
-}
-
-define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
-; CHECK-LABEL: scalar_to_vector.v4i16:
-; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
-  %b = insertelement <4 x i16> undef, i16 %a, i32 0
-  ret <4 x i16> %b
-}
-
-define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
-; CHECK-LABEL: scalar_to_vector.v8i16:
-; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
-  %b = insertelement <8 x i16> undef, i16 %a, i32 0
-  ret <8 x i16> %b
-}
-
-define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
-; CHECK-LABEL: scalar_to_vector.v2i32:
-; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
-  %b = insertelement <2 x i32> undef, i32 %a, i32 0
-  ret <2 x i32> %b
-}
-
-define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
-; CHECK-LABEL: scalar_to_vector.v4i32:
-; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
-  %b = insertelement <4 x i32> undef, i32 %a, i32 0
-  ret <4 x i32> %b
-}
-
-define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
-; CHECK-LABEL: scalar_to_vector.v2i64:
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-  %b = insertelement <2 x i64> undef, i64 %a, i32 0
-  ret <2 x i64> %b
-}
-
-define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
-; CHECK-LABEL: testDUP.v1i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
-  %b = extractelement <1 x i8> %a, i32 0
-  %c = insertelement <8 x i8> undef, i8 %b, i32 0
-  %d = insertelement <8 x i8> %c, i8 %b, i32 1
-  %e = insertelement <8 x i8> %d, i8 %b, i32 2
-  %f = insertelement <8 x i8> %e, i8 %b, i32 3
-  %g = insertelement <8 x i8> %f, i8 %b, i32 4
-  %h = insertelement <8 x i8> %g, i8 %b, i32 5
-  %i = insertelement <8 x i8> %h, i8 %b, i32 6
-  %j = insertelement <8 x i8> %i, i8 %b, i32 7
-  ret <8 x i8> %j
-}
-
-define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
-; CHECK-LABEL: testDUP.v1i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
-  %b = extractelement <1 x i16> %a, i32 0
-  %c = insertelement <8 x i16> undef, i16 %b, i32 0
-  %d = insertelement <8 x i16> %c, i16 %b, i32 1
-  %e = insertelement <8 x i16> %d, i16 %b, i32 2
-  %f = insertelement <8 x i16> %e, i16 %b, i32 3
-  %g = insertelement <8 x i16> %f, i16 %b, i32 4
-  %h = insertelement <8 x i16> %g, i16 %b, i32 5
-  %i = insertelement <8 x i16> %h, i16 %b, i32 6
-  %j = insertelement <8 x i16> %i, i16 %b, i32 7
-  ret <8 x i16> %j
-}
-
-define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
-; CHECK-LABEL: testDUP.v1i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
-  %b = extractelement <1 x i32> %a, i32 0
-  %c = insertelement <4 x i32> undef, i32 %b, i32 0
-  %d = insertelement <4 x i32> %c, i32 %b, i32 1
-  %e = insertelement <4 x i32> %d, i32 %b, i32 2
-  %f = insertelement <4 x i32> %e, i32 %b, i32 3
-  ret <4 x i32> %f
-}
-
-define <8 x i8> @getl(<16 x i8> %x) #0 {
-; CHECK-LABEL: getl:
-; CHECK: ret
-  %vecext = extractelement <16 x i8> %x, i32 0
-  %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
-  %vecext1 = extractelement <16 x i8> %x, i32 1
-  %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
-  %vecext3 = extractelement <16 x i8> %x, i32 2
-  %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
-  %vecext5 = extractelement <16 x i8> %x, i32 3
-  %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
-  %vecext7 = extractelement <16 x i8> %x, i32 4
-  %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
-  %vecext9 = extractelement <16 x i8> %x, i32 5
-  %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
-  %vecext11 = extractelement <16 x i8> %x, i32 6
-  %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
-  %vecext13 = extractelement <16 x i8> %x, i32 7
-  %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
-  ret <8 x i8> %vecinit14
-}
-
-define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
-; CHECK-LABEL: test_dup_v2i32_v4i16:
-; CHECK: dup v0.4h, v0.h[2]
-entry:
-  %x = extractelement <2 x i32> %a, i32 1
-  %vget_lane = trunc i32 %x to i16
-  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
-  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
-  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
-  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
-  ret <4 x i16> %vecinit3.i
-}
-
-define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
-; CHECK-LABEL: test_dup_v4i32_v8i16:
-; CHECK: dup v0.8h, v0.h[6]
-entry:
-  %x = extractelement <4 x i32> %a, i32 3
-  %vget_lane = trunc i32 %x to i16
-  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
-  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
-  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
-  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
-  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
-  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
-  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
-  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
-  ret <8 x i16> %vecinit7.i
-}
-
-define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
-; CHECK-LABEL: test_dup_v1i64_v4i16:
-; CHECK: dup v0.4h, v0.h[0]
-entry:
-  %x = extractelement <1 x i64> %a, i32 0
-  %vget_lane = trunc i64 %x to i16
-  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
-  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
-  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
-  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
-  ret <4 x i16> %vecinit3.i
-}
-
-define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
-; CHECK-LABEL: test_dup_v1i64_v2i32:
-; CHECK: dup v0.2s, v0.s[0]
-entry:
-  %x = extractelement <1 x i64> %a, i32 0
-  %vget_lane = trunc i64 %x to i32
-  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
-  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
-  ret <2 x i32> %vecinit1.i
-}
-
-define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
-; CHECK-LABEL: test_dup_v2i64_v8i16:
-; CHECK: dup v0.8h, v0.h[4]
-entry:
-  %x = extractelement <2 x i64> %a, i32 1
-  %vget_lane = trunc i64 %x to i16
-  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
-  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
-  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
-  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
-  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
-  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
-  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
-  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
-  ret <8 x i16> %vecinit7.i
-}
-
-define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
-; CHECK-LABEL: test_dup_v2i64_v4i32:
-; CHECK: dup v0.4s, v0.s[2]
-entry:
-  %x = extractelement <2 x i64> %a, i32 1
-  %vget_lane = trunc i64 %x to i32
-  %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
-  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
-  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
-  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
-  ret <4 x i32> %vecinit3.i
-}
-
-define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
-; CHECK-LABEL: test_dup_v4i32_v4i16:
-; CHECK: dup v0.4h, v0.h[2]
-entry:
-  %x = extractelement <4 x i32> %a, i32 1
-  %vget_lane = trunc i32 %x to i16
-  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
-  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
-  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
-  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
-  ret <4 x i16> %vecinit3.i
-}
-
-define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
-; CHECK-LABEL: test_dup_v2i64_v4i16:
-; CHECK: dup v0.4h, v0.h[0]
-entry:
-  %x = extractelement <2 x i64> %a, i32 0
-  %vget_lane = trunc i64 %x to i16
-  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
-  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
-  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
-  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
-  ret <4 x i16> %vecinit3.i
-}
-
-define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
-; CHECK-LABEL: test_dup_v2i64_v2i32:
-; CHECK: dup v0.2s, v0.s[0]
-entry:
-  %x = extractelement <2 x i64> %a, i32 0
-  %vget_lane = trunc i64 %x to i32
-  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
-  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
-  ret <2 x i32> %vecinit1.i
-}
-
-
-define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
-; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
-; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
-; CHECK-NEXT: ret
-entry:
-  %0 = call float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float> %a)
-  %1 = insertelement <1 x float> undef, float %0, i32 0
-  %2 = extractelement <1 x float> %1, i32 0
-  %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
-  ret <2 x float> %vecinit1.i
-}
-
-define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
-; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
-; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
-; CHECK-NEXT: ret
-entry:
-  %0 = call float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float> %a)
-  %1 = insertelement <1 x float> undef, float %0, i32 0
-  %2 = extractelement <1 x float> %1, i32 0
-  %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
-  ret <4 x float> %vecinit1.i
-}
-
-declare float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float>)
-
-define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
-; CHECK-LABEL: test_concat_undef_v1i32:
-; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-entry:
-  %0 = extractelement <2 x i32> %a, i32 0
-  %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
-  ret <2 x i32> %vecinit1.i
-}
-
-declare i32 @llvm.arm64.neon.sqabs.i32(i32) #4
-
-define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
-; CHECK-LABEL: test_concat_v1i32_undef:
-; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
-; CHECK-NEXT: ret
-entry:
-  %b = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %a)
-  %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
-  ret <2 x i32> %vecinit.i432
-}
-
-define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
-; CHECK-LABEL: test_concat_same_v1i32_v1i32:
-; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
-entry:
-  %0 = extractelement <2 x i32> %a, i32 0
-  %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
-  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
-  ret <2 x i32> %vecinit1.i
-}
-
-define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
-; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
-; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
-; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
-; CHECK-NEXT: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
-  %c = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %a)
-  %d = insertelement <2 x i32> undef, i32 %c, i32 0
-  %e = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %b)
-  %f = insertelement <2 x i32> undef, i32 %e, i32 0
-  %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
-  ret <2 x i32> %h
-}
-
-define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
-; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-  ret <16 x i8> %vecinit30
-}
-
-define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
-; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <8 x i8> %x, i32 0
-  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
-  %vecext1 = extractelement <8 x i8> %x, i32 1
-  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
-  %vecext3 = extractelement <8 x i8> %x, i32 2
-  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
-  %vecext5 = extractelement <8 x i8> %x, i32 3
-  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
-  %vecext7 = extractelement <8 x i8> %x, i32 4
-  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
-  %vecext9 = extractelement <8 x i8> %x, i32 5
-  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
-  %vecext11 = extractelement <8 x i8> %x, i32 6
-  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
-  %vecext13 = extractelement <8 x i8> %x, i32 7
-  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
-  %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-  ret <16 x i8> %vecinit30
-}
-
-define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
-; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <16 x i8> %x, i32 0
-  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
-  %vecext1 = extractelement <16 x i8> %x, i32 1
-  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
-  %vecext3 = extractelement <16 x i8> %x, i32 2
-  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
-  %vecext5 = extractelement <16 x i8> %x, i32 3
-  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
-  %vecext7 = extractelement <16 x i8> %x, i32 4
-  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
-  %vecext9 = extractelement <16 x i8> %x, i32 5
-  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
-  %vecext11 = extractelement <16 x i8> %x, i32 6
-  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
-  %vecext13 = extractelement <16 x i8> %x, i32 7
-  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
-  %vecext15 = extractelement <8 x i8> %y, i32 0
-  %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
-  %vecext17 = extractelement <8 x i8> %y, i32 1
-  %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
-  %vecext19 = extractelement <8 x i8> %y, i32 2
-  %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
-  %vecext21 = extractelement <8 x i8> %y, i32 3
-  %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
-  %vecext23 = extractelement <8 x i8> %y, i32 4
-  %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
-  %vecext25 = extractelement <8 x i8> %y, i32 5
-  %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
-  %vecext27 = extractelement <8 x i8> %y, i32 6
-  %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
-  %vecext29 = extractelement <8 x i8> %y, i32 7
-  %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
-  ret <16 x i8> %vecinit30
-}
-
-define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
-; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <8 x i8> %x, i32 0
-  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
-  %vecext1 = extractelement <8 x i8> %x, i32 1
-  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
-  %vecext3 = extractelement <8 x i8> %x, i32 2
-  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
-  %vecext5 = extractelement <8 x i8> %x, i32 3
-  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
-  %vecext7 = extractelement <8 x i8> %x, i32 4
-  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
-  %vecext9 = extractelement <8 x i8> %x, i32 5
-  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
-  %vecext11 = extractelement <8 x i8> %x, i32 6
-  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
-  %vecext13 = extractelement <8 x i8> %x, i32 7
-  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
-  %vecext15 = extractelement <8 x i8> %y, i32 0
-  %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
-  %vecext17 = extractelement <8 x i8> %y, i32 1
-  %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
-  %vecext19 = extractelement <8 x i8> %y, i32 2
-  %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
-  %vecext21 = extractelement <8 x i8> %y, i32 3
-  %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
-  %vecext23 = extractelement <8 x i8> %y, i32 4
-  %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
-  %vecext25 = extractelement <8 x i8> %y, i32 5
-  %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
-  %vecext27 = extractelement <8 x i8> %y, i32 6
-  %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
-  %vecext29 = extractelement <8 x i8> %y, i32 7
-  %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
-  ret <16 x i8> %vecinit30
-}
-
-define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
-; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-  ret <8 x i16> %vecinit14
-}
-
-define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
-; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <4 x i16> %x, i32 0
-  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
-  %vecext1 = extractelement <4 x i16> %x, i32 1
-  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
-  %vecext3 = extractelement <4 x i16> %x, i32 2
-  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
-  %vecext5 = extractelement <4 x i16> %x, i32 3
-  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
-  %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-  ret <8 x i16> %vecinit14
-}
-
-define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
-; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <8 x i16> %x, i32 0
-  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
-  %vecext1 = extractelement <8 x i16> %x, i32 1
-  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
-  %vecext3 = extractelement <8 x i16> %x, i32 2
-  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
-  %vecext5 = extractelement <8 x i16> %x, i32 3
-  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
-  %vecext7 = extractelement <4 x i16> %y, i32 0
-  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
-  %vecext9 = extractelement <4 x i16> %y, i32 1
-  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
-  %vecext11 = extractelement <4 x i16> %y, i32 2
-  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
-  %vecext13 = extractelement <4 x i16> %y, i32 3
-  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
-  ret <8 x i16> %vecinit14
-}
-
-define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
-; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <4 x i16> %x, i32 0
-  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
-  %vecext1 = extractelement <4 x i16> %x, i32 1
-  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
-  %vecext3 = extractelement <4 x i16> %x, i32 2
-  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
-  %vecext5 = extractelement <4 x i16> %x, i32 3
-  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
-  %vecext7 = extractelement <4 x i16> %y, i32 0
-  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
-  %vecext9 = extractelement <4 x i16> %y, i32 1
-  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
-  %vecext11 = extractelement <4 x i16> %y, i32 2
-  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
-  %vecext13 = extractelement <4 x i16> %y, i32 3
-  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
-  ret <8 x i16> %vecinit14
-}
-
-define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  ret <4 x i32> %vecinit6
-}
-
-define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <2 x i32> %x, i32 0
-  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
-  %vecext1 = extractelement <2 x i32> %x, i32 1
-  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
-  %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  ret <4 x i32> %vecinit6
-}
-
-define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <4 x i32> %x, i32 0
-  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
-  %vecext1 = extractelement <4 x i32> %x, i32 1
-  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
-  %vecext3 = extractelement <2 x i32> %y, i32 0
-  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
-  %vecext5 = extractelement <2 x i32> %y, i32 1
-  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
-  ret <4 x i32> %vecinit6
-}
-
-define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  ret <4 x i32> %vecinit6
-}
-
-define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
-; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
-  ret <2 x i64> %vecinit2
-}
-
-define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
-; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
-  %vecext = extractelement <1 x i64> %x, i32 0
-  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
-  %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
-  ret <2 x i64> %vecinit2
-}
-
-define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <2 x i64> %x, i32 0
-  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
-  %vecext1 = extractelement <1 x i64> %y, i32 0
-  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
-  ret <2 x i64> %vecinit2
-}
-
-define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
-  %vecext = extractelement <1 x i64> %x, i32 0
-  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
-  %vecext1 = extractelement <1 x i64> %y, i32 0
-  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
-  ret <2 x i64> %vecinit2
-}
-
-
-define <4 x i16> @concat_vector_v4i16_const() {
-; CHECK-LABEL: concat_vector_v4i16_const:
-; CHECK: movi {{d[0-9]+}}, #0
- %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
- ret <4 x i16> %r
-}
-
-define <4 x i16> @concat_vector_v4i16_const_one() {
-; CHECK-LABEL: concat_vector_v4i16_const_one:
-; CHECK: movi {{v[0-9]+}}.4h, #0x1
- %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
- ret <4 x i16> %r
-}
-
-define <4 x i32> @concat_vector_v4i32_const() {
-; CHECK-LABEL: concat_vector_v4i32_const:
-; CHECK: movi {{v[0-9]+}}.2d, #0
- %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
- ret <4 x i32> %r
-}
-
-define <8 x i8> @concat_vector_v8i8_const() {
-; CHECK-LABEL: concat_vector_v8i8_const:
-; CHECK: movi {{d[0-9]+}}, #0
- %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
- ret <8 x i8> %r
-}
-
-define <8 x i16> @concat_vector_v8i16_const() {
-; CHECK-LABEL: concat_vector_v8i16_const:
-; CHECK: movi {{v[0-9]+}}.2d, #0
- %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %r
-}
-
-define <8 x i16> @concat_vector_v8i16_const_one() {
-; CHECK-LABEL: concat_vector_v8i16_const_one:
-; CHECK: movi {{v[0-9]+}}.8h, #0x1
- %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %r
-}
-
-define <16 x i8> @concat_vector_v16i8_const() {
-; CHECK-LABEL: concat_vector_v16i8_const:
-; CHECK: movi {{v[0-9]+}}.2d, #0
- %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
- ret <16 x i8> %r
-}
-
-define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
-; CHECK-LABEL: concat_vector_v4i16:
-; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
- %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
- ret <4 x i16> %r
-}
-
-define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
-; CHECK-LABEL: concat_vector_v4i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
- %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
- ret <4 x i32> %r
-}
-
-define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
-; CHECK-LABEL: concat_vector_v8i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
- %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
- ret <8 x i8> %r
-}
-
-define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
-; CHECK-LABEL: concat_vector_v8i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
- %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %r
-}
-
-define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
-; CHECK-LABEL: concat_vector_v16i8:
-; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
- %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
- ret <16 x i8> %r
-}
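
Tests like the ones removed above carry over under llvm/trunk/test/CodeGen/AArch64/ with the llvm.arm64.* intrinsic prefix mechanically renamed to llvm.aarch64.*. As a minimal sketch of what one such test reads like after the rename (hypothetical function name, not the verbatim contents of any new file):

define <2 x i32> @sqabs_concat_example(i32 %a) {
; CHECK-LABEL: sqabs_concat_example:
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
  ; same body as before the move; only the intrinsic namespace differs
  %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
  %vec = insertelement <2 x i32> undef, i32 %b, i32 0
  ret <2 x i32> %vec
}

declare i32 @llvm.aarch64.neon.sqabs.i32(i32)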

Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll (removed)
@@ -1,48 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 has a separate copy of this test because it uses its own intrinsics
-
-define <4 x i32> @copyTuple.QPair(i32* %a, i32* %b) {
-; CHECK-LABEL: copyTuple.QPair:
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
-entry:
-  %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, i64 1, i32* %a)
-  %extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
-  %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i64 1, i32* %b)
-  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld1, 0
-  ret <4 x i32> %vld1.fca.0.extract
-}
-
-define <4 x i32> @copyTuple.QTriple(i32* %a, i32* %b, <4 x i32> %c) {
-; CHECK-LABEL: copyTuple.QTriple:
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
-entry:
-  %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a)
-  %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
-  %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, i64 1, i32* %b)
-  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
-  ret <4 x i32> %vld1.fca.0.extract
-}
-
-define <4 x i32> @copyTuple.QQuad(i32* %a, i32* %b, <4 x i32> %c) {
-; CHECK-LABEL: copyTuple.QQuad:
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
-entry:
-  %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a)
-  %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
-  %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %b)
-  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
-  ret <4 x i32> %vld1.fca.0.extract
-}
-
-declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*)
-declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
-declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
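
The ld2lane/ld3lane/ld4lane intrinsics declared above likewise survive with only the prefix changed. A minimal sketch of the renamed ld2lane form (hypothetical function name; assumes the same overload as the removed declaration):

define <4 x i32> @ld2lane_example(i32* %p) {
; CHECK-LABEL: ld2lane_example:
; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[1], [x{{[0-9]+}}]
  ; load lane 1 of two v4i32 registers from %p; input lanes are don't-care here
  %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> undef, <4 x i32> undef, i64 1, i32* %p)
  %res = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
  ret <4 x i32> %res
}

declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*)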

Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-mul-div.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-mul-div.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-mul-div.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-mul-div.ll (removed)
@@ -1,797 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 has its own copy of this test because of its separate intrinsics
-
-define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) {
-; CHECK-LABEL: mul8xi8:
-; CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-	%tmp3 = mul <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) {
-; CHECK-LABEL: mul16xi8:
-; CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-	%tmp3 = mul <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) {
-; CHECK-LABEL: mul4xi16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-	%tmp3 = mul <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) {
-; CHECK-LABEL: mul8xi16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-	%tmp3 = mul <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) {
-; CHECK-LABEL: mul2xi32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = mul <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
-; CHECK-LABEL: mul4x32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = mul <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) {
-; CHECK-LABEL: mul1xi64:
-; CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
-  %tmp3 = mul <1 x i64> %A, %B;
-  ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) {
-; CHECK-LABEL: mul2xi64:
-; CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
-; CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
-  %tmp3 = mul <2 x i64> %A, %B;
-  ret <2 x i64> %tmp3
-}
-
-define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) {
-; CHECK-LABEL: mul2xfloat:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = fmul <2 x float> %A, %B;
-	ret <2 x float> %tmp3
-}
-
-define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: mul4xfloat:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = fmul <4 x float> %A, %B;
-	ret <4 x float> %tmp3
-}
-define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) {
-; CHECK-LABEL: mul2xdouble:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-	%tmp3 = fmul <2 x double> %A, %B;
-	ret <2 x double> %tmp3
-}
-
-
-define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) {
-; CHECK-LABEL: div2xfloat:
-; CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp3 = fdiv <2 x float> %A, %B;
-	ret <2 x float> %tmp3
-}
-
-define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: div4xfloat:
-; CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-	%tmp3 = fdiv <4 x float> %A, %B;
-	ret <4 x float> %tmp3
-}
-define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
-; CHECK-LABEL: div2xdouble:
-; CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-	%tmp3 = fdiv <2 x double> %A, %B;
-	ret <2 x double> %tmp3
-}
-
-define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) {
-; CHECK-LABEL: sdiv1x8:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <1 x i8> %A, %B;
-	ret <1 x i8> %tmp3
-}
-
-define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) {
-; CHECK-LABEL: sdiv8x8:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) {
-; CHECK-LABEL: sdiv16x8:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) {
-; CHECK-LABEL: sdiv1x16:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <1 x i16> %A, %B;
-	ret <1 x i16> %tmp3
-}
-
-define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) {
-; CHECK-LABEL: sdiv4x16:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) {
-; CHECK-LABEL: sdiv8x16:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) {
-; CHECK-LABEL: sdiv1x32:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <1 x i32> %A, %B;
-	ret <1 x i32> %tmp3
-}
-
-define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) {
-; CHECK-LABEL: sdiv2x32:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) {
-; CHECK-LABEL: sdiv4x32:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = sdiv <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) {
-; CHECK-LABEL: sdiv1x64:
-; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = sdiv <1 x i64> %A, %B;
-	ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) {
-; CHECK-LABEL: sdiv2x64:
-; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = sdiv <2 x i64> %A, %B;
-	ret <2 x i64> %tmp3
-}
-
-define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) {
-; CHECK-LABEL: udiv1x8:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <1 x i8> %A, %B;
-	ret <1 x i8> %tmp3
-}
-
-define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) {
-; CHECK-LABEL: udiv8x8:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) {
-; CHECK-LABEL: udiv16x8:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) {
-; CHECK-LABEL: udiv1x16:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <1 x i16> %A, %B;
-	ret <1 x i16> %tmp3
-}
-
-define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) {
-; CHECK-LABEL: udiv4x16:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) {
-; CHECK-LABEL: udiv8x16:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) {
-; CHECK-LABEL: udiv1x32:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <1 x i32> %A, %B;
-	ret <1 x i32> %tmp3
-}
-
-define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) {
-; CHECK-LABEL: udiv2x32:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) {
-; CHECK-LABEL: udiv4x32:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = udiv <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) {
-; CHECK-LABEL: udiv1x64:
-; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = udiv <1 x i64> %A, %B;
-	ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) {
-; CHECK-LABEL: udiv2x64:
-; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = udiv <2 x i64> %A, %B;
-	ret <2 x i64> %tmp3
-}
-
-define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) {
-; CHECK-LABEL: srem1x8:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <1 x i8> %A, %B;
-	ret <1 x i8> %tmp3
-}
-
-define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) {
-; CHECK-LABEL: srem8x8:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) {
-; CHECK-LABEL: srem16x8:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) {
-; CHECK-LABEL: srem1x16:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <1 x i16> %A, %B;
-	ret <1 x i16> %tmp3
-}
-
-define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) {
-; CHECK-LABEL: srem4x16:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) {
-; CHECK-LABEL: srem8x16:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
-; CHECK-LABEL: srem1x32:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <1 x i32> %A, %B;
-	ret <1 x i32> %tmp3
-}
-
-define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
-; CHECK-LABEL: srem2x32:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) {
-; CHECK-LABEL: srem4x32:
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = srem <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) {
-; CHECK-LABEL: srem1x64:
-; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = srem <1 x i64> %A, %B;
-	ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) {
-; CHECK-LABEL: srem2x64:
-; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = srem <2 x i64> %A, %B;
-	ret <2 x i64> %tmp3
-}
-
-define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) {
-; CHECK-LABEL: urem1x8:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <1 x i8> %A, %B;
-	ret <1 x i8> %tmp3
-}
-
-define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) {
-; CHECK-LABEL: urem8x8:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <8 x i8> %A, %B;
-	ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) {
-; CHECK-LABEL: urem16x8:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <16 x i8> %A, %B;
-	ret <16 x i8> %tmp3
-}
-
-define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) {
-; CHECK-LABEL: urem1x16:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <1 x i16> %A, %B;
-	ret <1 x i16> %tmp3
-}
-
-define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) {
-; CHECK-LABEL: urem4x16:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <4 x i16> %A, %B;
-	ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) {
-; CHECK-LABEL: urem8x16:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <8 x i16> %A, %B;
-	ret <8 x i16> %tmp3
-}
-
-define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) {
-; CHECK-LABEL: urem1x32:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <1 x i32> %A, %B;
-	ret <1 x i32> %tmp3
-}
-
-define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) {
-; CHECK-LABEL: urem2x32:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <2 x i32> %A, %B;
-	ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) {
-; CHECK-LABEL: urem4x32:
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-	%tmp3 = urem <4 x i32> %A, %B;
-	ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) {
-; CHECK-LABEL: urem1x64:
-; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = urem <1 x i64> %A, %B;
-	ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) {
-; CHECK-LABEL: urem2x64:
-; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-	%tmp3 = urem <2 x i64> %A, %B;
-	ret <2 x i64> %tmp3
-}
-
-define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) {
-; CHECK-LABEL: frem2f32:
-; CHECK: bl fmodf
-; CHECK: bl fmodf
-	%tmp3 = frem <2 x float> %A, %B;
-	ret <2 x float> %tmp3
-}
-
-define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: frem4f32:
-; CHECK: bl fmodf
-; CHECK: bl fmodf
-; CHECK: bl fmodf
-; CHECK: bl fmodf
-	%tmp3 = frem <4 x float> %A, %B;
-	ret <4 x float> %tmp3
-}
-
-define <1 x double> @frem1d64(<1 x double> %A, <1 x double> %B) {
-; CHECK-LABEL: frem1d64:
-; CHECK: bl fmod
-	%tmp3 = frem <1 x double> %A, %B;
-	ret <1 x double> %tmp3
-}
-
-define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) {
-; CHECK-LABEL: frem2d64:
-; CHECK: bl fmod
-; CHECK: bl fmod
-	%tmp3 = frem <2 x double> %A, %B;
-	ret <2 x double> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.pmul.v8i8(<8 x i8>, <8 x i8>)
-declare <16 x i8> @llvm.arm64.neon.pmul.v16i8(<16 x i8>, <16 x i8>)
-
-define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK-LABEL: poly_mulv8i8:
-   %prod = call <8 x i8> @llvm.arm64.neon.pmul.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: pmul v0.8b, v0.8b, v1.8b
-   ret <8 x i8> %prod
-}
-
-define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK-LABEL: poly_mulv16i8:
-   %prod = call <16 x i8> @llvm.arm64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: pmul v0.16b, v0.16b, v1.16b
-   ret <16 x i8> %prod
-}
-
-declare <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
-declare <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
-declare <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)
-declare <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK-LABEL: test_sqdmulh_v4i16:
-   %prod = call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqdmulh v0.4h, v0.4h, v1.4h
-   ret <4 x i16> %prod
-}
-
-define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK-LABEL: test_sqdmulh_v8i16:
-   %prod = call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqdmulh v0.8h, v0.8h, v1.8h
-   ret <8 x i16> %prod
-}
-
-define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK-LABEL: test_sqdmulh_v2i32:
-   %prod = call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqdmulh v0.2s, v0.2s, v1.2s
-   ret <2 x i32> %prod
-}
-
-define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK-LABEL: test_sqdmulh_v4i32:
-   %prod = call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqdmulh v0.4s, v0.4s, v1.4s
-   ret <4 x i32> %prod
-}
-
-declare <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
-declare <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
-declare <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
-declare <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK-LABEL: test_sqrdmulh_v4i16:
-   %prod = call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h
-   ret <4 x i16> %prod
-}
-
-define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK-LABEL: test_sqrdmulh_v8i16:
-   %prod = call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h
-   ret <8 x i16> %prod
-}
-
-define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK-LABEL: test_sqrdmulh_v2i32:
-   %prod = call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s
-   ret <2 x i32> %prod
-}
-
-define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK-LABEL: test_sqrdmulh_v4i32:
-   %prod = call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s
-   ret <4 x i32> %prod
-}
-
-declare <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK-LABEL: fmulx_v2f32:
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: fmulx v0.2s, v0.2s, v1.2s
-        %val = call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-        ret <2 x float> %val
-}
-
-define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK-LABEL: fmulx_v4f32:
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: fmulx v0.4s, v0.4s, v1.4s
-        %val = call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-        ret <4 x float> %val
-}
-
-define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK-LABEL: fmulx_v2f64:
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: fmulx v0.2d, v0.2d, v1.2d
-        %val = call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-        ret <2 x double> %val
-}
-

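A note on the division tests removed above: AArch64 NEON has no vector integer divide, so udiv/sdiv on vectors is scalarized, with each lane moved to a general-purpose register and divided there, and srem/urem is reconstructed per lane as r = a - (a / b) * b, which the backend selects as a single msub. Vector frem likewise expands to one fmodf (or fmod) libcall per lane. A minimal standalone sketch, with a RUN line in the style of the removed tests and an illustrative function name:

; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

; Each lane gets its own udiv; the remainder is then recovered with msub.
define <2 x i32> @urem_example(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: urem_example:
; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
  %r = urem <2 x i32> %a, %b
  ret <2 x i32> %r
}

The wider tests simply repeat the udiv/msub pair once per element, which is why the CHECK lines above scale with the vector width.
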
Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-scalar-by-elem-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-scalar-by-elem-mul.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-scalar-by-elem-mul.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-scalar-by-elem-mul.ll (removed)
@@ -1,124 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) {
-  ; CHECK-LABEL: test_fmul_lane_ss2S
-  ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
-  %tmp1 = extractelement <2 x float> %v, i32 1
-  %tmp2 = fmul float %a, %tmp1;
-  ret float %tmp2;
-}
-
-define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) {
-  ; CHECK-LABEL: test_fmul_lane_ss2S_swap
-  ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
-  %tmp1 = extractelement <2 x float> %v, i32 1
-  %tmp2 = fmul float %tmp1, %a;
-  ret float %tmp2;
-}
-
-
-define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) {
-  ; CHECK-LABEL: test_fmul_lane_ss4S
-  ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-  %tmp1 = extractelement <4 x float> %v, i32 3
-  %tmp2 = fmul float %a, %tmp1;
-  ret float %tmp2;
-}
-
-define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) {
-  ; CHECK-LABEL: test_fmul_lane_ss4S_swap
-  ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-  %tmp1 = extractelement <4 x float> %v, i32 3
-  %tmp2 = fmul float %tmp1, %a;
-  ret float %tmp2;
-}
-
-
-define double @test_fmul_lane_ddD(double %a, <1 x double> %v) {
-  ; CHECK-LABEL: test_fmul_lane_ddD
-  ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0]|d[0-9]+}}
-  %tmp1 = extractelement <1 x double> %v, i32 0
-  %tmp2 = fmul double %a, %tmp1;
-  ret double %tmp2;
-}
-
-
-
-define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) {
-  ; CHECK-LABEL: test_fmul_lane_dd2D
-  ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-  %tmp1 = extractelement <2 x double> %v, i32 1
-  %tmp2 = fmul double %a, %tmp1;
-  ret double %tmp2;
-}
-
-
-define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) {
-  ; CHECK-LABEL: test_fmul_lane_dd2D_swap
-  ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-  %tmp1 = extractelement <2 x double> %v, i32 1
-  %tmp2 = fmul double %tmp1, %a;
-  ret double %tmp2;
-}
-
-declare float @llvm.arm64.neon.fmulx.f32(float, float)
-
-define float @test_fmulx_lane_f32(float %a, <2 x float> %v) {
-  ; CHECK-LABEL: test_fmulx_lane_f32
-  ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
-  %tmp1 = extractelement <2 x float> %v, i32 1
-  %tmp2 = call float @llvm.arm64.neon.fmulx.f32(float %a, float %tmp1)
-  ret float %tmp2;
-}
-
-define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) {
-  ; CHECK-LABEL: test_fmulx_laneq_f32
-  ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-  %tmp1 = extractelement <4 x float> %v, i32 3
-  %tmp2 = call float @llvm.arm64.neon.fmulx.f32(float %a, float %tmp1)
-  ret float %tmp2;
-}
-
-define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) {
-  ; CHECK-LABEL: test_fmulx_laneq_f32_swap
-  ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-  %tmp1 = extractelement <4 x float> %v, i32 3
-  %tmp2 = call float @llvm.arm64.neon.fmulx.f32(float %tmp1, float %a)
-  ret float %tmp2;
-}
-
-declare double @llvm.arm64.neon.fmulx.f64(double, double)
-
-define double @test_fmulx_lane_f64(double %a, <1 x double> %v) {
-  ; CHECK-LABEL: test_fmulx_lane_f64
-  ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0]|d[0-9]+}}
-  %tmp1 = extractelement <1 x double> %v, i32 0
-  %tmp2 = call double @llvm.arm64.neon.fmulx.f64(double %a, double %tmp1)
-  ret double %tmp2;
-}
-
-define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) {
-  ; CHECK-LABEL: test_fmulx_laneq_f64_0
-  ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-  %tmp1 = extractelement <2 x double> %v, i32 0
-  %tmp2 = call double @llvm.arm64.neon.fmulx.f64(double %a, double %tmp1)
-  ret double %tmp2;
-}
-
-
-define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) {
-  ; CHECK-LABEL: test_fmulx_laneq_f64_1
-  ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-  %tmp1 = extractelement <2 x double> %v, i32 1
-  %tmp2 = call double @llvm.arm64.neon.fmulx.f64(double %a, double %tmp1)
-  ret double %tmp2;
-}
-
-define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) {
-  ; CHECK-LABEL: test_fmulx_laneq_f64_1_swap
-  ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-  %tmp1 = extractelement <2 x double> %v, i32 1
-  %tmp2 = call double @llvm.arm64.neon.fmulx.f64(double %tmp1, double %a)
-  ret double %tmp2;
-}
-

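The by-element multiply tests removed above all check the same fold: a scalar fmul (or fmulx intrinsic) whose operand is an extractelement from a vector lane should select the indexed by-element form directly, with no separate lane move, and the _swap variants confirm the fold works for either operand order. A minimal sketch under the same RUN line; the function name and lane index are illustrative:

; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; The extracted lane feeds fmul directly as the .s[2] operand.
define float @fmul_lane_example(float %a, <4 x float> %v) {
; CHECK-LABEL: fmul_lane_example
; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[2]
  %lane = extractelement <4 x float> %v, i32 2
  %r = fmul float %a, %lane
  ret float %r
}
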
Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-select_cc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-select_cc.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-select_cc.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-select_cc.ll (removed)
@@ -1,206 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v8i8_i8:
-; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
-; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
-; CHECK: cmeq [[MASK:v[0-9]+]].8b, v[[LHS]].8b, v[[RHS]].8b
-; CHECK: dup [[DUPMASK:v[0-9]+]].8b, [[MASK]].b[0]
-; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
-  %cmp31 = icmp eq i8 %a, %b
-  %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
-  ret <8x i8> %e
-}
-
-define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v8i8_f32:
-; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
-; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
-; CHECK-NEXT: bsl [[DUPMASK]].8b, v2.8b, v3.8b
-  %cmp31 = fcmp oeq float %a, %b
-  %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
-  ret <8x i8> %e
-}
-
-define <8x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8x i8> %c, <8x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v8i8_f64:
-; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
-; CHECK-NEXT: bsl v[[MASK]].8b, v2.8b, v3.8b
-  %cmp31 = fcmp oeq double %a, %b
-  %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
-  ret <8x i8> %e
-}
-
-define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v16i8_i8:
-; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
-; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
-; CHECK: cmeq [[MASK:v[0-9]+]].16b, v[[LHS]].16b, v[[RHS]].16b
-; CHECK: dup [[DUPMASK:v[0-9]+]].16b, [[MASK]].b[0]
-; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
-  %cmp31 = icmp eq i8 %a, %b
-  %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
-  ret <16x i8> %e
-}
-
-define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v16i8_f32:
-; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
-; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
-; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
-  %cmp31 = fcmp oeq float %a, %b
-  %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
-  ret <16x i8> %e
-}
-
-define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v16i8_f64:
-; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
-; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
-; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
-  %cmp31 = fcmp oeq double %a, %b
-  %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
-  ret <16x i8> %e
-}
-
-define <4x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4x i16> %c, <4x i16> %d ) {
-; CHECK-LABEL: test_select_cc_v4i16:
-; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
-; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
-; CHECK: cmeq [[MASK:v[0-9]+]].4h, v[[LHS]].4h, v[[RHS]].4h
-; CHECK: dup [[DUPMASK:v[0-9]+]].4h, [[MASK]].h[0]
-; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
-  %cmp31 = icmp eq i16 %a, %b
-  %e = select i1 %cmp31, <4x i16> %c, <4x i16> %d
-  ret <4x i16> %e
-}
-
-define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d ) {
-; CHECK-LABEL: test_select_cc_v8i16:
-; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
-; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
-; CHECK: cmeq [[MASK:v[0-9]+]].8h, v[[LHS]].8h, v[[RHS]].8h
-; CHECK: dup [[DUPMASK:v[0-9]+]].8h, [[MASK]].h[0]
-; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
-  %cmp31 = icmp eq i16 %a, %b
-  %e = select i1 %cmp31, <8x i16> %c, <8x i16> %d
-  ret <8x i16> %e
-}
-
-define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) {
-; CHECK-LABEL: test_select_cc_v2i32:
-; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
-; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
-; CHECK: cmeq [[MASK:v[0-9]+]].2s, v[[LHS]].2s, v[[RHS]].2s
-; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
-; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
-  %cmp31 = icmp eq i32 %a, %b
-  %e = select i1 %cmp31, <2x i32> %c, <2x i32> %d
-  ret <2x i32> %e
-}
-
-define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) {
-; CHECK-LABEL: test_select_cc_v4i32:
-; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
-; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
-; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
-; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
-; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
-  %cmp31 = icmp eq i32 %a, %b
-  %e = select i1 %cmp31, <4x i32> %c, <4x i32> %d
-  ret <4x i32> %e
-}
-
-define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d ) {
-; CHECK-LABEL: test_select_cc_v1i64:
-; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
-; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
-; CHECK: cmeq d[[MASK:[0-9]+]], d[[LHS]], d[[RHS]]
-; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
-  %cmp31 = icmp eq i64 %a, %b
-  %e = select i1 %cmp31, <1x i64> %c, <1x i64> %d
-  ret <1x i64> %e
-}
-
-define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) {
-; CHECK-LABEL: test_select_cc_v2i64:
-; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
-; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
-; CHECK: cmeq [[MASK:v[0-9]+]].2d, v[[LHS]].2d, v[[RHS]].2d
-; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
-; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
-  %cmp31 = icmp eq i64 %a, %b
-  %e = select i1 %cmp31, <2x i64> %c, <2x i64> %d
-  ret <2x i64> %e
-}
-
-define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
-; CHECK-LABEL: test_select_cc_v1f32:
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel s0, s2, s3, eq
-  %cmp31 = fcmp oeq float %a, %b
-  %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
-  ret <1 x float> %e
-}
-
-define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) {
-; CHECK-LABEL: test_select_cc_v2f32:
-; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
-; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
-; CHECK: bsl [[DUPMASK]].8b, v2.8b, v3.8b
-  %cmp31 = fcmp oeq float %a, %b
-  %e = select i1 %cmp31, <2 x float> %c, <2 x float> %d
-  ret <2 x float> %e
-}
-
-define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) {
-; CHECK-LABEL: test_select_cc_v4f32:
-; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
-; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
-; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
-  %cmp31 = fcmp oeq float %a, %b
-  %e = select i1 %cmp31, <4x float> %c, <4x float> %d
-  ret <4x float> %e
-}
-
-define <4x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4x float> %c, <4x float> %d ) {
-; CHECK-LABEL: test_select_cc_v4f32_icmp:
-; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
-; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
-; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
-; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
-; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
-  %cmp31 = icmp eq i32 %a, %b
-  %e = select i1 %cmp31, <4x float> %c, <4x float> %d
-  ret <4x float> %e
-}
-
-define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) {
-; CHECK-LABEL: test_select_cc_v1f64:
-; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
-; CHECK: bsl v[[MASK]].8b, v2.8b, v3.8b
-  %cmp31 = fcmp oeq double %a, %b
-  %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
-  ret <1 x double> %e
-}
-
-define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d ) {
-; CHECK-LABEL: test_select_cc_v1f64_icmp:
-; CHECK-DAG: fmov [[LHS:d[0-9]+]], x0
-; CHECK-DAG: fmov [[RHS:d[0-9]+]], x1
-; CHECK: cmeq d[[MASK:[0-9]+]], [[LHS]], [[RHS]]
-; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
-  %cmp31 = icmp eq i64 %a, %b
-  %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
-  ret <1 x double> %e
-}
-
-define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) {
-; CHECK-LABEL: test_select_cc_v2f64:
-; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
-; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
-; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
-  %cmp31 = fcmp oeq double %a, %b
-  %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
-  ret <2 x double> %e
-}

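The select_cc tests removed above share one lowering recipe: the scalar condition is evaluated as a vector compare (cmeq or fcmeq), lane 0 of the resulting all-ones/all-zeros mask is broadcast with dup, and bsl then picks lanewise between the two vector operands; 64-bit results whose mask already fills the register skip the dup, and the <1 x float> case degenerates to fcsel. A minimal sketch mirroring test_select_cc_v2f32 above, with an illustrative function name:

; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

; Compare, broadcast lane 0 of the mask, then bit-select between c and d.
define <2 x float> @select_cc_example(float %a, float %b, <2 x float> %c, <2 x float> %d) {
; CHECK-LABEL: select_cc_example:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].8b, v2.8b, v3.8b
  %cmp = fcmp oeq float %a, %b
  %e = select i1 %cmp, <2 x float> %c, <2 x float> %d
  ret <2 x float> %e
}
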
Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-ldst-one.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-ldst-one.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-ldst-one.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-ldst-one.ll (removed)
@@ -1,482 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-
-%struct.uint8x16x2_t = type { [2 x <16 x i8>] }
-%struct.poly8x16x2_t = type { [2 x <16 x i8>] }
-%struct.uint8x16x3_t = type { [3 x <16 x i8>] }
-%struct.int8x16x2_t = type { [2 x <16 x i8>] }
-%struct.int16x8x2_t = type { [2 x <8 x i16>] }
-%struct.int32x4x2_t = type { [2 x <4 x i32>] }
-%struct.int64x2x2_t = type { [2 x <2 x i64>] }
-%struct.float32x4x2_t = type { [2 x <4 x float>] }
-%struct.float64x2x2_t = type { [2 x <2 x double>] }
-%struct.int8x8x2_t = type { [2 x <8 x i8>] }
-%struct.int16x4x2_t = type { [2 x <4 x i16>] }
-%struct.int32x2x2_t = type { [2 x <2 x i32>] }
-%struct.int64x1x2_t = type { [2 x <1 x i64>] }
-%struct.float32x2x2_t = type { [2 x <2 x float>] }
-%struct.float64x1x2_t = type { [2 x <1 x double>] }
-%struct.int8x16x3_t = type { [3 x <16 x i8>] }
-%struct.int16x8x3_t = type { [3 x <8 x i16>] }
-%struct.int32x4x3_t = type { [3 x <4 x i32>] }
-%struct.int64x2x3_t = type { [3 x <2 x i64>] }
-%struct.float32x4x3_t = type { [3 x <4 x float>] }
-%struct.float64x2x3_t = type { [3 x <2 x double>] }
-%struct.int8x8x3_t = type { [3 x <8 x i8>] }
-%struct.int16x4x3_t = type { [3 x <4 x i16>] }
-%struct.int32x2x3_t = type { [3 x <2 x i32>] }
-%struct.int64x1x3_t = type { [3 x <1 x i64>] }
-%struct.float32x2x3_t = type { [3 x <2 x float>] }
-%struct.float64x1x3_t = type { [3 x <1 x double>] }
-%struct.int8x16x4_t = type { [4 x <16 x i8>] }
-%struct.int16x8x4_t = type { [4 x <8 x i16>] }
-%struct.int32x4x4_t = type { [4 x <4 x i32>] }
-%struct.int64x2x4_t = type { [4 x <2 x i64>] }
-%struct.float32x4x4_t = type { [4 x <4 x float>] }
-%struct.float64x2x4_t = type { [4 x <2 x double>] }
-%struct.int8x8x4_t = type { [4 x <8 x i8>] }
-%struct.int16x4x4_t = type { [4 x <4 x i16>] }
-%struct.int32x2x4_t = type { [4 x <2 x i32>] }
-%struct.int64x1x4_t = type { [4 x <1 x i64>] }
-%struct.float32x2x4_t = type { [4 x <2 x float>] }
-%struct.float64x1x4_t = type { [4 x <1 x double>] }
-
-define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) {
-; CHECK-LABEL: test_ld_from_poll_v16i8:
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <16 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 2, i8 13, i8 14, i8 15, i8 16>
-  ret <16 x i8> %b
-}
-
-define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) {
-; CHECK-LABEL: test_ld_from_poll_v8i16:
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <8 x i16> %a, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
-  ret <8 x i16> %b
-}
-
-define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) {
-; CHECK-LABEL: test_ld_from_poll_v4i32:
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <4 x i32> %a, <i32 1, i32 2, i32 3, i32 4>
-  ret <4 x i32> %b
-}
-
-define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) {
-; CHECK-LABEL: test_ld_from_poll_v2i64:
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <2 x i64> %a, <i64 1, i64 2>
-  ret <2 x i64> %b
-}
-
-define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) {
-; CHECK-LABEL: test_ld_from_poll_v4f32:
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = fadd <4 x float> %a, <float 1.0, float 2.0, float 3.0, float 4.0>
-  ret <4 x float> %b
-}
-
-define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) {
-; CHECK-LABEL: test_ld_from_poll_v2f64:
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = fadd <2 x double> %a, <double 1.0, double 2.0>
-  ret <2 x double> %b
-}
-
-define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) {
-; CHECK-LABEL: test_ld_from_poll_v8i8:
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <8 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
-  ret <8 x i8> %b
-}
-
-define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) {
-; CHECK-LABEL: test_ld_from_poll_v4i16:
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <4 x i16> %a, <i16 1, i16 2, i16 3, i16 4>
-  ret <4 x i16> %b
-}
-
-define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) {
-; CHECK-LABEL: test_ld_from_poll_v2i32:
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
-entry:
-  %b = add <2 x i32> %a, <i32 1, i32 2>
-  ret <2 x i32> %b
-}
-
-define <16 x i8> @test_vld1q_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld1q_dup_s8:
-; CHECK: ld1r {{{ ?v[0-9]+.16b ?}}}, [x0]
-entry:
-  %0 = load i8* %a, align 1
-  %1 = insertelement <16 x i8> undef, i8 %0, i32 0
-  %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
-  ret <16 x i8> %lane
-}
-
-define <8 x i16> @test_vld1q_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld1q_dup_s16:
-; CHECK: ld1r {{{ ?v[0-9]+.8h ?}}}, [x0]
-entry:
-  %0 = load i16* %a, align 2
-  %1 = insertelement <8 x i16> undef, i16 %0, i32 0
-  %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
-  ret <8 x i16> %lane
-}
-
-define <4 x i32> @test_vld1q_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld1q_dup_s32:
-; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
-entry:
-  %0 = load i32* %a, align 4
-  %1 = insertelement <4 x i32> undef, i32 %0, i32 0
-  %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
-  ret <4 x i32> %lane
-}
-
-define <2 x i64> @test_vld1q_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld1q_dup_s64:
-; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0]
-entry:
-  %0 = load i64* %a, align 8
-  %1 = insertelement <2 x i64> undef, i64 %0, i32 0
-  %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
-  ret <2 x i64> %lane
-}
-
-define <4 x float> @test_vld1q_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld1q_dup_f32:
-; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
-entry:
-  %0 = load float* %a, align 4
-  %1 = insertelement <4 x float> undef, float %0, i32 0
-  %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
-  ret <4 x float> %lane
-}
-
-define <2 x double> @test_vld1q_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld1q_dup_f64:
-; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0]
-entry:
-  %0 = load double* %a, align 8
-  %1 = insertelement <2 x double> undef, double %0, i32 0
-  %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
-  ret <2 x double> %lane
-}
-
-define <8 x i8> @test_vld1_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld1_dup_s8:
-; CHECK: ld1r {{{ ?v[0-9]+.8b ?}}}, [x0]
-entry:
-  %0 = load i8* %a, align 1
-  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
-  %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
-  ret <8 x i8> %lane
-}
-
-define <4 x i16> @test_vld1_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld1_dup_s16:
-; CHECK: ld1r {{{ ?v[0-9]+.4h ?}}}, [x0]
-entry:
-  %0 = load i16* %a, align 2
-  %1 = insertelement <4 x i16> undef, i16 %0, i32 0
-  %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
-  ret <4 x i16> %lane
-}
-
-define <2 x i32> @test_vld1_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld1_dup_s32:
-; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0]
-entry:
-  %0 = load i32* %a, align 4
-  %1 = insertelement <2 x i32> undef, i32 %0, i32 0
-  %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
-  ret <2 x i32> %lane
-}
-
-define <1 x i64> @test_vld1_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld1_dup_s64:
-; CHECK: ldr {{d[0-9]+}}, [x0]
-entry:
-  %0 = load i64* %a, align 8
-  %1 = insertelement <1 x i64> undef, i64 %0, i32 0
-  ret <1 x i64> %1
-}
-
-define <2 x float> @test_vld1_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld1_dup_f32:
-; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0]
-entry:
-  %0 = load float* %a, align 4
-  %1 = insertelement <2 x float> undef, float %0, i32 0
-  %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
-  ret <2 x float> %lane
-}
-
-define <1 x double> @test_vld1_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld1_dup_f64:
-; CHECK: ldr {{d[0-9]+}}, [x0]
-entry:
-  %0 = load double* %a, align 8
-  %1 = insertelement <1 x double> undef, double %0, i32 0
-  ret <1 x double> %1
-}
-
-define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 {
-; Because a store operation also depends on %1, the LD1R pattern can't be
-; selected, so LDR and FMOV should be emitted instead.
-; CHECK-LABEL: testDUP.v1i64:
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
-; CHECK-DAG: fmov {{d[0-9]+}}, {{x[0-9]+}}
-; CHECK-DAG: str {{x[0-9]+}}, [{{x[0-9]+}}]
-  %1 = load i64* %a, align 8
-  store i64 %1, i64* %b, align 8
-  %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0
-  ret <1 x i64> %vecinit.i
-}
-
-define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 {
-; Because a store operation also depends on %1, the LD1R pattern can't be
-; selected, so a plain LDR should be emitted instead.
-; CHECK-LABEL: testDUP.v1f64:
-; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}]
-; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}]
-  %1 = load double* %a, align 8
-  store double %1, double* %b, align 8
-  %vecinit.i = insertelement <1 x double> undef, double %1, i32 0
-  ret <1 x double> %vecinit.i
-}
-
-define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vld1q_lane_s8:
-; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i8* %a, align 1
-  %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15
-  ret <16 x i8> %vld1_lane
-}
-
-define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vld1q_lane_s16:
-; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i16* %a, align 2
-  %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7
-  ret <8 x i16> %vld1_lane
-}
-
-define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vld1q_lane_s32:
-; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i32* %a, align 4
-  %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3
-  ret <4 x i32> %vld1_lane
-}
-
-define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vld1q_lane_s64:
-; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i64* %a, align 8
-  %vld1_lane = insertelement <2 x i64> %b, i64 %0, i32 1
-  ret <2 x i64> %vld1_lane
-}
-
-define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) {
-; CHECK-LABEL: test_vld1q_lane_f32:
-; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load float* %a, align 4
-  %vld1_lane = insertelement <4 x float> %b, float %0, i32 3
-  ret <4 x float> %vld1_lane
-}
-
-define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) {
-; CHECK-LABEL: test_vld1q_lane_f64:
-; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load double* %a, align 8
-  %vld1_lane = insertelement <2 x double> %b, double %0, i32 1
-  ret <2 x double> %vld1_lane
-}
-
-define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vld1_lane_s8:
-; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i8* %a, align 1
-  %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7
-  ret <8 x i8> %vld1_lane
-}
-
-define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vld1_lane_s16:
-; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i16* %a, align 2
-  %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3
-  ret <4 x i16> %vld1_lane
-}
-
-define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vld1_lane_s32:
-; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load i32* %a, align 4
-  %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1
-  ret <2 x i32> %vld1_lane
-}
-
-define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vld1_lane_s64:
-; CHECK: ldr {{d[0-9]+}}, [x0]
-entry:
-  %0 = load i64* %a, align 8
-  %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0
-  ret <1 x i64> %vld1_lane
-}
-
-define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) {
-; CHECK-LABEL: test_vld1_lane_f32:
-; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = load float* %a, align 4
-  %vld1_lane = insertelement <2 x float> %b, float %0, i32 1
-  ret <2 x float> %vld1_lane
-}
-
-define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) {
-; CHECK-LABEL: test_vld1_lane_f64:
-; CHECK: ldr {{d[0-9]+}}, [x0]
-entry:
-  %0 = load double* %a, align 8
-  %vld1_lane = insertelement <1 x double> undef, double %0, i32 0
-  ret <1 x double> %vld1_lane
-}
-
-define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vst1q_lane_s8:
-; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <16 x i8> %b, i32 15
-  store i8 %0, i8* %a, align 1
-  ret void
-}
-
-define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vst1q_lane_s16:
-; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <8 x i16> %b, i32 7
-  store i16 %0, i16* %a, align 2
-  ret void
-}
-
-define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vst1q_lane_s32:
-; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <4 x i32> %b, i32 3
-  store i32 %0, i32* %a, align 4
-  ret void
-}
-
-define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vst1q_lane_s64:
-; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <2 x i64> %b, i32 1
-  store i64 %0, i64* %a, align 8
-  ret void
-}
-
-define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) {
-; CHECK-LABEL: test_vst1q_lane_f32:
-; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <4 x float> %b, i32 3
-  store float %0, float* %a, align 4
-  ret void
-}
-
-define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) {
-; CHECK-LABEL: test_vst1q_lane_f64:
-; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <2 x double> %b, i32 1
-  store double %0, double* %a, align 8
-  ret void
-}
-
-define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vst1_lane_s8:
-; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <8 x i8> %b, i32 7
-  store i8 %0, i8* %a, align 1
-  ret void
-}
-
-define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vst1_lane_s16:
-; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <4 x i16> %b, i32 3
-  store i16 %0, i16* %a, align 2
-  ret void
-}
-
-define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vst1_lane_s32:
-; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <2 x i32> %b, i32 1
-  store i32 %0, i32* %a, align 4
-  ret void
-}
-
-define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vst1_lane_s64:
-; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <1 x i64> %b, i32 0
-  store i64 %0, i64* %a, align 8
-  ret void
-}
-
-define void @test_vst1_lane_f32(float* %a, <2 x float> %b) {
-; CHECK-LABEL: test_vst1_lane_f32:
-; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
-entry:
-  %0 = extractelement <2 x float> %b, i32 1
-  store float %0, float* %a, align 4
-  ret void
-}
-
-define void @test_vst1_lane_f64(double* %a, <1 x double> %b) {
-; CHECK-LABEL: test_vst1_lane_f64:
-; CHECK: str {{d[0-9]+}}, [x0]
-entry:
-  %0 = extractelement <1 x double> %b, i32 0
-  store double %0, double* %a, align 8
-  ret void
-}

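The single-element load/store tests removed above pivot on a few patterns: a scalar load splatted to every lane (insertelement plus a zero-mask shufflevector) selects the ld1r load-and-replicate instruction; a load or store of a single lane maps to ld1/st1 with a lane index; and the <1 x i64> and <1 x double> cases degenerate to plain ldr/str. A minimal sketch of the splat case, using an illustrative function name and the same 2014-era untyped-pointer load syntax as the tests:

; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

; The splatted load has no other users, so LD1R can replicate it directly.
define <4 x i32> @ld1r_example(i32* %p) {
; CHECK-LABEL: ld1r_example:
; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
  %val = load i32* %p, align 4
  %ins = insertelement <4 x i32> undef, i32 %val, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}

As the testDUP functions above demonstrate, the fold only fires when the loaded value has no other users; a second use such as a store forces a plain scalar load instead.
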
Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll (removed)
@@ -1,663 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
-; CHECK: test_vshr_n_s8
-; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
-  %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-  ret <8 x i8> %vshr_n
-}
-
-define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
-; CHECK: test_vshr_n_s16
-; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
-  %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
-  ret <4 x i16> %vshr_n
-}
-
-define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
-; CHECK: test_vshr_n_s32
-; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
-  %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
-  ret <2 x i32> %vshr_n
-}
-
-define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
-; CHECK: test_vshrq_n_s8
-; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
-  %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-  ret <16 x i8> %vshr_n
-}
-
-define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
-; CHECK: test_vshrq_n_s16
-; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
-  %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-  ret <8 x i16> %vshr_n
-}
-
-define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
-; CHECK: test_vshrq_n_s32
-; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
-  %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
-  ret <4 x i32> %vshr_n
-}
-
-define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
-; CHECK: test_vshrq_n_s64
-; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
-  %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
-  ret <2 x i64> %vshr_n
-}
-
-define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
-; CHECK: test_vshr_n_u8
-; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
-  %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-  ret <8 x i8> %vshr_n
-}
-
-define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
-; CHECK: test_vshr_n_u16
-; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
-  %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
-  ret <4 x i16> %vshr_n
-}
-
-define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
-; CHECK: test_vshr_n_u32
-; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
-  %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
-  ret <2 x i32> %vshr_n
-}
-
-define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
-; CHECK: test_vshrq_n_u8
-; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
-  %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-  ret <16 x i8> %vshr_n
-}
-
-define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
-; CHECK: test_vshrq_n_u16
-; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
-  %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-  ret <8 x i16> %vshr_n
-}
-
-define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
-; CHECK: test_vshrq_n_u32
-; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
-  %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
-  ret <4 x i32> %vshr_n
-}
-
-define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
-; CHECK: test_vshrq_n_u64
-; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
-  %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
-  ret <2 x i64> %vshr_n
-}
-
-define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsra_n_s8
-; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
-  %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-  %1 = add <8 x i8> %vsra_n, %a
-  ret <8 x i8> %1
-}
-
-define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsra_n_s16
-; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
-  %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
-  %1 = add <4 x i16> %vsra_n, %a
-  ret <4 x i16> %1
-}
-
-define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vsra_n_s32
-; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
-  %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
-  %1 = add <2 x i32> %vsra_n, %a
-  ret <2 x i32> %1
-}
-
-define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsraq_n_s8
-; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
-  %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-  %1 = add <16 x i8> %vsra_n, %a
-  ret <16 x i8> %1
-}
-
-define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsraq_n_s16
-; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
-  %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-  %1 = add <8 x i16> %vsra_n, %a
-  ret <8 x i16> %1
-}
-
-define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsraq_n_s32
-; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
-  %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
-  %1 = add <4 x i32> %vsra_n, %a
-  ret <4 x i32> %1
-}
-
-define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsraq_n_s64
-; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
-  %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
-  %1 = add <2 x i64> %vsra_n, %a
-  ret <2 x i64> %1
-}
-
-define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsra_n_u8
-; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
-  %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-  %1 = add <8 x i8> %vsra_n, %a
-  ret <8 x i8> %1
-}
-
-define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsra_n_u16
-; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
-  %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
-  %1 = add <4 x i16> %vsra_n, %a
-  ret <4 x i16> %1
-}
-
-define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vsra_n_u32
-; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
-  %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
-  %1 = add <2 x i32> %vsra_n, %a
-  ret <2 x i32> %1
-}
-
-define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsraq_n_u8
-; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
-  %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-  %1 = add <16 x i8> %vsra_n, %a
-  ret <16 x i8> %1
-}
-
-define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsraq_n_u16
-; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
-  %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-  %1 = add <8 x i16> %vsra_n, %a
-  ret <8 x i16> %1
-}
-
-define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsraq_n_u32
-; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
-  %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
-  %1 = add <4 x i32> %vsra_n, %a
-  ret <4 x i32> %1
-}
-
-define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsraq_n_u64
-; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
-  %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
-  %1 = add <2 x i64> %vsra_n, %a
-  ret <2 x i64> %1
-}
-
-define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
-; CHECK: test_vshrn_n_s16
-; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
-  %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
-  ret <8 x i8> %vshrn_n
-}
-
-define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
-; CHECK: test_vshrn_n_s32
-; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
-  %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
-  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
-  ret <4 x i16> %vshrn_n
-}
-
-define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
-; CHECK: test_vshrn_n_s64
-; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
-  %1 = ashr <2 x i64> %a, <i64 19, i64 19>
-  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
-  ret <2 x i32> %vshrn_n
-}
-
-define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
-; CHECK: test_vshrn_n_u16
-; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
-  %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
-  ret <8 x i8> %vshrn_n
-}
-
-define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
-; CHECK: test_vshrn_n_u32
-; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
-  %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
-  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
-  ret <4 x i16> %vshrn_n
-}
-
-define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
-; CHECK: test_vshrn_n_u64
-; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
-  %1 = lshr <2 x i64> %a, <i64 19, i64 19>
-  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
-  ret <2 x i32> %vshrn_n
-}
-
-define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vshrn_high_n_s16
-; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
-  %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
-  %2 = bitcast <8 x i8> %a to <1 x i64>
-  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
-  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
-  ret <16 x i8> %4
-}
-
-define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vshrn_high_n_s32
-; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
-  %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
-  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
-  %2 = bitcast <4 x i16> %a to <1 x i64>
-  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
-  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
-  ret <8 x i16> %4
-}
-
-define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vshrn_high_n_s64
-; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
-  %1 = bitcast <2 x i32> %a to <1 x i64>
-  %2 = ashr <2 x i64> %b, <i64 19, i64 19>
-  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
-  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
-  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
-  ret <4 x i32> %4
-}
-
-define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vshrn_high_n_u16
-; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
-  %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
-  %2 = bitcast <8 x i8> %a to <1 x i64>
-  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
-  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
-  ret <16 x i8> %4
-}
-
-define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vshrn_high_n_u32
-; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
-  %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
-  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
-  %2 = bitcast <4 x i16> %a to <1 x i64>
-  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
-  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
-  ret <8 x i16> %4
-}
-
-define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vshrn_high_n_u64
-; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
-  %1 = bitcast <2 x i32> %a to <1 x i64>
-  %2 = lshr <2 x i64> %b, <i64 19, i64 19>
-  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
-  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
-  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
-  ret <4 x i32> %4
-}
-
-define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqshrun_high_n_s16
-; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
-  %vqshrun = tail call <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
-  %1 = bitcast <8 x i8> %a to <1 x i64>
-  %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
-  ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqshrun_high_n_s32
-; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
-  %vqshrun = tail call <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
-  %1 = bitcast <4 x i16> %a to <1 x i64>
-  %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
-  ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqshrun_high_n_s64
-; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
-  %1 = bitcast <2 x i32> %a to <1 x i64>
-  %vqshrun = tail call <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19)
-  %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
-  ret <4 x i32> %3
-}
-
-define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vrshrn_high_n_s16
-; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
-  %vrshrn = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
-  %1 = bitcast <8 x i8> %a to <1 x i64>
-  %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
-  ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vrshrn_high_n_s32
-; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
-  %vrshrn = tail call <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
-  %1 = bitcast <4 x i16> %a to <1 x i64>
-  %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
-  ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vrshrn_high_n_s64
-; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
-  %1 = bitcast <2 x i32> %a to <1 x i64>
-  %vrshrn = tail call <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64> %b, i32 19)
-  %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
-  ret <4 x i32> %3
-}
-
-define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqrshrun_high_n_s16
-; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
-  %vqrshrun = tail call <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
-  %1 = bitcast <8 x i8> %a to <1 x i64>
-  %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
-  ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqrshrun_high_n_s32
-; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
-  %vqrshrun = tail call <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
-  %1 = bitcast <4 x i16> %a to <1 x i64>
-  %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
-  ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqrshrun_high_n_s64
-; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
-  %1 = bitcast <2 x i32> %a to <1 x i64>
-  %vqrshrun = tail call <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19)
-  %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
-  ret <4 x i32> %3
-}
-
-define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqshrn_high_n_s16
-; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
-  %vqshrn = tail call <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
-  %1 = bitcast <8 x i8> %a to <1 x i64>
-  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
-  ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqshrn_high_n_s32
-; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
-  %vqshrn = tail call <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
-  %1 = bitcast <4 x i16> %a to <1 x i64>
-  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
-  ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqshrn_high_n_s64
-; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
-  %1 = bitcast <2 x i32> %a to <1 x i64>
-  %vqshrn = tail call <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19)
-  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
-  ret <4 x i32> %3
-}
-
-define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqshrn_high_n_u16
-; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
-  %vqshrn = tail call <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
-  %1 = bitcast <8 x i8> %a to <1 x i64>
-  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
-  ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqshrn_high_n_u32
-; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
-  %vqshrn = tail call <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
-  %1 = bitcast <4 x i16> %a to <1 x i64>
-  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
-  ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqshrn_high_n_u64
-; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
-  %1 = bitcast <2 x i32> %a to <1 x i64>
-  %vqshrn = tail call <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19)
-  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
-  ret <4 x i32> %3
-}
-
-define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqrshrn_high_n_s16
-; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
-  %vqrshrn = tail call <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
-  %1 = bitcast <8 x i8> %a to <1 x i64>
-  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
-  ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqrshrn_high_n_s32
-; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
-  %vqrshrn = tail call <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
-  %1 = bitcast <4 x i16> %a to <1 x i64>
-  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
-  ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqrshrn_high_n_s64
-; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
-  %1 = bitcast <2 x i32> %a to <1 x i64>
-  %vqrshrn = tail call <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19)
-  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
-  ret <4 x i32> %3
-}
-
-define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqrshrn_high_n_u16
-; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
-  %vqrshrn = tail call <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
-  %1 = bitcast <8 x i8> %a to <1 x i64>
-  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
-  ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqrshrn_high_n_u32
-; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
-  %vqrshrn = tail call <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
-  %1 = bitcast <4 x i16> %a to <1 x i64>
-  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
-  ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqrshrn_high_n_u64
-; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
-  %1 = bitcast <2 x i32> %a to <1 x i64>
-  %vqrshrn = tail call <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19)
-  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
-  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
-  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
-  ret <4 x i32> %3
-}
-
-
-
-declare <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64>, i32)
-
-declare <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)
-
-declare <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
-
-declare <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)
-
-declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)
-
-declare <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)
-
-declare <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)
-
-declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)
-
-declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)
-
-declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)
-
-declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)
-
-declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
-
-declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)
-
-define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvt_n_s64_f64
-; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
-  %1 = tail call <1 x i64> @llvm.arm64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
-  ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvt_n_u64_f64
-; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
-  %1 = tail call <1 x i64> @llvm.arm64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
-  ret <1 x i64> %1
-}
-
-define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
-; CHECK-LABEL: test_vcvt_n_f64_s64
-; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
-  %1 = tail call <1 x double> @llvm.arm64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
-  ret <1 x double> %1
-}
-
-define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
-; CHECK-LABEL: test_vcvt_n_f64_u64
-; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
-  %1 = tail call <1 x double> @llvm.arm64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
-  ret <1 x double> %1
-}
-
-declare <1 x i64> @llvm.arm64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
-declare <1 x i64> @llvm.arm64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
-declare <1 x double> @llvm.arm64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
-declare <1 x double> @llvm.arm64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)

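For reference, the tests removed above exercise the shift and fixed-point conversion intrinsics under their old arm64 prefix; after this commit the same operations are spelled with the aarch64 prefix. A minimal sketch of the renamed form (the function name is illustrative, not from the tree):

define <8 x i8> @sqshrn_example(<8 x i16> %a) {
; Signed saturating shift-right-narrow by 3, under the renamed intrinsic prefix.
  %r = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %r
}
declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32)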
Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-vget.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-vget.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-vget.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-simd-vget.ll (removed)
@@ -1,225 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define <8 x i8> @test_vget_high_s8(<16 x i8> %a) {
-; CHECK-LABEL: test_vget_high_s8:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vget_high_s16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_high_s16:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  ret <4 x i16> %shuffle.i
-}
-
-define <2 x i32> @test_vget_high_s32(<4 x i32> %a) {
-; CHECK-LABEL: test_vget_high_s32:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  ret <2 x i32> %shuffle.i
-}
-
-define <1 x i64> @test_vget_high_s64(<2 x i64> %a) {
-; CHECK-LABEL: test_vget_high_s64:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
-  ret <1 x i64> %shuffle.i
-}
-
-define <8 x i8> @test_vget_high_u8(<16 x i8> %a) {
-; CHECK-LABEL: test_vget_high_u8:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vget_high_u16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_high_u16:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  ret <4 x i16> %shuffle.i
-}
-
-define <2 x i32> @test_vget_high_u32(<4 x i32> %a) {
-; CHECK-LABEL: test_vget_high_u32:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  ret <2 x i32> %shuffle.i
-}
-
-define <1 x i64> @test_vget_high_u64(<2 x i64> %a) {
-; CHECK-LABEL: test_vget_high_u64:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
-  ret <1 x i64> %shuffle.i
-}
-
-define <1 x i64> @test_vget_high_p64(<2 x i64> %a) {
-; CHECK-LABEL: test_vget_high_p64:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
-  ret <1 x i64> %shuffle.i
-}
-
-define <4 x i16> @test_vget_high_f16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_high_f16:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  ret <4 x i16> %shuffle.i
-}
-
-define <2 x float> @test_vget_high_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vget_high_f32:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
-  ret <2 x float> %shuffle.i
-}
-
-define <8 x i8> @test_vget_high_p8(<16 x i8> %a) {
-; CHECK-LABEL: test_vget_high_p8:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vget_high_p16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_high_p16:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  ret <4 x i16> %shuffle.i
-}
-
-define <1 x double> @test_vget_high_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vget_high_f64:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
-entry:
-  %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> <i32 1>
-  ret <1 x double> %shuffle.i
-}
-
-define <8 x i8> @test_vget_low_s8(<16 x i8> %a) {
-; CHECK-LABEL: test_vget_low_s8:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vget_low_s16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_low_s16:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  ret <4 x i16> %shuffle.i
-}
-
-define <2 x i32> @test_vget_low_s32(<4 x i32> %a) {
-; CHECK-LABEL: test_vget_low_s32:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-  ret <2 x i32> %shuffle.i
-}
-
-define <1 x i64> @test_vget_low_s64(<2 x i64> %a) {
-; CHECK-LABEL: test_vget_low_s64:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
-  ret <1 x i64> %shuffle.i
-}
-
-define <8 x i8> @test_vget_low_u8(<16 x i8> %a) {
-; CHECK-LABEL: test_vget_low_u8:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vget_low_u16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_low_u16:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  ret <4 x i16> %shuffle.i
-}
-
-define <2 x i32> @test_vget_low_u32(<4 x i32> %a) {
-; CHECK-LABEL: test_vget_low_u32:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-  ret <2 x i32> %shuffle.i
-}
-
-define <1 x i64> @test_vget_low_u64(<2 x i64> %a) {
-; CHECK-LABEL: test_vget_low_u64:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
-  ret <1 x i64> %shuffle.i
-}
-
-define <1 x i64> @test_vget_low_p64(<2 x i64> %a) {
-; CHECK-LABEL: test_vget_low_p64:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
-  ret <1 x i64> %shuffle.i
-}
-
-define <4 x i16> @test_vget_low_f16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_low_f16:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  ret <4 x i16> %shuffle.i
-}
-
-define <2 x float> @test_vget_low_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vget_low_f32:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
-  ret <2 x float> %shuffle.i
-}
-
-define <8 x i8> @test_vget_low_p8(<16 x i8> %a) {
-; CHECK-LABEL: test_vget_low_p8:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vget_low_p16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_low_p16:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  ret <4 x i16> %shuffle.i
-}
-
-define <1 x double> @test_vget_low_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vget_low_f64:
-; CHECK: ret
-entry:
-  %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> zeroinitializer
-  ret <1 x double> %shuffle.i
-}

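The vget tests removed above pin down two lowerings: extracting the high half of a 128-bit vector becomes "ext v0.16b, v0.16b, vN.16b, #8", while extracting the low half costs nothing because a D register aliases the low 64 bits of its Q register. A minimal sketch of the low-half case, using the same shufflevector idiom (the function name is illustrative):

define <8 x i8> @low_half_example(<16 x i8> %v) {
; Taking lanes 0-7 needs no instruction: d0 is the low half of q0, so this is just "ret".
  %lo = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %lo
}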
Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-v1i1-setcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-v1i1-setcc.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-v1i1-setcc.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-v1i1-setcc.ll (removed)
@@ -1,69 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-; arm64 has a separate copy as aarch64-neon-v1i1-setcc.ll
-
-; This file tests the DAG node like "v1i1 SETCC v1i64, v1i64". As the v1i1 type
-; is illegal in the AArch64 backend, the legalizer tries to scalarize this node.
-; As the v1i64 operands of SETCC are legal types, they will not be scalarized.
-; Currently the type legalizer will have an assertion failure as it assumes all
-; operands of SETCC have been legalized.
-; FIXME: If the type scalarization algorithm is improved and can legalize
-; "v1i1 SETCC" correctly, these test cases will no longer be needed.
-
-define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
-; CHECK-LABEL: test_sext_extr_cmp_0:
-; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
-  %1 = icmp sge <1 x i64> %v1, %v2
-  %2 = extractelement <1 x i1> %1, i32 0
-  %vget_lane = sext i1 %2 to i64
-  ret i64 %vget_lane
-}
-
-define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
-; CHECK-LABEL: test_sext_extr_cmp_1:
-; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
-  %1 = fcmp oeq <1 x double> %v1, %v2
-  %2 = extractelement <1 x i1> %1, i32 0
-  %vget_lane = sext i1 %2 to i64
-  ret i64 %vget_lane
-}
-
-define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
-; CHECK-LABEL: test_select_v1i1_0:
-; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %1 = icmp eq <1 x i64> %v1, %v2
-  %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
-  ret <1 x i64> %res
-}
-
-define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
-; CHECK-LABEL: test_select_v1i1_1:
-; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %1 = fcmp oeq <1 x double> %v1, %v2
-  %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
-  ret <1 x i64> %res
-}
-
-define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
-; CHECK-LABEL: test_select_v1i1_2:
-; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-  %1 = icmp eq <1 x i64> %v1, %v2
-  %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3
-  ret <1 x double> %res
-}
-
-define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
-; CHECK-LABEL: test_br_extr_cmp:
-; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}
-  %1 = icmp eq <1 x i64> %v1, %v2
-  %2 = extractelement <1 x i1> %1, i32 0
-  br i1 %2, label %if.end, label %if.then
-
-if.then:
-  ret i32 0;
-
-if.end:
-  ret i32 1;
-}

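The setcc tests removed above guard the scalarization of "v1i1 SETCC" nodes described in the file comment. A minimal sketch of the pattern being protected (illustrative name; zext is used instead of sext only to keep it short):

define i64 @v1i1_cmp_example(<1 x i64> %x, <1 x i64> %y) {
; The <1 x i1> compare result is scalarized, so the backend should emit a plain scalar cmp.
  %c = icmp eq <1 x i64> %x, %y
  %b = extractelement <1 x i1> %c, i32 0
  %r = zext i1 %b to i64
  ret i64 %r
}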
Removed: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll (removed)
@@ -1,175 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-; FIXME: We should not generate ld/st for such a register spill/fill, because this
-; test case is very simple and the register pressure is not high. If the
-; spill/fill algorithm is improved, this test case may no longer be triggered,
-; and it can then be deleted.
-define i32 @spill.DPairReg(i32* %arg1, i32 %arg2) {
-; CHECK-LABEL: spill.DPairReg:
-; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-entry:
-  %vld = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %arg1)
-  %cmp = icmp eq i32 %arg2, 0
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:
-  tail call void @foo()
-  br label %if.end
-
-if.end:
-  %vld.extract = extractvalue { <2 x i32>, <2 x i32> } %vld, 0
-  %res = extractelement <2 x i32> %vld.extract, i32 1
-  ret i32 %res
-}
-
-define i16 @spill.DTripleReg(i16* %arg1, i32 %arg2) {
-; CHECK-LABEL: spill.DTripleReg:
-; CHECK: ld3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-entry:
-  %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %arg1)
-  %cmp = icmp eq i32 %arg2, 0
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:
-  tail call void @foo()
-  br label %if.end
-
-if.end:
-  %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0
-  %res = extractelement <4 x i16> %vld.extract, i32 1
-  ret i16 %res
-}
-
-define i16 @spill.DQuadReg(i16* %arg1, i32 %arg2) {
-; CHECK-LABEL: spill.DQuadReg:
-; CHECK: ld4 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-entry:
-  %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %arg1)
-  %cmp = icmp eq i32 %arg2, 0
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:
-  tail call void @foo()
-  br label %if.end
-
-if.end:
-  %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0
-  %res = extractelement <4 x i16> %vld.extract, i32 0
-  ret i16 %res
-}
-
-define i32 @spill.QPairReg(i32* %arg1, i32 %arg2) {
-; CHECK-LABEL: spill.QPairReg:
-; CHECK: ld2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-entry:
-  %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %arg1)
-  %cmp = icmp eq i32 %arg2, 0
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:
-  tail call void @foo()
-  br label %if.end
-
-if.end:
-  %vld.extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
-  %res = extractelement <4 x i32> %vld.extract, i32 1
-  ret i32 %res
-}
-
-define float @spill.QTripleReg(float* %arg1, i32 %arg2) {
-; CHECK-LABEL: spill.QTripleReg:
-; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-entry:
-  %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float* %arg1)
-  %cmp = icmp eq i32 %arg2, 0
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:
-  tail call void @foo()
-  br label %if.end
-
-if.end:
-  %vld3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0
-  %res = extractelement <4 x float> %vld3.extract, i32 1
-  ret float %res
-}
-
-define i8 @spill.QQuadReg(i8* %arg1, i32 %arg2) {
-; CHECK-LABEL: spill.QQuadReg:
-; CHECK: ld4 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-entry:
-  %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %arg1)
-  %cmp = icmp eq i32 %arg2, 0
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:
-  tail call void @foo()
-  br label %if.end
-
-if.end:
-  %vld.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld, 0
-  %res = extractelement <16 x i8> %vld.extract, i32 1
-  ret i8 %res
-}
-
-declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32*)
-declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16*)
-declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16*)
-declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32*)
-declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float*)
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8*)
-
-declare void @foo()
-
-; FIXME: We should not generate ld/st for such a register spill/fill, because this
-; test case is very simple and the register pressure is not high. If the
-; spill/fill algorithm is improved, this test case may no longer be triggered,
-; and it can then be deleted.
-; check the spill for Register Class QPair_with_qsub_0_in_FPR128Lo
-define <8 x i16> @test_2xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) {
-  tail call void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr)
-  tail call void @foo()
-  %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
-  %1 = bitcast <2 x i64> %sv to <8 x i16>
-  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-  %3 = mul <8 x i16> %2, %2
-  ret <8 x i16> %3
-}
-
-; check the spill for Register Class QTriple_with_qsub_0_in_FPR128Lo
-define <8 x i16> @test_3xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) {
-  tail call void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr)
-  tail call void @foo()
-  %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
-  %1 = bitcast <2 x i64> %sv to <8 x i16>
-  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-  %3 = mul <8 x i16> %2, %2
-  ret <8 x i16> %3
-}
-
-; check the spill for Register Class QQuad_with_qsub_0_in_FPR128Lo
-define <8 x i16> @test_4xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) {
-  tail call void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr)
-  tail call void @foo()
-  %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
-  %1 = bitcast <2 x i64> %sv to <8 x i16>
-  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-  %3 = mul <8 x i16> %2, %2
-  ret <8 x i16> %3
-}
-
-declare void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*)
-declare void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
-declare void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)

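The spill tests removed above all share one shape: a multi-register NEON load whose result is live across a call, so the allocator must spill and refill the whole register tuple with st1/ld1. A minimal sketch under the renamed intrinsic prefix (@clobber is a hypothetical external function standing in for @foo):

define i32 @pair_spill_example(i32* %p) {
entry:
  %vld = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %p)
; The call clobbers the vector registers, forcing a spill and fill of the pair.
  tail call void @clobber()
  %v0 = extractvalue { <2 x i32>, <2 x i32> } %vld, 0
  %res = extractelement <2 x i32> %v0, i32 1
  ret i32 %res
}
declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*)
declare void @clobber()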
Removed: llvm/trunk/test/CodeGen/ARM64/abi-varargs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/abi-varargs.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/abi-varargs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/abi-varargs.ll (removed)
@@ -1,191 +0,0 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
-target triple = "arm64-apple-ios7.0.0"
-
-; rdar://13625505
-; Here we have 9 fixed integer arguments; the 9th argument is passed on the stack,
-; and the varargs start right after it at 8-byte alignment.
-define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp {
-; CHECK-LABEL: fn9:
-; 9th fixed argument
-; CHECK: ldr {{w[0-9]+}}, [sp, #64]
-; CHECK: add [[ARGS:x[0-9]+]], sp, #72
-; CHECK: add {{x[0-9]+}}, [[ARGS]], #8
-; First vararg
-; CHECK: ldr {{w[0-9]+}}, [sp, #72]
-; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8
-; Second vararg
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
-; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8
-; Third vararg
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
-  %1 = alloca i32, align 4
-  %2 = alloca i32, align 4
-  %3 = alloca i32, align 4
-  %4 = alloca i32, align 4
-  %5 = alloca i32, align 4
-  %6 = alloca i32, align 4
-  %7 = alloca i32, align 4
-  %8 = alloca i32, align 4
-  %9 = alloca i32, align 4
-  %args = alloca i8*, align 8
-  %a10 = alloca i32, align 4
-  %a11 = alloca i32, align 4
-  %a12 = alloca i32, align 4
-  store i32 %a1, i32* %1, align 4
-  store i32 %a2, i32* %2, align 4
-  store i32 %a3, i32* %3, align 4
-  store i32 %a4, i32* %4, align 4
-  store i32 %a5, i32* %5, align 4
-  store i32 %a6, i32* %6, align 4
-  store i32 %a7, i32* %7, align 4
-  store i32 %a8, i32* %8, align 4
-  store i32 %a9, i32* %9, align 4
-  %10 = bitcast i8** %args to i8*
-  call void @llvm.va_start(i8* %10)
-  %11 = va_arg i8** %args, i32
-  store i32 %11, i32* %a10, align 4
-  %12 = va_arg i8** %args, i32
-  store i32 %12, i32* %a11, align 4
-  %13 = va_arg i8** %args, i32
-  store i32 %13, i32* %a12, align 4
-  ret void
-}
-
-declare void @llvm.va_start(i8*) nounwind
-
-define i32 @main() nounwind ssp {
-; CHECK-LABEL: main:
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK: str {{x[0-9]+}}, [sp, #8]
-; CHECK: str {{w[0-9]+}}, [sp]
-  %a1 = alloca i32, align 4
-  %a2 = alloca i32, align 4
-  %a3 = alloca i32, align 4
-  %a4 = alloca i32, align 4
-  %a5 = alloca i32, align 4
-  %a6 = alloca i32, align 4
-  %a7 = alloca i32, align 4
-  %a8 = alloca i32, align 4
-  %a9 = alloca i32, align 4
-  %a10 = alloca i32, align 4
-  %a11 = alloca i32, align 4
-  %a12 = alloca i32, align 4
-  store i32 1, i32* %a1, align 4
-  store i32 2, i32* %a2, align 4
-  store i32 3, i32* %a3, align 4
-  store i32 4, i32* %a4, align 4
-  store i32 5, i32* %a5, align 4
-  store i32 6, i32* %a6, align 4
-  store i32 7, i32* %a7, align 4
-  store i32 8, i32* %a8, align 4
-  store i32 9, i32* %a9, align 4
-  store i32 10, i32* %a10, align 4
-  store i32 11, i32* %a11, align 4
-  store i32 12, i32* %a12, align 4
-  %1 = load i32* %a1, align 4
-  %2 = load i32* %a2, align 4
-  %3 = load i32* %a3, align 4
-  %4 = load i32* %a4, align 4
-  %5 = load i32* %a5, align 4
-  %6 = load i32* %a6, align 4
-  %7 = load i32* %a7, align 4
-  %8 = load i32* %a8, align 4
-  %9 = load i32* %a9, align 4
-  %10 = load i32* %a10, align 4
-  %11 = load i32* %a11, align 4
-  %12 = load i32* %a12, align 4
-  call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...)* @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
-  ret i32 0
-}
-
-; rdar://13668483
-@.str = private unnamed_addr constant [4 x i8] c"fmt\00", align 1
-define void @foo(i8* %fmt, ...) nounwind {
-entry:
-; CHECK-LABEL: foo:
-; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8
-; CHECK: ldr {{w[0-9]+}}, [sp, #48]
-; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #15
-; CHECK: and x[[ADDR:[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0
-; CHECK: ldr {{q[0-9]+}}, [x[[ADDR]]]
-  %fmt.addr = alloca i8*, align 8
-  %args = alloca i8*, align 8
-  %vc = alloca i32, align 4
-  %vv = alloca <4 x i32>, align 16
-  store i8* %fmt, i8** %fmt.addr, align 8
-  %args1 = bitcast i8** %args to i8*
-  call void @llvm.va_start(i8* %args1)
-  %0 = va_arg i8** %args, i32
-  store i32 %0, i32* %vc, align 4
-  %1 = va_arg i8** %args, <4 x i32>
-  store <4 x i32> %1, <4 x i32>* %vv, align 16
-  ret void
-}
-
-define void @bar(i32 %x, <4 x i32> %y) nounwind {
-entry:
-; CHECK-LABEL: bar:
-; CHECK: str {{q[0-9]+}}, [sp, #16]
-; CHECK: str {{x[0-9]+}}, [sp]
-  %x.addr = alloca i32, align 4
-  %y.addr = alloca <4 x i32>, align 16
-  store i32 %x, i32* %x.addr, align 4
-  store <4 x i32> %y, <4 x i32>* %y.addr, align 16
-  %0 = load i32* %x.addr, align 4
-  %1 = load <4 x i32>* %y.addr, align 16
-  call void (i8*, ...)* @foo(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %0, <4 x i32> %1)
-  ret void
-}
-
-; rdar://13668927
-; When passing 16-byte-aligned small structs as varargs, make sure the caller's
-; stack slot is 16-byte aligned.
-%struct.s41 = type { i32, i16, i32, i16 }
-define void @foo2(i8* %fmt, ...) nounwind {
-entry:
-; CHECK-LABEL: foo2:
-; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8
-; CHECK: ldr {{w[0-9]+}}, [sp, #48]
-; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #15
-; CHECK: and x[[ADDR:[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0
-; CHECK: ldr {{q[0-9]+}}, [x[[ADDR]]]
-  %fmt.addr = alloca i8*, align 8
-  %args = alloca i8*, align 8
-  %vc = alloca i32, align 4
-  %vs = alloca %struct.s41, align 16
-  store i8* %fmt, i8** %fmt.addr, align 8
-  %args1 = bitcast i8** %args to i8*
-  call void @llvm.va_start(i8* %args1)
-  %0 = va_arg i8** %args, i32
-  store i32 %0, i32* %vc, align 4
-  %ap.cur = load i8** %args
-  %1 = getelementptr i8* %ap.cur, i32 15
-  %2 = ptrtoint i8* %1 to i64
-  %3 = and i64 %2, -16
-  %ap.align = inttoptr i64 %3 to i8*
-  %ap.next = getelementptr i8* %ap.align, i32 16
-  store i8* %ap.next, i8** %args
-  %4 = bitcast i8* %ap.align to %struct.s41*
-  %5 = bitcast %struct.s41* %vs to i8*
-  %6 = bitcast %struct.s41* %4 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* %6, i64 16, i32 16, i1 false)
-  ret void
-}
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-
-define void @bar2(i32 %x, i128 %s41.coerce) nounwind {
-entry:
-; CHECK-LABEL: bar2:
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK: str {{x[0-9]+}}, [sp]
-  %x.addr = alloca i32, align 4
-  %s41 = alloca %struct.s41, align 16
-  store i32 %x, i32* %x.addr, align 4
-  %0 = bitcast %struct.s41* %s41 to i128*
-  store i128 %s41.coerce, i128* %0, align 1
-  %1 = load i32* %x.addr, align 4
-  %2 = bitcast %struct.s41* %s41 to i128*
-  %3 = load i128* %2, align 1
-  call void (i8*, ...)* @foo2(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %1, i128 %3)
-  ret void
-}

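The varargs tests removed above fix where the variadic area begins: with nine fixed integer arguments, the ninth is passed on the stack and the first vararg follows it at 8-byte alignment. A minimal va_arg sketch in the same era's IR (the function name is illustrative):

define i32 @first_vararg_example(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind {
  %args = alloca i8*, align 8
  %ap = bitcast i8** %args to i8*
  call void @llvm.va_start(i8* %ap)
; On this ABI the first vararg sits just past %a9 on the stack, 8-byte aligned.
  %v = va_arg i8** %args, i32
  call void @llvm.va_end(i8* %ap)
  ret i32 %v
}
declare void @llvm.va_start(i8*) nounwind
declare void @llvm.va_end(i8*) nounwind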
Removed: llvm/trunk/test/CodeGen/ARM64/abi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/abi.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/abi.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/abi.ll (removed)
@@ -1,238 +0,0 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
-; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
-target triple = "arm64-apple-darwin"
-
-; rdar://9932559
-define i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 signext %a4, i16 signext %a5, i64 %a6, i64 %a7, i64 %a8, i8 signext %b1, i16 signext %b2, i8 signext %b3, i8 signext %b4) nounwind readnone noinline {
-entry:
-; CHECK-LABEL: i8i16callee:
-; The 9th, 10th, 11th and 12th arguments are passed at sp, sp+2, sp+4 and sp+5.
-; They are i8, i16, i8 and i8.
-; CHECK: ldrsb	{{w[0-9]+}}, [sp, #5]
-; CHECK: ldrsh	{{w[0-9]+}}, [sp, #2]
-; CHECK: ldrsb	{{w[0-9]+}}, [sp]
-; CHECK: ldrsb	{{w[0-9]+}}, [sp, #4]
-; FAST-LABEL: i8i16callee:
-; FAST: ldrb  {{w[0-9]+}}, [sp, #5]
-; FAST: ldrb  {{w[0-9]+}}, [sp, #4]
-; FAST: ldrh  {{w[0-9]+}}, [sp, #2]
-; FAST: ldrb  {{w[0-9]+}}, [sp]
-  %conv = sext i8 %a4 to i64
-  %conv3 = sext i16 %a5 to i64
-  %conv8 = sext i8 %b1 to i64
-  %conv9 = sext i16 %b2 to i64
-  %conv11 = sext i8 %b3 to i64
-  %conv13 = sext i8 %b4 to i64
-  %add10 = add i64 %a2, %a1
-  %add12 = add i64 %add10, %a3
-  %add14 = add i64 %add12, %conv
-  %add = add i64 %add14, %conv3
-  %add1 = add i64 %add, %a6
-  %add2 = add i64 %add1, %a7
-  %add4 = add i64 %add2, %a8
-  %add5 = add i64 %add4, %conv8
-  %add6 = add i64 %add5, %conv9
-  %add7 = add i64 %add6, %conv11
-  %add15 = add i64 %add7, %conv13
-  %sext = shl i64 %add15, 32
-  %conv17 = ashr exact i64 %sext, 32
-  ret i64 %conv17
-}
-
-define i32 @i8i16caller() nounwind readnone {
-entry:
-; CHECK: i8i16caller
-; The 9th, 10th, 11th and 12th arguments are passed at sp, sp+2, sp+4 and sp+5.
-; They are i8, i16, i8 and i8.
-; CHECK: strb {{w[0-9]+}}, [sp, #5]
-; CHECK: strb {{w[0-9]+}}, [sp, #4]
-; CHECK: strh {{w[0-9]+}}, [sp, #2]
-; CHECK: strb {{w[0-9]+}}, [sp]
-; CHECK: bl
-; FAST: i8i16caller
-; FAST: strb {{w[0-9]+}}, [sp]
-; FAST: strh {{w[0-9]+}}, [sp, #2]
-; FAST: strb {{w[0-9]+}}, [sp, #4]
-; FAST: strb {{w[0-9]+}}, [sp, #5]
-; FAST: bl
-  %call = tail call i64 @i8i16callee(i64 0, i64 1, i64 2, i8 signext 3, i16 signext 4, i64 5, i64 6, i64 7, i8 signext 97, i16 signext 98, i8 signext 99, i8 signext 100)
-  %conv = trunc i64 %call to i32
-  ret i32 %conv
-}
-
-; rdar://12651543
-define double @circle_center([2 x float] %a) nounwind ssp {
-  %call = tail call double @ext([2 x float] %a) nounwind
-; CHECK: circle_center
-; CHECK: bl
-  ret double %call
-}
-declare double @ext([2 x float])
-
-; rdar://12656141
-; A 16-byte vector should be 16-byte aligned when passed on the stack.
-; A double argument will be passed on the stack, so the vector should be at sp+16.
-define double @fixed_4i(<4 x i32>* nocapture %in) nounwind {
-entry:
-; CHECK: fixed_4i
-; CHECK: str [[REG_1:q[0-9]+]], [sp, #16]
-; FAST: fixed_4i
-; FAST: sub sp, sp, #64
-; FAST: mov x[[ADDR:[0-9]+]], sp
-; FAST: str [[REG_1:q[0-9]+]], [x[[ADDR]], #16]
-  %0 = load <4 x i32>* %in, align 16
-  %call = tail call double @args_vec_4i(double 3.000000e+00, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, double 3.000000e+00, <4 x i32> %0, i8 signext 3)
-  ret double %call
-}
-declare double @args_vec_4i(double, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, double, <4 x i32>, i8 signext)
-
-; rdar://12695237
-; d8 at sp, i in register w0.
-@g_d = common global double 0.000000e+00, align 8
-define void @test1(float %f1, double %d1, double %d2, double %d3, double %d4,
-       double %d5, double %d6, double %d7, double %d8, i32 %i) nounwind ssp {
-entry:
-; CHECK: test1
-; CHECK: ldr [[REG_1:d[0-9]+]], [sp]
-; CHECK: scvtf [[REG_2:s[0-9]+]], w0
-; CHECK: fadd s0, [[REG_2]], s0
-  %conv = sitofp i32 %i to float
-  %add = fadd float %conv, %f1
-  %conv1 = fpext float %add to double
-  %add2 = fadd double %conv1, %d7
-  %add3 = fadd double %add2, %d8
-  store double %add3, double* @g_d, align 8
-  ret void
-}
-
-; i9 at sp, d1 in register s0.
-define void @test2(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
-            i32 %i7, i32 %i8, i32 %i9, float %d1) nounwind ssp {
-entry:
-; CHECK: test2
-; CHECK: scvtf [[REG_2:s[0-9]+]], w0
-; CHECK: fadd s0, [[REG_2]], s0
-; CHECK: ldr [[REG_1:s[0-9]+]], [sp]
-  %conv = sitofp i32 %i1 to float
-  %add = fadd float %conv, %d1
-  %conv1 = fpext float %add to double
-  %conv2 = sitofp i32 %i8 to double
-  %add3 = fadd double %conv2, %conv1
-  %conv4 = sitofp i32 %i9 to double
-  %add5 = fadd double %conv4, %add3
-  store double %add5, double* @g_d, align 8
-  ret void
-}
-
-; rdar://12648441
-; Check alignment on stack for v64, f64, i64, f32, i32.
-define double @test3(<2 x i32>* nocapture %in) nounwind {
-entry:
-; CHECK: test3
-; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
-; FAST: test3
-; FAST: sub sp, sp, #32
-; FAST: mov x[[ADDR:[0-9]+]], sp
-; FAST: str [[REG_1:d[0-9]+]], [x[[ADDR]], #8]
-  %0 = load <2 x i32>* %in, align 8
-  %call = tail call double @args_vec_2i(double 3.000000e+00, <2 x i32> %0,
-          <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0,
-          <2 x i32> %0, float 3.000000e+00, <2 x i32> %0, i8 signext 3)
-  ret double %call
-}
-declare double @args_vec_2i(double, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>,
-               <2 x i32>, <2 x i32>, <2 x i32>, float, <2 x i32>, i8 signext)
-
-define double @test4(double* nocapture %in) nounwind {
-entry:
-; CHECK: test4
-; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
-; CHECK: str [[REG_2:w[0-9]+]], [sp]
-; CHECK: orr w0, wzr, #0x3
-  %0 = load double* %in, align 8
-  %call = tail call double @args_f64(double 3.000000e+00, double %0, double %0,
-          double %0, double %0, double %0, double %0, double %0,
-          float 3.000000e+00, double %0, i8 signext 3)
-  ret double %call
-}
-declare double @args_f64(double, double, double, double, double, double, double,
-               double, float, double, i8 signext)
-
-define i64 @test5(i64* nocapture %in) nounwind {
-entry:
-; CHECK: test5
-; CHECK: strb [[REG_3:w[0-9]+]], [sp, #16]
-; CHECK: str [[REG_1:x[0-9]+]], [sp, #8]
-; CHECK: str [[REG_2:w[0-9]+]], [sp]
-  %0 = load i64* %in, align 8
-  %call = tail call i64 @args_i64(i64 3, i64 %0, i64 %0, i64 %0, i64 %0, i64 %0,
-                         i64 %0, i64 %0, i32 3, i64 %0, i8 signext 3)
-  ret i64 %call
-}
-declare i64 @args_i64(i64, i64, i64, i64, i64, i64, i64, i64, i32, i64,
-             i8 signext)
-
-define i32 @test6(float* nocapture %in) nounwind {
-entry:
-; CHECK: test6
-; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
-; CHECK: str [[REG_1:s[0-9]+]], [sp, #4]
-; CHECK: strh [[REG_3:w[0-9]+]], [sp]
-  %0 = load float* %in, align 4
-  %call = tail call i32 @args_f32(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
-          i32 7, i32 8, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
-          float 6.0, float 7.0, float 8.0, i16 signext 3, float %0,
-          i8 signext 3)
-  ret i32 %call
-}
-declare i32 @args_f32(i32, i32, i32, i32, i32, i32, i32, i32,
-                      float, float, float, float, float, float, float, float,
-                      i16 signext, float, i8 signext)
-
-define i32 @test7(i32* nocapture %in) nounwind {
-entry:
-; CHECK: test7
-; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
-; CHECK: str [[REG_1:w[0-9]+]], [sp, #4]
-; CHECK: strh [[REG_3:w[0-9]+]], [sp]
-  %0 = load i32* %in, align 4
-  %call = tail call i32 @args_i32(i32 3, i32 %0, i32 %0, i32 %0, i32 %0, i32 %0,
-                         i32 %0, i32 %0, i16 signext 3, i32 %0, i8 signext 4)
-  ret i32 %call
-}
-declare i32 @args_i32(i32, i32, i32, i32, i32, i32, i32, i32, i16 signext, i32,
-             i8 signext)
-
-define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind {
-entry:
-; CHECK: test8
-; CHECK: strb {{w[0-9]+}}, [sp, #3]
-; CHECK: strb wzr, [sp, #2]
-; CHECK: strb {{w[0-9]+}}, [sp, #1]
-; CHECK: strb wzr, [sp]
-; CHECK: bl
-; FAST: test8
-; FAST: strb {{w[0-9]+}}, [sp]
-; FAST: strb {{w[0-9]+}}, [sp, #1]
-; FAST: strb {{w[0-9]+}}, [sp, #2]
-; FAST: strb {{w[0-9]+}}, [sp, #3]
-; FAST: bl
-  tail call void @args_i1(i1 zeroext false, i1 zeroext true, i1 zeroext false,
-                  i1 zeroext true, i1 zeroext false, i1 zeroext true,
-                  i1 zeroext false, i1 zeroext true, i1 zeroext false,
-                  i1 zeroext true, i1 zeroext false, i1 zeroext true)
-  ret i32 0
-}
-
-declare void @args_i1(i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext,
-                      i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext,
-                      i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext)
-
-define i32 @i1_stack_incoming(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f,
-                               i64 %g, i64 %h, i64 %i, i1 zeroext %j) {
-; CHECK-LABEL: i1_stack_incoming:
-; CHECK: ldrb w0, [sp, #8]
-; CHECK: ret
-  %v = zext i1 %j to i32
-  ret i32 %v
-}

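The ABI tests removed above check that sub-word stack arguments are packed at their natural alignment instead of being widened to 8 bytes: after the eight register arguments, an i8/i16/i8/i8 group lands at [sp], [sp, #2], [sp, #4] and [sp, #5]. A minimal sketch of the first two slots on the Darwin variant exercised here (the function name is illustrative):

define i64 @small_args_example(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i8 signext %s1, i16 signext %s2) nounwind {
; %s1 and %s2 overflow the eight GPRs and are packed at [sp] and [sp, #2].
  %e1 = sext i8 %s1 to i64
  %e2 = sext i16 %s2 to i64
  %sum = add i64 %e1, %e2
  ret i64 %sum
}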
Removed: llvm/trunk/test/CodeGen/ARM64/abi_align.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/abi_align.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/abi_align.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/abi_align.ll (removed)
@@ -1,532 +0,0 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
-; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
-target triple = "arm64-apple-darwin"
-
-; rdar://12648441
-; Generated from arm64-arguments.c with -O2.
-; Test passing structs with size < 8, < 16 and > 16
-; with alignment of 16 and without
-
-; Structs with size < 8
-%struct.s38 = type { i32, i16 }
-; With alignment of 16, the size will be padded to multiple of 16 bytes.
-%struct.s39 = type { i32, i16, [10 x i8] }
-; Structs with size < 16
-%struct.s40 = type { i32, i16, i32, i16 }
-%struct.s41 = type { i32, i16, i32, i16 }
-; Structs with size > 16
-%struct.s42 = type { i32, i16, i32, i16, i32, i16 }
-%struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }
-
-@g38 = common global %struct.s38 zeroinitializer, align 4
-@g38_2 = common global %struct.s38 zeroinitializer, align 4
-@g39 = common global %struct.s39 zeroinitializer, align 16
-@g39_2 = common global %struct.s39 zeroinitializer, align 16
-@g40 = common global %struct.s40 zeroinitializer, align 4
-@g40_2 = common global %struct.s40 zeroinitializer, align 4
-@g41 = common global %struct.s41 zeroinitializer, align 16
-@g41_2 = common global %struct.s41 zeroinitializer, align 16
-@g42 = common global %struct.s42 zeroinitializer, align 4
-@g42_2 = common global %struct.s42 zeroinitializer, align 4
-@g43 = common global %struct.s43 zeroinitializer, align 16
-@g43_2 = common global %struct.s43 zeroinitializer, align 16
-
-; structs with size < 8 bytes, passed via i64 in x1 and x2
-define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
-entry:
-; CHECK: f38
-; CHECK: add w[[A:[0-9]+]], w1, w0
-; CHECK: add {{w[0-9]+}}, w[[A]], w2
-  %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32
-  %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32
-  %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32
-  %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32
-  %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16
-  %sext = trunc i64 %sext8 to i32
-  %conv = ashr exact i32 %sext, 16
-  %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16
-  %sext10 = trunc i64 %sext1011 to i32
-  %conv6 = ashr exact i32 %sext10, 16
-  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
-  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
-  %add4 = add i32 %add3, %conv
-  %add7 = add i32 %add4, %conv6
-  ret i32 %add7
-}
-
-define i32 @caller38() #1 {
-entry:
-; CHECK: caller38
-; CHECK: ldr x1,
-; CHECK: ldr x2,
-  %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
-  %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
-  %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
-  ret i32 %call
-}
-
-declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
-                i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0
-
-; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16]
-; i9 at [sp]
-define i32 @caller38_stack() #1 {
-entry:
-; CHECK: caller38_stack
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
-; CHECK: movz w[[C:[0-9]+]], #0x9
-; CHECK: str w[[C]], [sp]
-  %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
-  %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
-  %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
-                                   i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
-  ret i32 %call
-}
-
-; structs with size < 8 bytes, alignment of 16
-; passed via i128 in x1 and x3
-define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
-entry:
-; CHECK: f39
-; CHECK: add w[[A:[0-9]+]], w1, w0
-; CHECK: add {{w[0-9]+}}, w[[A]], w3
-  %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
-  %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
-  %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
-  %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
-  %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
-  %sext = trunc i128 %sext8 to i32
-  %conv = ashr exact i32 %sext, 16
-  %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
-  %sext10 = trunc i128 %sext1011 to i32
-  %conv6 = ashr exact i32 %sext10, 16
-  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
-  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
-  %add4 = add i32 %add3, %conv
-  %add7 = add i32 %add4, %conv6
-  ret i32 %add7
-}
-
-define i32 @caller39() #1 {
-entry:
-; CHECK: caller39
-; CHECK: ldp x1, x2,
-; CHECK: ldp x3, x4,
-  %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
-  %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
-  %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
-  ret i32 %call
-}
-
-declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
-                i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
-
-; structs with size < 8 bytes, alignment 16
-; passed on stack at [sp+16] and [sp+32]
-define i32 @caller39_stack() #1 {
-entry:
-; CHECK: caller39_stack
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK: movz w[[C:[0-9]+]], #0x9
-; CHECK: str w[[C]], [sp]
-  %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
-  %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
-  %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
-                                   i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
-  ret i32 %call
-}
-
-; structs with size < 16 bytes
-; passed as [2 x i64] in the register pairs starting at x1 and x3
-define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
-entry:
-; CHECK: f40
-; CHECK: add w[[A:[0-9]+]], w1, w0
-; CHECK: add {{w[0-9]+}}, w[[A]], w3
-  %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0
-  %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0
-  %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32
-  %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32
-  %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32
-  %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16
-  %sext = trunc i64 %sext8 to i32
-  %conv = ashr exact i32 %sext, 16
-  %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32
-  %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16
-  %sext10 = trunc i64 %sext1011 to i32
-  %conv6 = ashr exact i32 %sext10, 16
-  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
-  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
-  %add4 = add i32 %add3, %conv
-  %add7 = add i32 %add4, %conv6
-  ret i32 %add7
-}
-
-define i32 @caller40() #1 {
-entry:
-; CHECK: caller40
-; CHECK: ldp x1, x2,
-; CHECK: ldp x3, x4,
-  %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
-  %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
-  %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
-  ret i32 %call
-}
-
-declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
-                i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0
-
-; structs with size < 16 bytes
-; passed on stack at [sp+8] and [sp+24]
-define i32 @caller40_stack() #1 {
-entry:
-; CHECK: caller40_stack
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
-; CHECK: movz w[[C:[0-9]+]], #0x9
-; CHECK: str w[[C]], [sp]
-  %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
-  %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
-  %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
-                         i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
-  ret i32 %call
-}
-
-; structs with size < 16 bytes, alignment of 16
-; passed via i128 in x1 and x3
-define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
-entry:
-; CHECK: f41
-; CHECK: add w[[A:[0-9]+]], w1, w0
-; CHECK: add {{w[0-9]+}}, w[[A]], w3
-  %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
-  %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
-  %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
-  %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
-  %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
-  %sext = trunc i128 %sext8 to i32
-  %conv = ashr exact i32 %sext, 16
-  %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
-  %sext10 = trunc i128 %sext1011 to i32
-  %conv6 = ashr exact i32 %sext10, 16
-  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
-  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
-  %add4 = add i32 %add3, %conv
-  %add7 = add i32 %add4, %conv6
-  ret i32 %add7
-}
-
-define i32 @caller41() #1 {
-entry:
-; CHECK: caller41
-; CHECK: ldp x1, x2,
-; CHECK: ldp x3, x4,
-  %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
-  %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
-  %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
-  ret i32 %call
-}
-
-declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
-                i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
-
-; structs with size < 16 bytes, alignment of 16
-; passed on stack at [sp+16] and [sp+32]
-define i32 @caller41_stack() #1 {
-entry:
-; CHECK: caller41_stack
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK: movz w[[C:[0-9]+]], #0x9
-; CHECK: str w[[C]], [sp]
-  %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
-  %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
-  %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
-                            i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
-  ret i32 %call
-}
-
-; structs with size of 22 bytes, passed indirectly in x1 and x2
-define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
-entry:
-; CHECK: f42
-; CHECK: ldr w[[A:[0-9]+]], [x1]
-; CHECK: ldr w[[B:[0-9]+]], [x2]
-; CHECK: add w[[C:[0-9]+]], w[[A]], w0
-; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
-; FAST: f42
-; FAST: ldr w[[A:[0-9]+]], [x1]
-; FAST: ldr w[[B:[0-9]+]], [x2]
-; FAST: add w[[C:[0-9]+]], w[[A]], w0
-; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
-  %i1 = getelementptr inbounds %struct.s42* %s1, i64 0, i32 0
-  %0 = load i32* %i1, align 4, !tbaa !0
-  %i2 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 0
-  %1 = load i32* %i2, align 4, !tbaa !0
-  %s = getelementptr inbounds %struct.s42* %s1, i64 0, i32 1
-  %2 = load i16* %s, align 2, !tbaa !3
-  %conv = sext i16 %2 to i32
-  %s5 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 1
-  %3 = load i16* %s5, align 2, !tbaa !3
-  %conv6 = sext i16 %3 to i32
-  %add = add i32 %0, %i
-  %add3 = add i32 %add, %1
-  %add4 = add i32 %add3, %conv
-  %add7 = add i32 %add4, %conv6
-  ret i32 %add7
-}
-
-; For s1, we allocate a 22-byte space, pass its address via x1
-define i32 @caller42() #3 {
-entry:
-; CHECK: caller42
-; CHECK: str {{x[0-9]+}}, [sp, #48]
-; CHECK: str {{q[0-9]+}}, [sp, #32]
-; CHECK: str {{x[0-9]+}}, [sp, #16]
-; CHECK: str {{q[0-9]+}}, [sp]
-; CHECK: add x1, sp, #32
-; CHECK: mov x2, sp
-; Space for s1 is allocated at sp+32
-; Space for s2 is allocated at sp
-
-; FAST: caller42
-; FAST: sub sp, sp, #96
-; Space for s1 is allocated at fp-24 = sp+72
-; Space for s2 is allocated at sp+48
-; FAST: sub x[[A:[0-9]+]], x29, #24
-; FAST: add x[[A:[0-9]+]], sp, #48
-; Call memcpy with size = 24 (0x18)
-; FAST: orr {{x[0-9]+}}, xzr, #0x18
-  %tmp = alloca %struct.s42, align 4
-  %tmp1 = alloca %struct.s42, align 4
-  %0 = bitcast %struct.s42* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
-  %1 = bitcast %struct.s42* %tmp1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
-  %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
-  ret i32 %call
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4
-
-declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
-                       i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
-                       %struct.s42* nocapture %s2) #2
-
-define i32 @caller42_stack() #3 {
-entry:
-; CHECK: caller42_stack
-; CHECK: mov x29, sp
-; CHECK: sub sp, sp, #96
-; CHECK: stur {{x[0-9]+}}, [x29, #-16]
-; CHECK: stur {{q[0-9]+}}, [x29, #-32]
-; CHECK: str {{x[0-9]+}}, [sp, #48]
-; CHECK: str {{q[0-9]+}}, [sp, #32]
-; Space for s1 is allocated at x29-32 = sp+64
-; Space for s2 is allocated at sp+32
-; CHECK: add x[[B:[0-9]+]], sp, #32
-; CHECK: str x[[B]], [sp, #16]
-; CHECK: sub x[[A:[0-9]+]], x29, #32
-; Address of s1 is passed on stack at sp+8
-; CHECK: str x[[A]], [sp, #8]
-; CHECK: movz w[[C:[0-9]+]], #0x9
-; CHECK: str w[[C]], [sp]
-
-; FAST: caller42_stack
-; Space for s1 is allocated at fp-24
-; Space for s2 is allocated at fp-48
-; FAST: sub x[[A:[0-9]+]], x29, #24
-; FAST: sub x[[B:[0-9]+]], x29, #48
-; Call memcpy with size = 24 (0x18)
-; FAST: orr {{x[0-9]+}}, xzr, #0x18
-; FAST: str {{w[0-9]+}}, [sp]
-; Address of s1 is passed on stack at sp+8
-; FAST: str {{x[0-9]+}}, [sp, #8]
-; FAST: str {{x[0-9]+}}, [sp, #16]
-  %tmp = alloca %struct.s42, align 4
-  %tmp1 = alloca %struct.s42, align 4
-  %0 = bitcast %struct.s42* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
-  %1 = bitcast %struct.s42* %tmp1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
-  %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
-                       i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
-  ret i32 %call
-}
-
-; structs with size of 22 bytes, alignment of 16
-; passed indirectly in x1 and x2
-define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
-entry:
-; CHECK: f43
-; CHECK: ldr w[[A:[0-9]+]], [x1]
-; CHECK: ldr w[[B:[0-9]+]], [x2]
-; CHECK: add w[[C:[0-9]+]], w[[A]], w0
-; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
-; FAST: f43
-; FAST: ldr w[[A:[0-9]+]], [x1]
-; FAST: ldr w[[B:[0-9]+]], [x2]
-; FAST: add w[[C:[0-9]+]], w[[A]], w0
-; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
-  %i1 = getelementptr inbounds %struct.s43* %s1, i64 0, i32 0
-  %0 = load i32* %i1, align 4, !tbaa !0
-  %i2 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 0
-  %1 = load i32* %i2, align 4, !tbaa !0
-  %s = getelementptr inbounds %struct.s43* %s1, i64 0, i32 1
-  %2 = load i16* %s, align 2, !tbaa !3
-  %conv = sext i16 %2 to i32
-  %s5 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 1
-  %3 = load i16* %s5, align 2, !tbaa !3
-  %conv6 = sext i16 %3 to i32
-  %add = add i32 %0, %i
-  %add3 = add i32 %add, %1
-  %add4 = add i32 %add3, %conv
-  %add7 = add i32 %add4, %conv6
-  ret i32 %add7
-}
-
-define i32 @caller43() #3 {
-entry:
-; CHECK: caller43
-; CHECK: str {{q[0-9]+}}, [sp, #48]
-; CHECK: str {{q[0-9]+}}, [sp, #32]
-; CHECK: str {{q[0-9]+}}, [sp, #16]
-; CHECK: str {{q[0-9]+}}, [sp]
-; CHECK: add x1, sp, #32
-; CHECK: mov x2, sp
-; Space for s1 is allocated at sp+32
-; Space for s2 is allocated at sp
-
-; FAST: caller43
-; FAST: mov x29, sp
-; Space for s1 is allocated at sp+32
-; Space for s2 is allocated at sp
-; FAST: add x1, sp, #32
-; FAST: mov x2, sp
-; FAST: str {{x[0-9]+}}, [sp, #32]
-; FAST: str {{x[0-9]+}}, [sp, #40]
-; FAST: str {{x[0-9]+}}, [sp, #48]
-; FAST: str {{x[0-9]+}}, [sp, #56]
-; FAST: str {{x[0-9]+}}, [sp]
-; FAST: str {{x[0-9]+}}, [sp, #8]
-; FAST: str {{x[0-9]+}}, [sp, #16]
-; FAST: str {{x[0-9]+}}, [sp, #24]
-  %tmp = alloca %struct.s43, align 16
-  %tmp1 = alloca %struct.s43, align 16
-  %0 = bitcast %struct.s43* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
-  %1 = bitcast %struct.s43* %tmp1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
-  %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
-  ret i32 %call
-}
-
-declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
-                       i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1,
-                       %struct.s43* nocapture %s2) #2
-
-define i32 @caller43_stack() #3 {
-entry:
-; CHECK: caller43_stack
-; CHECK: mov x29, sp
-; CHECK: sub sp, sp, #96
-; CHECK: stur {{q[0-9]+}}, [x29, #-16]
-; CHECK: stur {{q[0-9]+}}, [x29, #-32]
-; CHECK: str {{q[0-9]+}}, [sp, #48]
-; CHECK: str {{q[0-9]+}}, [sp, #32]
-; Space for s1 is allocated at x29-32 = sp+64
-; Space for s2 is allocated at sp+32
-; CHECK: add x[[B:[0-9]+]], sp, #32
-; CHECK: str x[[B]], [sp, #16]
-; CHECK: sub x[[A:[0-9]+]], x29, #32
-; Address of s1 is passed on stack at sp+8
-; CHECK: str x[[A]], [sp, #8]
-; CHECK: movz w[[C:[0-9]+]], #0x9
-; CHECK: str w[[C]], [sp]
-
-; FAST: caller43_stack
-; FAST: sub sp, sp, #96
-; Space for s1 is allocated at fp-32 = sp+64
-; Space for s2 is allocated at sp+32
-; FAST: sub x[[A:[0-9]+]], x29, #32
-; FAST: add x[[B:[0-9]+]], sp, #32
-; FAST: stur {{x[0-9]+}}, [x29, #-32]
-; FAST: stur {{x[0-9]+}}, [x29, #-24]
-; FAST: stur {{x[0-9]+}}, [x29, #-16]
-; FAST: stur {{x[0-9]+}}, [x29, #-8]
-; FAST: str {{x[0-9]+}}, [sp, #32]
-; FAST: str {{x[0-9]+}}, [sp, #40]
-; FAST: str {{x[0-9]+}}, [sp, #48]
-; FAST: str {{x[0-9]+}}, [sp, #56]
-; FAST: str {{w[0-9]+}}, [sp]
-; Address of s1 is passed on stack at sp+8
-; FAST: str {{x[0-9]+}}, [sp, #8]
-; FAST: str {{x[0-9]+}}, [sp, #16]
-  %tmp = alloca %struct.s43, align 16
-  %tmp1 = alloca %struct.s43, align 16
-  %0 = bitcast %struct.s43* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
-  %1 = bitcast %struct.s43* %tmp1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
-  %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
-                       i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
-  ret i32 %call
-}
-
-; rdar://13668927
-; Check that we don't split an i128.
-declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
-                               i32 %i6, i32 %i7, i128 %s1, i32 %i8)
-
-define i32 @i128_split() {
-entry:
-; CHECK: i128_split
-; "i128 %0" should be on stack at [sp].
-; "i32 8" should be on stack at [sp, #16].
-; CHECK: str {{w[0-9]+}}, [sp, #16]
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
-; FAST: i128_split
-; FAST: sub sp, sp, #48
-; FAST: mov x[[ADDR:[0-9]+]], sp
-; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
-; Load/Store opt is disabled with -O0, so the i128 is split.
-; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
-; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
-  %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
-  %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
-                                           i32 6, i32 7, i128 %0, i32 8) #5
-  ret i32 %call
-}
-
-declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
-                               i32 %i6, i32 %i7, i64 %s1, i32 %i8)
-
-define i32 @i64_split() {
-entry:
-; CHECK: i64_split
-; "i64 %0" should be in register x7.
-; "i32 8" should be on stack at [sp].
-; CHECK: ldr x7, [{{x[0-9]+}}]
-; CHECK: str {{w[0-9]+}}, [sp]
-; FAST: i64_split
-; FAST: ldr x7, [{{x[0-9]+}}]
-; FAST: str {{w[0-9]+}}, [sp]
-  %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16
-  %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
-                                    i32 6, i32 7, i64 %0, i32 8) #5
-  ret i32 %call
-}
-
-attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
-attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
-attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
-attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
-attributes #4 = { nounwind }
-attributes #5 = { nobuiltin }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"short", metadata !1}
-!4 = metadata !{i64 0, i64 4, metadata !0, i64 4, i64 2, metadata !3, i64 8, i64 4, metadata !0, i64 12, i64 2, metadata !3, i64 16, i64 4, metadata !0, i64 20, i64 2, metadata !3}
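
For context, abi_align.ll encoded the frontend's coercion scheme checked
above: a struct smaller than 8 bytes travels as a single i64, the same
struct with 16-byte alignment as an i128 (a register pair), and anything
larger than 16 bytes is passed indirectly through a pointer to a
caller-allocated copy. A minimal sketch, with illustrative names that
are not from the test:

  %struct.small = type { i32, i16 }

  ; Size < 8 bytes: the caller passes the struct as one i64 in a GPR;
  ; the first field is recovered from the low 32 bits.
  define i32 @first_field(i64 %s.coerce) nounwind {
    %lo = trunc i64 %s.coerce to i32
    ret i32 %lo
  }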

Removed: llvm/trunk/test/CodeGen/ARM64/addp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/addp.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/addp.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/addp.ll (removed)
@@ -1,32 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
-
-define double @foo(<2 x double> %a) nounwind {
-; CHECK-LABEL: foo:
-; CHECK: faddp.2d d0, v0
-; CHECK-NEXT: ret
-  %lane0.i = extractelement <2 x double> %a, i32 0
-  %lane1.i = extractelement <2 x double> %a, i32 1
-  %vpaddd.i = fadd double %lane0.i, %lane1.i
-  ret double %vpaddd.i
-}
-
-define i64 @foo0(<2 x i64> %a) nounwind {
-; CHECK-LABEL: foo0:
-; CHECK: addp.2d d0, v0
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
-  %lane0.i = extractelement <2 x i64> %a, i32 0
-  %lane1.i = extractelement <2 x i64> %a, i32 1
-  %vpaddd.i = add i64 %lane0.i, %lane1.i
-  ret i64 %vpaddd.i
-}
-
-define float @foo1(<2 x float> %a) nounwind {
-; CHECK-LABEL: foo1:
-; CHECK: faddp.2s
-; CHECK-NEXT: ret
-  %lane0.i = extractelement <2 x float> %a, i32 0
-  %lane1.i = extractelement <2 x float> %a, i32 1
-  %vpaddd.i = fadd float %lane0.i, %lane1.i
-  ret float %vpaddd.i
-}
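
Note that the faddp/addp selections above are driven purely by the
extract-both-lanes-and-add pattern, with no intrinsic involved, and that
the integer case needs the extra fmov because addp.2d leaves its result
in a FP/SIMD register. A sketch of the integer pattern (illustrative
name):

  ; Expected to select to: addp.2d d0, v0 ; fmov x0, d0
  define i64 @pairwise_add(<2 x i64> %v) nounwind {
    %lo = extractelement <2 x i64> %v, i32 0
    %hi = extractelement <2 x i64> %v, i32 1
    %sum = add i64 %lo, %hi
    ret i64 %sum
  }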

Removed: llvm/trunk/test/CodeGen/ARM64/addr-mode-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/addr-mode-folding.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/addr-mode-folding.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/addr-mode-folding.ll (removed)
@@ -1,171 +0,0 @@
-; RUN: llc -O3 -mtriple arm64-apple-ios3 %s -o - | FileCheck %s
-; <rdar://problem/13621857>
-
-@block = common global i8* null, align 8
-
-define i32 @fct(i32 %i1, i32 %i2) {
-; CHECK: @fct
-; The sign extension is used more than once, so it should not be folded into
-; the addressing mode. However, CodeGenPrepare does not share a sext across
-; uses, so it gets folded anyway; hence the check below is disabled.
-; _CHECK-NOT_: , sxtw]
-entry:
-  %idxprom = sext i32 %i1 to i64
-  %0 = load i8** @block, align 8
-  %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
-  %1 = load i8* %arrayidx, align 1
-  %idxprom1 = sext i32 %i2 to i64
-  %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
-  %2 = load i8* %arrayidx2, align 1
-  %cmp = icmp eq i8 %1, %2
-  br i1 %cmp, label %if.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  %cmp7 = icmp ugt i8 %1, %2
-  %conv8 = zext i1 %cmp7 to i32
-  br label %return
-
-if.end:                                           ; preds = %entry
-  %inc = add nsw i32 %i1, 1
-  %inc9 = add nsw i32 %i2, 1
-  %idxprom10 = sext i32 %inc to i64
-  %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10
-  %3 = load i8* %arrayidx11, align 1
-  %idxprom12 = sext i32 %inc9 to i64
-  %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12
-  %4 = load i8* %arrayidx13, align 1
-  %cmp16 = icmp eq i8 %3, %4
-  br i1 %cmp16, label %if.end23, label %if.then18
-
-if.then18:                                        ; preds = %if.end
-  %cmp21 = icmp ugt i8 %3, %4
-  %conv22 = zext i1 %cmp21 to i32
-  br label %return
-
-if.end23:                                         ; preds = %if.end
-  %inc24 = add nsw i32 %i1, 2
-  %inc25 = add nsw i32 %i2, 2
-  %idxprom26 = sext i32 %inc24 to i64
-  %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26
-  %5 = load i8* %arrayidx27, align 1
-  %idxprom28 = sext i32 %inc25 to i64
-  %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28
-  %6 = load i8* %arrayidx29, align 1
-  %cmp32 = icmp eq i8 %5, %6
-  br i1 %cmp32, label %return, label %if.then34
-
-if.then34:                                        ; preds = %if.end23
-  %cmp37 = icmp ugt i8 %5, %6
-  %conv38 = zext i1 %cmp37 to i32
-  br label %return
-
-return:                                           ; preds = %if.end23, %if.then34, %if.then18, %if.then
-  %retval.0 = phi i32 [ %conv8, %if.then ], [ %conv22, %if.then18 ], [ %conv38, %if.then34 ], [ 1, %if.end23 ]
-  ret i32 %retval.0
-}
-
-define i32 @fct1(i32 %i1, i32 %i2) optsize {
-; CHECK: @fct1
-; Addressing modes are folded when optimizing for code size.
-; CHECK: , sxtw]
-; CHECK: , sxtw]
-entry:
-  %idxprom = sext i32 %i1 to i64
-  %0 = load i8** @block, align 8
-  %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
-  %1 = load i8* %arrayidx, align 1
-  %idxprom1 = sext i32 %i2 to i64
-  %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
-  %2 = load i8* %arrayidx2, align 1
-  %cmp = icmp eq i8 %1, %2
-  br i1 %cmp, label %if.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  %cmp7 = icmp ugt i8 %1, %2
-  %conv8 = zext i1 %cmp7 to i32
-  br label %return
-
-if.end:                                           ; preds = %entry
-  %inc = add nsw i32 %i1, 1
-  %inc9 = add nsw i32 %i2, 1
-  %idxprom10 = sext i32 %inc to i64
-  %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10
-  %3 = load i8* %arrayidx11, align 1
-  %idxprom12 = sext i32 %inc9 to i64
-  %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12
-  %4 = load i8* %arrayidx13, align 1
-  %cmp16 = icmp eq i8 %3, %4
-  br i1 %cmp16, label %if.end23, label %if.then18
-
-if.then18:                                        ; preds = %if.end
-  %cmp21 = icmp ugt i8 %3, %4
-  %conv22 = zext i1 %cmp21 to i32
-  br label %return
-
-if.end23:                                         ; preds = %if.end
-  %inc24 = add nsw i32 %i1, 2
-  %inc25 = add nsw i32 %i2, 2
-  %idxprom26 = sext i32 %inc24 to i64
-  %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26
-  %5 = load i8* %arrayidx27, align 1
-  %idxprom28 = sext i32 %inc25 to i64
-  %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28
-  %6 = load i8* %arrayidx29, align 1
-  %cmp32 = icmp eq i8 %5, %6
-  br i1 %cmp32, label %return, label %if.then34
-
-if.then34:                                        ; preds = %if.end23
-  %cmp37 = icmp ugt i8 %5, %6
-  %conv38 = zext i1 %cmp37 to i32
-  br label %return
-
-return:                                           ; preds = %if.end23, %if.then34, %if.then18, %if.then
-  %retval.0 = phi i32 [ %conv8, %if.then ], [ %conv22, %if.then18 ], [ %conv38, %if.then34 ], [ 1, %if.end23 ]
-  ret i32 %retval.0
-}
-
-; CHECK: @test
-; CHECK-NOT: , uxtw #2]
-define i32 @test(i32* %array, i8 zeroext %c, i32 %arg) {
-entry:
-  %conv = zext i8 %c to i32
-  %add = sub i32 0, %arg
-  %tobool = icmp eq i32 %conv, %add
-  br i1 %tobool, label %if.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  %idxprom = zext i8 %c to i64
-  %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom
-  %0 = load volatile i32* %arrayidx, align 4
-  %1 = load volatile i32* %arrayidx, align 4
-  %add3 = add nsw i32 %1, %0
-  br label %if.end
-
-if.end:                                           ; preds = %entry, %if.then
-  %res.0 = phi i32 [ %add3, %if.then ], [ 0, %entry ]
-  ret i32 %res.0
-}
-
-
-; CHECK: @test2
-; CHECK: , uxtw #2]
-; CHECK: , uxtw #2]
-define i32 @test2(i32* %array, i8 zeroext %c, i32 %arg) optsize {
-entry:
-  %conv = zext i8 %c to i32
-  %add = sub i32 0, %arg
-  %tobool = icmp eq i32 %conv, %add
-  br i1 %tobool, label %if.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  %idxprom = zext i8 %c to i64
-  %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom
-  %0 = load volatile i32* %arrayidx, align 4
-  %1 = load volatile i32* %arrayidx, align 4
-  %add3 = add nsw i32 %1, %0
-  br label %if.end
-
-if.end:                                           ; preds = %entry, %if.then
-  %res.0 = phi i32 [ %add3, %if.then ], [ 0, %entry ]
-  ret i32 %res.0
-}
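
The tradeoff these tests capture: folding a sext/zext into the load as an
extended-register address ([base, wN, sxtw]) re-does the extension at
every use, so when the extension has multiple uses it is only folded when
optimizing for size. A sketch of the folded form under optsize
(illustrative names):

  ; With optsize the extension folds into the addressing mode,
  ; e.g. ldrb w0, [x0, w1, sxtw]
  define i8 @folded(i8* %base, i32 %idx) optsize {
    %idxprom = sext i32 %idx to i64
    %addr = getelementptr inbounds i8* %base, i64 %idxprom
    %val = load i8* %addr, align 1
    ret i8 %val
  }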

Removed: llvm/trunk/test/CodeGen/ARM64/addr-type-promotion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/addr-type-promotion.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/addr-type-promotion.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/addr-type-promotion.ll (removed)
@@ -1,82 +0,0 @@
-; RUN: llc -march arm64 < %s | FileCheck %s
-; rdar://13452552
-; ModuleID = 'reduced_test.ll'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
-target triple = "arm64-apple-ios3.0.0"
-
-@block = common global i8* null, align 8
-
-define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
-; CHECK: fullGtU
-; CHECK: adrp [[PAGE:x[0-9]+]], _block@GOTPAGE
-; CHECK: ldr [[ADDR:x[0-9]+]], {{\[}}[[PAGE]], _block@GOTPAGEOFF]
-; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]]
-; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]],  w0, sxtw]
-; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], w1, sxtw]
-; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
-; CHECK-NEXT: b.ne
-; Next BB
-; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw
-; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw
-; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1]
-; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1]
-; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]
-; CHECK-NEXT: b.ne
-; Next BB
-; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2]
-; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2]
-; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]]
-entry:
-  %idxprom = sext i32 %i1 to i64
-  %tmp = load i8** @block, align 8
-  %arrayidx = getelementptr inbounds i8* %tmp, i64 %idxprom
-  %tmp1 = load i8* %arrayidx, align 1
-  %idxprom1 = sext i32 %i2 to i64
-  %arrayidx2 = getelementptr inbounds i8* %tmp, i64 %idxprom1
-  %tmp2 = load i8* %arrayidx2, align 1
-  %cmp = icmp eq i8 %tmp1, %tmp2
-  br i1 %cmp, label %if.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  %cmp7 = icmp ugt i8 %tmp1, %tmp2
-  %conv9 = zext i1 %cmp7 to i8
-  br label %return
-
-if.end:                                           ; preds = %entry
-  %inc = add nsw i32 %i1, 1
-  %inc10 = add nsw i32 %i2, 1
-  %idxprom11 = sext i32 %inc to i64
-  %arrayidx12 = getelementptr inbounds i8* %tmp, i64 %idxprom11
-  %tmp3 = load i8* %arrayidx12, align 1
-  %idxprom13 = sext i32 %inc10 to i64
-  %arrayidx14 = getelementptr inbounds i8* %tmp, i64 %idxprom13
-  %tmp4 = load i8* %arrayidx14, align 1
-  %cmp17 = icmp eq i8 %tmp3, %tmp4
-  br i1 %cmp17, label %if.end25, label %if.then19
-
-if.then19:                                        ; preds = %if.end
-  %cmp22 = icmp ugt i8 %tmp3, %tmp4
-  %conv24 = zext i1 %cmp22 to i8
-  br label %return
-
-if.end25:                                         ; preds = %if.end
-  %inc26 = add nsw i32 %i1, 2
-  %inc27 = add nsw i32 %i2, 2
-  %idxprom28 = sext i32 %inc26 to i64
-  %arrayidx29 = getelementptr inbounds i8* %tmp, i64 %idxprom28
-  %tmp5 = load i8* %arrayidx29, align 1
-  %idxprom30 = sext i32 %inc27 to i64
-  %arrayidx31 = getelementptr inbounds i8* %tmp, i64 %idxprom30
-  %tmp6 = load i8* %arrayidx31, align 1
-  %cmp34 = icmp eq i8 %tmp5, %tmp6
-  br i1 %cmp34, label %return, label %if.then36
-
-if.then36:                                        ; preds = %if.end25
-  %cmp39 = icmp ugt i8 %tmp5, %tmp6
-  %conv41 = zext i1 %cmp39 to i8
-  br label %return
-
-return:                                           ; preds = %if.then36, %if.end25, %if.then19, %if.then
-  %retval.0 = phi i8 [ %conv9, %if.then ], [ %conv24, %if.then19 ], [ %conv41, %if.then36 ], [ 0, %if.end25 ]
-  ret i8 %retval.0
-}
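
The prologue checks above also document the Darwin sequence for reaching
a GOT-accessed global: adrp to the GOT page, a load of the pointer slot,
then the real access. Roughly, for an external global (illustrative
name):

  @g = external global i64

  ; Expected lowering on arm64-apple-ios:
  ;   adrp x8, _g@GOTPAGE
  ;   ldr  x8, [x8, _g@GOTPAGEOFF]
  ;   ldr  x0, [x8]
  define i64 @load_global() {
    %v = load i64* @g, align 8
    ret i64 %v
  }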

Removed: llvm/trunk/test/CodeGen/ARM64/addrmode.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/addrmode.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/addrmode.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/addrmode.ll (removed)
@@ -1,72 +0,0 @@
-; RUN: llc -march=arm64 < %s | FileCheck %s
-; rdar://10232252
-
-@object = external hidden global i64, section "__DATA, __objc_ivar", align 8
-
-; base + offset (imm9)
-; CHECK: @t1
-; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
-; CHECK: ret
-define void @t1() {
-  %incdec.ptr = getelementptr inbounds i64* @object, i64 1
-  %tmp = load volatile i64* %incdec.ptr, align 8
-  ret void
-}
-
-; base + offset (> imm9)
-; CHECK: @t2
-; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
-; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
-; CHECK: ret
-define void @t2() {
-  %incdec.ptr = getelementptr inbounds i64* @object, i64 -33
-  %tmp = load volatile i64* %incdec.ptr, align 8
-  ret void
-}
-
-; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
-; CHECK: @t3
-; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
-; CHECK: ret
-define void @t3() {
-  %incdec.ptr = getelementptr inbounds i64* @object, i64 4095
-  %tmp = load volatile i64* %incdec.ptr, align 8
-  ret void
-}
-
-; base + unsigned offset (> imm12 * size of type in bytes)
-; CHECK: @t4
-; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #8, lsl #12
-; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
-; CHECK: ret
-define void @t4() {
-  %incdec.ptr = getelementptr inbounds i64* @object, i64 4096
-  %tmp = load volatile i64* %incdec.ptr, align 8
-  ret void
-}
-
-; base + reg
-; CHECK: @t5
-; CHECK: ldr xzr, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #3]
-; CHECK: ret
-define void @t5(i64 %a) {
-  %incdec.ptr = getelementptr inbounds i64* @object, i64 %a
-  %tmp = load volatile i64* %incdec.ptr, align 8
-  ret void
-}
-
-; base + reg + imm
-; CHECK: @t6
-; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
-; CHECK-NEXT: add [[ADDREG]], [[ADDREG]], #8, lsl #12
-; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
-; CHECK: ret
-define void @t6(i64 %a) {
-  %tmp1 = getelementptr inbounds i64* @object, i64 %a
-  %incdec.ptr = getelementptr inbounds i64* %tmp1, i64 4096
-  %tmp = load volatile i64* %incdec.ptr, align 8
-  ret void
-}
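
The t3/t4 boundary is worth spelling out: the unsigned scaled 12-bit
immediate covers byte offsets up to 4095 * 8 = 32760 for an 8-byte
access, so element 4095 folds into the load, while element 4096 (byte
offset 32768 = 8 << 12) must first be materialized with
add xM, xN, #8, lsl #12. Sketch of the last foldable case (illustrative
name):

  @arr = external global i64

  ; Offset 4095 * 8 = 32760 still fits: ldr x0, [xN, #32760]
  define i64 @edge() {
    %p = getelementptr inbounds i64* @arr, i64 4095
    %v = load i64* %p, align 8
    ret i64 %v
  }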

Removed: llvm/trunk/test/CodeGen/ARM64/alloc-no-stack-realign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/alloc-no-stack-realign.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/alloc-no-stack-realign.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/alloc-no-stack-realign.ll (removed)
@@ -1,21 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=false | FileCheck %s
-
-; rdar://12713765
-; Make sure we are not creating stack objects that are assumed to be 64-byte
-; aligned.
-@T3_retval = common global <16 x float> zeroinitializer, align 16
-
-define void @test(<16 x float>* noalias sret %agg.result) nounwind ssp {
-entry:
-; CHECK: test
-; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], [sp, #32]
-; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], [sp]
-; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE:x[0-9]+]], #32]
-; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE]]]
- %retval = alloca <16 x float>, align 16
- %0 = load <16 x float>* @T3_retval, align 16
- store <16 x float> %0, <16 x float>* %retval
- %1 = load <16 x float>* %retval
- store <16 x float> %1, <16 x float>* %agg.result, align 16
- ret void
-}

Removed: llvm/trunk/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll (removed)
@@ -1,29 +0,0 @@
-; RUN: llc -march=arm64 -mcpu=cyclone < %s | FileCheck %s
-
-; CHECK: foo
-; CHECK: ldr w[[REG:[0-9]+]], [x19, #264]
-; CHECK: str w[[REG]], [x19, #132]
-; CHECK: ldr w{{[0-9]+}}, [x19, #264]
-
-define i32 @foo(i32 %a) nounwind {
-  %retval = alloca i32, align 4
-  %a.addr = alloca i32, align 4
-  %arr = alloca [32 x i32], align 4
-  %i = alloca i32, align 4
-  %arr2 = alloca [32 x i32], align 4
-  %j = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  %tmp = load i32* %a.addr, align 4
-  %tmp1 = zext i32 %tmp to i64
-  %v = mul i64 4, %tmp1
-  %vla = alloca i8, i64 %v, align 4
-  %tmp2 = bitcast i8* %vla to i32*
-  %tmp3 = load i32* %a.addr, align 4
-  store i32 %tmp3, i32* %i, align 4
-  %tmp4 = load i32* %a.addr, align 4
-  store i32 %tmp4, i32* %j, align 4
-  %tmp5 = load i32* %j, align 4
-  store i32 %tmp5, i32* %retval
-  %x = load i32* %retval
-  ret i32 %x
-}
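
The x19-relative offsets checked above follow from the dynamic alloca:
once a function contains a variable-sized object, sp no longer sits at a
fixed distance from the fixed-size locals, so the backend addresses them
through a reserved base register (x19 here). Minimal sketch of the
trigger (illustrative names):

  define i32 @vla_trigger(i32 %n) nounwind {
    %buf = alloca [32 x i32], align 4      ; fixed-size local
    %size = zext i32 %n to i64
    %vla = alloca i8, i64 %size, align 4   ; dynamic alloca moves sp
    %p = getelementptr inbounds [32 x i32]* %buf, i64 0, i64 0
    %v = load i32* %p, align 4             ; addressed via the base register
    ret i32 %v
  }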

Removed: llvm/trunk/test/CodeGen/ARM64/andCmpBrToTBZ.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/andCmpBrToTBZ.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/andCmpBrToTBZ.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/andCmpBrToTBZ.ll (removed)
@@ -1,72 +0,0 @@
-; RUN: llc -O1 -march=arm64 -enable-andcmp-sinking=true < %s | FileCheck %s
-; ModuleID = 'and-cbz-extr-mr.bc'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
-target triple = "arm64-apple-ios7.0.0"
-
-define zeroext i1 @foo(i1 %IsEditable, i1 %isTextField, i8* %str1, i8* %str2, i8* %str3, i8* %str4, i8* %str5, i8* %str6, i8* %str7, i8* %str8, i8* %str9, i8* %str10, i8* %str11, i8* %str12, i8* %str13, i32 %int1, i8* %str14) unnamed_addr #0 align 2 {
-; CHECK: _foo:
-entry:
-  %tobool = icmp eq i8* %str14, null
-  br i1 %tobool, label %return, label %if.end
-
-; CHECK: %if.end
-; CHECK: tbz
-if.end:                                           ; preds = %entry
-  %and.i.i.i = and i32 %int1, 4
-  %tobool.i.i.i = icmp eq i32 %and.i.i.i, 0
-  br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i
-
-land.rhs.i:                                       ; preds = %if.end
-  %cmp.i.i.i = icmp eq i8* %str12, %str13
-  br i1 %cmp.i.i.i, label %if.then3, label %lor.rhs.i.i.i
-
-lor.rhs.i.i.i:                                    ; preds = %land.rhs.i
-  %cmp.i13.i.i.i = icmp eq i8* %str10, %str11
-  br i1 %cmp.i13.i.i.i, label %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, label %if.end5
-
-_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit: ; preds = %lor.rhs.i.i.i
-  %cmp.i.i.i.i = icmp eq i8* %str8, %str9
-  br i1 %cmp.i.i.i.i, label %if.then3, label %if.end5
-
-if.then3:                                         ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %land.rhs.i
-  %tmp11 = load i8* %str14, align 8
-  %tmp12 = and i8 %tmp11, 2
-  %tmp13 = icmp ne i8 %tmp12, 0
-  br label %return
-
-if.end5:                                          ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %lor.rhs.i.i.i
-; CHECK: %if.end5
-; CHECK: tbz
-  br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i19
-
-land.rhs.i19:                                     ; preds = %if.end5
-  %cmp.i.i.i18 = icmp eq i8* %str6, %str7
-  br i1 %cmp.i.i.i18, label %if.then7, label %lor.rhs.i.i.i23
-
-lor.rhs.i.i.i23:                                  ; preds = %land.rhs.i19
-  %cmp.i13.i.i.i22 = icmp eq i8* %str3, %str4
-  br i1 %cmp.i13.i.i.i22, label %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, label %if.end12
-
-_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28: ; preds = %lor.rhs.i.i.i23
-  %cmp.i.i.i.i26 = icmp eq i8* %str1, %str2
-  br i1 %cmp.i.i.i.i26, label %if.then7, label %if.end12
-
-if.then7:                                         ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, %land.rhs.i19
-  br i1 %isTextField, label %if.then9, label %if.end12
-
-if.then9:                                         ; preds = %if.then7
-  %tmp23 = load i8* %str5, align 8
-  %tmp24 = and i8 %tmp23, 2
-  %tmp25 = icmp ne i8 %tmp24, 0
-  br label %return
-
-if.end12:                                         ; preds = %if.then7, %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, %lor.rhs.i.i.i23, %if.end5, %if.end
-  %lnot = xor i1 %IsEditable, true
-  br label %return
-
-return:                                           ; preds = %if.end12, %if.then9, %if.then3, %entry
-  %retval.0 = phi i1 [ %tmp13, %if.then3 ], [ %tmp25, %if.then9 ], [ %lnot, %if.end12 ], [ true, %entry ]
-  ret i1 %retval.0
-}
-
-attributes #0 = { nounwind ssp }
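
What the two tbz checks rely on: an and with a single-bit mask feeding an
eq/ne compare against zero and a conditional branch can be selected as
one test-bit-and-branch instruction (which -enable-andcmp-sinking makes
reachable across blocks). Minimal single-block sketch (illustrative
name):

  define i32 @single_bit(i32 %x) {
  entry:
    %bit = and i32 %x, 4
    %cmp = icmp eq i32 %bit, 0
    br i1 %cmp, label %zero, label %nonzero  ; tbz w0, #2, ...

  zero:
    ret i32 0

  nonzero:
    ret i32 1
  }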

Removed: llvm/trunk/test/CodeGen/ARM64/ands-bad-peephole.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/ands-bad-peephole.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/ands-bad-peephole.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/ands-bad-peephole.ll (removed)
@@ -1,31 +0,0 @@
-; RUN: llc %s -o - | FileCheck %s
-; Check that ANDS (tst) is not merged with ADD when the immediate
-; is not 0.
-; <rdar://problem/16693089>
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "arm64-apple-ios"
-
-; CHECK-LABEL: tst1:
-; CHECK: add [[REG:w[0-9]+]], w{{[0-9]+}}, #1
-; CHECK: tst [[REG]], #0x1
-define void @tst1() {
-entry:
-  br i1 undef, label %for.end, label %for.body
-
-for.body:                                         ; preds = %for.body, %entry
-  %result.09 = phi i32 [ %add2.result.0, %for.body ], [ 1, %entry ]
-  %i.08 = phi i32 [ %inc, %for.body ], [ 2, %entry ]
-  %and = and i32 %i.08, 1
-  %cmp1 = icmp eq i32 %and, 0
-  %add2.result.0 = select i1 %cmp1, i32 undef, i32 %result.09
-  %inc = add nsw i32 %i.08, 1
-  %cmp = icmp slt i32 %i.08, undef
-  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge:                       ; preds = %for.body
-  %add2.result.0.lcssa = phi i32 [ %add2.result.0, %for.body ]
-  br label %for.end
-
-for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
-  ret void
-}
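
The peephole being tested: an and feeding a compare may only become a
flag-setting ands (tst) when the compare is against zero, and that tst
must not be merged into the preceding add, whose flags say nothing about
bit 0 of its result; the checks pin that both instructions survive. A
rough single-block version of the core pattern (illustrative name):

  ; Expected to keep both: add w8, w0, #1 ; tst w8, #0x1
  define i1 @bit0_of_inc(i32 %x) {
    %inc = add i32 %x, 1
    %bit = and i32 %inc, 1
    %cmp = icmp eq i32 %bit, 0
    ret i1 %cmp
  }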

Removed: llvm/trunk/test/CodeGen/ARM64/anyregcc-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/anyregcc-crash.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/anyregcc-crash.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/anyregcc-crash.ll (removed)
@@ -1,19 +0,0 @@
-; RUN: not llc < %s -mtriple=arm64-apple-darwin 2>&1 | FileCheck %s
-;
-; Check that misuse of anyregcc results in a compile time error.
-
-; CHECK: LLVM ERROR: ran out of registers during register allocation
-define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
-                        i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16,
-                        i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
-                        i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32) {
-entry:
-  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32,
-                i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
-                i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16,
-                i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
-                i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32)
-  ret i64 %result
-}
-
-declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)

Removed: llvm/trunk/test/CodeGen/ARM64/anyregcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/anyregcc.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/anyregcc.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/anyregcc.ll (removed)
@@ -1,363 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
-
-; Stackmap Header: no constants - 6 callsites
-; CHECK-LABEL: .section	__LLVM_STACKMAPS,__llvm_stackmaps
-; CHECK-NEXT:  __LLVM_StackMaps:
-; Header
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 0
-; CHECK-NEXT:   .short 0
-; Num Functions
-; CHECK-NEXT:   .long 8
-; Num LargeConstants
-; CHECK-NEXT:   .long 0
-; Num Callsites
-; CHECK-NEXT:   .long 8
-
-; Functions and stack size
-; CHECK-NEXT:   .quad _test
-; CHECK-NEXT:   .quad 16
-; CHECK-NEXT:   .quad _property_access1
-; CHECK-NEXT:   .quad 16
-; CHECK-NEXT:   .quad _property_access2
-; CHECK-NEXT:   .quad 32
-; CHECK-NEXT:   .quad _property_access3
-; CHECK-NEXT:   .quad 32
-; CHECK-NEXT:   .quad _anyreg_test1
-; CHECK-NEXT:   .quad 16
-; CHECK-NEXT:   .quad _anyreg_test2
-; CHECK-NEXT:   .quad 16
-; CHECK-NEXT:   .quad _patchpoint_spilldef
-; CHECK-NEXT:   .quad 112
-; CHECK-NEXT:   .quad _patchpoint_spillargs
-; CHECK-NEXT:   .quad 128
-
-
-; test
-; CHECK-LABEL:  .long   L{{.*}}-_test
-; CHECK-NEXT:   .short  0
-; 3 locations
-; CHECK-NEXT:   .short  3
-; Loc 0: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 4
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 1: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 4
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 2: Constant 3
-; CHECK-NEXT:   .byte 4
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short  0
-; CHECK-NEXT:   .long 3
-define i64 @test() nounwind ssp uwtable {
-entry:
-  call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 16, i8* null, i32 2, i32 1, i32 2, i64 3)
-  ret i64 0
-}
-
-; property access 1 - %obj is an anyreg call argument and should therefore be in a register
-; CHECK-LABEL:  .long   L{{.*}}-_property_access1
-; CHECK-NEXT:   .short  0
-; 2 locations
-; CHECK-NEXT:   .short  2
-; Loc 0: Register <-- this is the return register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 1: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
-entry:
-  %f = inttoptr i64 281474417671919 to i8*
-  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 20, i8* %f, i32 1, i8* %obj)
-  ret i64 %ret
-}
-
-; property access 2 - %obj is an anyreg call argument and should therefore be in a register
-; CHECK-LABEL:  .long   L{{.*}}-_property_access2
-; CHECK-NEXT:   .short  0
-; 2 locations
-; CHECK-NEXT:   .short  2
-; Loc 0: Register <-- this is the return register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 1: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-define i64 @property_access2() nounwind ssp uwtable {
-entry:
-  %obj = alloca i64, align 8
-  %f = inttoptr i64 281474417671919 to i8*
-  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %f, i32 1, i64* %obj)
-  ret i64 %ret
-}
-
-; property access 3 - %obj is a frame index
-; CHECK-LABEL:  .long   L{{.*}}-_property_access3
-; CHECK-NEXT:   .short  0
-; 2 locations
-; CHECK-NEXT:   .short  2
-; Loc 0: Register <-- this is the return register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 1: Direct FP - 8
-; CHECK-NEXT:   .byte 2
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short 29
-; CHECK-NEXT:   .long -8
-define i64 @property_access3() nounwind ssp uwtable {
-entry:
-  %obj = alloca i64, align 8
-  %f = inttoptr i64 281474417671919 to i8*
-  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 20, i8* %f, i32 0, i64* %obj)
-  ret i64 %ret
-}
-
-; anyreg_test1
-; CHECK-LABEL:  .long   L{{.*}}-_anyreg_test1
-; CHECK-NEXT:   .short  0
-; 14 locations
-; CHECK-NEXT:   .short  14
-; Loc 0: Register <-- this is the return register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 1: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 2: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 3: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 4: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 5: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 6: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 7: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 8: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 9: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 10: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 11: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 12: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 13: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
-entry:
-  %f = inttoptr i64 281474417671919 to i8*
-  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 20, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
-  ret i64 %ret
-}
-
-; anyreg_test2
-; CHECK-LABEL:  .long   L{{.*}}-_anyreg_test2
-; CHECK-NEXT:   .short  0
-; 14 locations
-; CHECK-NEXT:   .short  14
-; Loc 0: Register <-- this is the return register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 1: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 2: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 3: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 4: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 5: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 6: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 7: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 8: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 9: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 10: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 11: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 12: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-; Loc 13: Register
-; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
-define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
-entry:
-  %f = inttoptr i64 281474417671919 to i8*
-  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
-  ret i64 %ret
-}
-
-; Test spilling the return value of an anyregcc call.
-;
-; <rdar://problem/15432754> [JS] Assertion: "Folded a def to a non-store!"
-;
-; CHECK-LABEL: .long L{{.*}}-_patchpoint_spilldef
-; CHECK-NEXT: .short 0
-; CHECK-NEXT: .short 3
-; Loc 0: Register (some register that will be spilled to the stack)
-; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
-; CHECK-NEXT: .short {{[0-9]+}}
-; CHECK-NEXT: .long  0
-; Loc 1: Register
-; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
-; CHECK-NEXT: .short {{[0-9]+}}
-; CHECK-NEXT: .long  0
-; Loc 2: Register
-; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
-; CHECK-NEXT: .short {{[0-9]+}}
-; CHECK-NEXT: .long  0
-define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
-entry:
-  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
-  tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
-  ret i64 %result
-}
-
-; Test spilling the arguments of an anyregcc call.
-;
-; <rdar://problem/15487687> [JS] AnyRegCC argument ends up being spilled
-;
-; CHECK-LABEL: .long L{{.*}}-_patchpoint_spillargs
-; CHECK-NEXT: .short 0
-; CHECK-NEXT: .short 5
-; Loc 0: Return value in a register
-; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
-; CHECK-NEXT: .short {{[0-9]+}}
-; CHECK-NEXT: .long  0
-; Loc 1: Arg0 in a Register
-; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
-; CHECK-NEXT: .short {{[0-9]+}}
-; CHECK-NEXT: .long  0
-; Loc 2: Arg1 in a Register
-; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
-; CHECK-NEXT: .short {{[0-9]+}}
-; CHECK-NEXT: .long  0
-; Loc 3: Arg2 spilled to FP -96
-; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
-; CHECK-NEXT: .short 29
-; CHECK-NEXT: .long -96
-; Loc 4: Arg3 spilled to FP - 88
-; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
-; CHECK-NEXT: .short 29
-; CHECK-NEXT: .long -88
-define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
-entry:
-  tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
-  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
-  ret i64 %result
-}
-
-declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
-declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
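
For readers decoding the .byte/.short/.long runs above: each location
record in this (v1) stackmap encoding is four fields, which is why a
register location prints as 1, 8, <reg>, 0 and the "Direct FP - 8"
location as 2, 8, 29, -8 (x29/fp is DWARF register 29). Field layout,
as reflected in the checks (field names here are descriptive, not taken
from the format spec):

  .byte  type     ; 1 = Register, 2 = Direct, 3 = Indirect, 4 = Constant
  .byte  size     ; location size in bytes (4 or 8 in these tests)
  .short regnum   ; DWARF register number (29 = fp/x29)
  .long  offset   ; stack offset, or the small constant itself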

Removed: llvm/trunk/test/CodeGen/ARM64/arith-saturating.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/arith-saturating.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/arith-saturating.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/arith-saturating.ll (removed)
@@ -1,153 +0,0 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
-
-define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: qadds:
-; CHECK: sqadd s0, s0, s1
-  %vecext = extractelement <4 x i32> %b, i32 0
-  %vecext1 = extractelement <4 x i32> %c, i32 0
-  %vqadd.i = tail call i32 @llvm.arm64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind
-  ret i32 %vqadd.i
-}
-
-define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: qaddd:
-; CHECK: sqadd d0, d0, d1
-  %vecext = extractelement <2 x i64> %b, i32 0
-  %vecext1 = extractelement <2 x i64> %c, i32 0
-  %vqadd.i = tail call i64 @llvm.arm64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind
-  ret i64 %vqadd.i
-}
-
-define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: uqadds:
-; CHECK: uqadd s0, s0, s1
-  %vecext = extractelement <4 x i32> %b, i32 0
-  %vecext1 = extractelement <4 x i32> %c, i32 0
-  %vqadd.i = tail call i32 @llvm.arm64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind
-  ret i32 %vqadd.i
-}
-
-define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: uqaddd:
-; CHECK: uqadd d0, d0, d1
-  %vecext = extractelement <2 x i64> %b, i32 0
-  %vecext1 = extractelement <2 x i64> %c, i32 0
-  %vqadd.i = tail call i64 @llvm.arm64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind
-  ret i64 %vqadd.i
-}
-
-declare i64 @llvm.arm64.neon.uqadd.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.neon.uqadd.i32(i32, i32) nounwind readnone
-declare i64 @llvm.arm64.neon.sqadd.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.neon.sqadd.i32(i32, i32) nounwind readnone
-
-define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: qsubs:
-; CHECK: sqsub s0, s0, s1
-  %vecext = extractelement <4 x i32> %b, i32 0
-  %vecext1 = extractelement <4 x i32> %c, i32 0
-  %vqsub.i = tail call i32 @llvm.arm64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind
-  ret i32 %vqsub.i
-}
-
-define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: qsubd:
-; CHECK: sqsub d0, d0, d1
-  %vecext = extractelement <2 x i64> %b, i32 0
-  %vecext1 = extractelement <2 x i64> %c, i32 0
-  %vqsub.i = tail call i64 @llvm.arm64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind
-  ret i64 %vqsub.i
-}
-
-define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: uqsubs:
-; CHECK: uqsub s0, s0, s1
-  %vecext = extractelement <4 x i32> %b, i32 0
-  %vecext1 = extractelement <4 x i32> %c, i32 0
-  %vqsub.i = tail call i32 @llvm.arm64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind
-  ret i32 %vqsub.i
-}
-
-define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: uqsubd:
-; CHECK: uqsub d0, d0, d1
-  %vecext = extractelement <2 x i64> %b, i32 0
-  %vecext1 = extractelement <2 x i64> %c, i32 0
-  %vqsub.i = tail call i64 @llvm.arm64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind
-  ret i64 %vqsub.i
-}
-
-declare i64 @llvm.arm64.neon.uqsub.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.neon.uqsub.i32(i32, i32) nounwind readnone
-declare i64 @llvm.arm64.neon.sqsub.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.neon.sqsub.i32(i32, i32) nounwind readnone
-
-define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
-; CHECK-LABEL: qabss:
-; CHECK: sqabs s0, s0
-; CHECK: ret
-  %vecext = extractelement <4 x i32> %b, i32 0
-  %vqabs.i = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %vecext) nounwind
-  ret i32 %vqabs.i
-}
-
-define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
-; CHECK-LABEL: qabsd:
-; CHECK: sqabs d0, d0
-; CHECK: ret
-  %vecext = extractelement <2 x i64> %b, i32 0
-  %vqabs.i = tail call i64 @llvm.arm64.neon.sqabs.i64(i64 %vecext) nounwind
-  ret i64 %vqabs.i
-}
-
-define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
-; CHECK-LABEL: qnegs:
-; CHECK: sqneg s0, s0
-; CHECK: ret
-  %vecext = extractelement <4 x i32> %b, i32 0
-  %vqneg.i = tail call i32 @llvm.arm64.neon.sqneg.i32(i32 %vecext) nounwind
-  ret i32 %vqneg.i
-}
-
-define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
-; CHECK-LABEL: qnegd:
-; CHECK: sqneg d0, d0
-; CHECK: ret
-  %vecext = extractelement <2 x i64> %b, i32 0
-  %vqneg.i = tail call i64 @llvm.arm64.neon.sqneg.i64(i64 %vecext) nounwind
-  ret i64 %vqneg.i
-}
-
-declare i64 @llvm.arm64.neon.sqneg.i64(i64) nounwind readnone
-declare i32 @llvm.arm64.neon.sqneg.i32(i32) nounwind readnone
-declare i64 @llvm.arm64.neon.sqabs.i64(i64) nounwind readnone
-declare i32 @llvm.arm64.neon.sqabs.i32(i32) nounwind readnone
-
-
-define i32 @vqmovund(<2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: vqmovund:
-; CHECK: sqxtun s0, d0
-  %vecext = extractelement <2 x i64> %b, i32 0
-  %vqmovun.i = tail call i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind
-  ret i32 %vqmovun.i
-}
-
-define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: vqmovnd_s:
-; CHECK: sqxtn s0, d0
-  %vecext = extractelement <2 x i64> %b, i32 0
-  %vqmovn.i = tail call i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind
-  ret i32 %vqmovn.i
-}
-
-define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: vqmovnd_u:
-; CHECK: uqxtn s0, d0
-  %vecext = extractelement <2 x i64> %b, i32 0
-  %vqmovn.i = tail call i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind
-  ret i32 %vqmovn.i
-}
-
-declare i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone
-declare i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone
-declare i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone

Removed: llvm/trunk/test/CodeGen/ARM64/arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/arith.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/arith.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/arith.ll (removed)
@@ -1,262 +0,0 @@
-; RUN: llc < %s -march=arm64 -asm-verbose=false | FileCheck %s
-
-define i32 @t1(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t1:
-; CHECK: add w0, w1, w0
-; CHECK: ret
-  %add = add i32 %b, %a
-  ret i32 %add
-}
-
-define i32 @t2(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t2:
-; CHECK: udiv w0, w0, w1
-; CHECK: ret
-  %udiv = udiv i32 %a, %b
-  ret i32 %udiv
-}
-
-define i64 @t3(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t3:
-; CHECK: udiv x0, x0, x1
-; CHECK: ret
-  %udiv = udiv i64 %a, %b
-  ret i64 %udiv
-}
-
-define i32 @t4(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t4:
-; CHECK: sdiv w0, w0, w1
-; CHECK: ret
-  %sdiv = sdiv i32 %a, %b
-  ret i32 %sdiv
-}
-
-define i64 @t5(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t5:
-; CHECK: sdiv x0, x0, x1
-; CHECK: ret
-  %sdiv = sdiv i64 %a, %b
-  ret i64 %sdiv
-}
-
-define i32 @t6(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t6:
-; CHECK: lsl w0, w0, w1
-; CHECK: ret
-  %shl = shl i32 %a, %b
-  ret i32 %shl
-}
-
-define i64 @t7(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t7:
-; CHECK: lsl x0, x0, x1
-; CHECK: ret
-  %shl = shl i64 %a, %b
-  ret i64 %shl
-}
-
-define i32 @t8(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t8:
-; CHECK: lsr w0, w0, w1
-; CHECK: ret
-  %lshr = lshr i32 %a, %b
-  ret i32 %lshr
-}
-
-define i64 @t9(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t9:
-; CHECK: lsr x0, x0, x1
-; CHECK: ret
-  %lshr = lshr i64 %a, %b
-  ret i64 %lshr
-}
-
-define i32 @t10(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t10:
-; CHECK: asr w0, w0, w1
-; CHECK: ret
-  %ashr = ashr i32 %a, %b
-  ret i32 %ashr
-}
-
-define i64 @t11(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t11:
-; CHECK: asr x0, x0, x1
-; CHECK: ret
-  %ashr = ashr i64 %a, %b
-  ret i64 %ashr
-}
-
-define i32 @t12(i16 %a, i32 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: t12:
-; CHECK: add	w0, w1, w0, sxth
-; CHECK: ret
-  %c = sext i16 %a to i32
-  %e = add i32 %x, %c
-  ret i32 %e
-}
-
-define i32 @t13(i16 %a, i32 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: t13:
-; CHECK: add	w0, w1, w0, sxth #2
-; CHECK: ret
-  %c = sext i16 %a to i32
-  %d = shl i32 %c, 2
-  %e = add i32 %x, %d
-  ret i32 %e
-}
-
-define i64 @t14(i16 %a, i64 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: t14:
-; CHECK: add	x0, x1, w0, uxth #3
-; CHECK: ret
-  %c = zext i16 %a to i64
-  %d = shl i64 %c, 3
-  %e = add i64 %x, %d
-  ret i64 %e
-}
-
-; rdar://9160598
-define i64 @t15(i64 %a, i64 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: t15:
-; CHECK: add x0, x1, w0, uxtw
-; CHECK: ret
-  %b = and i64 %a, 4294967295
-  %c = add i64 %x, %b
-  ret i64 %c
-}
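-
-; In t15 the mask 4294967295 is 0xFFFFFFFF, i.e. a zero-extension of the low
-; 32 bits, so the AND folds into the add's "uxtw" extended-register operand
-; rather than costing a separate instruction.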
-
-define i64 @t16(i64 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: t16:
-; CHECK: lsl x0, x0, #1
-; CHECK: ret
-  %a = shl i64 %x, 1
-  ret i64 %a
-}
-
-; rdar://9166974
-define i64 @t17(i16 %a, i64 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: t17:
-; CHECK: sxth [[REG:x[0-9]+]], w0
-; CHECK: neg x0, [[REG]], lsl #32
-; CHECK: ret
-  %tmp16 = sext i16 %a to i64
-  %tmp17 = mul i64 %tmp16, -4294967296
-  ret i64 %tmp17
-}
-
-define i32 @t18(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t18:
-; CHECK: sdiv w0, w0, w1
-; CHECK: ret
-  %sdiv = call i32 @llvm.arm64.sdiv.i32(i32 %a, i32 %b)
-  ret i32 %sdiv
-}
-
-define i64 @t19(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t19:
-; CHECK: sdiv x0, x0, x1
-; CHECK: ret
-  %sdiv = call i64 @llvm.arm64.sdiv.i64(i64 %a, i64 %b)
-  ret i64 %sdiv
-}
-
-define i32 @t20(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t20:
-; CHECK: udiv w0, w0, w1
-; CHECK: ret
-  %udiv = call i32 @llvm.arm64.udiv.i32(i32 %a, i32 %b)
-  ret i32 %udiv
-}
-
-define i64 @t21(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t21:
-; CHECK: udiv x0, x0, x1
-; CHECK: ret
-  %udiv = call i64 @llvm.arm64.udiv.i64(i64 %a, i64 %b)
-  ret i64 %udiv
-}
-
-declare i32 @llvm.arm64.sdiv.i32(i32, i32) nounwind readnone
-declare i64 @llvm.arm64.sdiv.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.udiv.i32(i32, i32) nounwind readnone
-declare i64 @llvm.arm64.udiv.i64(i64, i64) nounwind readnone
-
-; 32-bit not.
-define i32 @inv_32(i32 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: inv_32:
-; CHECK: mvn w0, w0
-; CHECK: ret
-  %inv = xor i32 %x, -1
-  ret i32 %inv
-}
-
-; 64-bit not.
-define i64 @inv_64(i64 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: inv_64:
-; CHECK: mvn x0, x0
-; CHECK: ret
-  %inv = xor i64 %x, -1
-  ret i64 %inv
-}
-
-; Multiplying by a power of two plus or minus one is better done via shift
-; and add/sub rather than the madd/msub instructions. The latter are 4+ cycles,
-; and the former are two (total for the two instruction sequence for subtract).
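-; For example, f0 below multiplies by 9 and gets "add w0, w0, w0, lsl #3"
-; (x + 8x), while f1 multiplies by 15 and gets "lsl x8, x0, #4" followed by
-; "sub x0, x8, x0" (16x - x).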
-define i32 @f0(i32 %a) nounwind readnone ssp {
-; CHECK-LABEL: f0:
-; CHECK-NEXT: add w0, w0, w0, lsl #3
-; CHECK-NEXT: ret
-  %res = mul i32 %a, 9
-  ret i32 %res
-}
-
-define i64 @f1(i64 %a) nounwind readnone ssp {
-; CHECK-LABEL: f1:
-; CHECK-NEXT: lsl x8, x0, #4
-; CHECK-NEXT: sub x0, x8, x0
-; CHECK-NEXT: ret
-  %res = mul i64 %a, 15
-  ret i64 %res
-}
-
-define i32 @f2(i32 %a) nounwind readnone ssp {
-; CHECK-LABEL: f2:
-; CHECK-NEXT: lsl w8, w0, #3
-; CHECK-NEXT: sub w0, w8, w0
-; CHECK-NEXT: ret
-  %res = mul nsw i32 %a, 7
-  ret i32 %res
-}
-
-define i64 @f3(i64 %a) nounwind readnone ssp {
-; CHECK-LABEL: f3:
-; CHECK-NEXT: add x0, x0, x0, lsl #4
-; CHECK-NEXT: ret
-  %res = mul nsw i64 %a, 17
-  ret i64 %res
-}

Removed: llvm/trunk/test/CodeGen/ARM64/arm64-dead-def-elimination-flag.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/arm64-dead-def-elimination-flag.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/arm64-dead-def-elimination-flag.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/arm64-dead-def-elimination-flag.ll (removed)
@@ -1,16 +0,0 @@
-; RUN: llc -march=arm64 -arm64-dead-def-elimination=false < %s | FileCheck %s
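-; Note: once the backend is renamed, this option presumably becomes
-; -aarch64-dead-def-elimination in the AArch64 copy of this test.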
-
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "arm64-apple-ios7.0.0"
-
-; Function Attrs: nounwind ssp uwtable
-define i32 @test1() #0 {
-  %tmp1 = alloca i8
-  %tmp2 = icmp eq i8* %tmp1, null
-  %tmp3 = zext i1 %tmp2 to i32
-
-  ret i32 %tmp3
-
-  ; CHECK-LABEL: test1
-  ; CHECK: adds {{x[0-9]+}}, sp, #15
-}

Removed: llvm/trunk/test/CodeGen/ARM64/atomic-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/atomic-128.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/atomic-128.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/atomic-128.ll (removed)
@@ -1,225 +0,0 @@
-; RUN: llc < %s -march=arm64 -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone | FileCheck %s
-
- at var = global i128 0
-
-define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
-; CHECK-LABEL: val_compare_and_swap:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp   [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x[[ADDR:[0-9]+]]]
-; CHECK-DAG: eor     [[MISMATCH_LO:x[0-9]+]], [[RESULTLO]], x2
-; CHECK-DAG: eor     [[MISMATCH_HI:x[0-9]+]], [[RESULTHI]], x3
-; CHECK: orr [[MISMATCH:x[0-9]+]], [[MISMATCH_LO]], [[MISMATCH_HI]]
-; CHECK: cbnz    [[MISMATCH]], [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stxp   [[SCRATCH_RES:w[0-9]+]], x4, x5, [x[[ADDR]]]
-; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
-; CHECK: [[DONE]]:
-  %val = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
-  ret i128 %val
-}
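-
-; There is no single instruction for a 128-bit compare-and-swap, so the
-; cmpxchg expands to a load-exclusive/store-exclusive retry loop: ldaxp loads
-; both halves, the eor/eor/orr sequence computes a combined 128-bit mismatch,
-; cbnz exits on inequality, and a failed stxp (nonzero status register)
-; restarts the loop.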
-
-define void @fetch_and_nand(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_nand:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK-DAG: bic    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
-; CHECK-DAG: bic    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
-; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
-  %val = atomicrmw nand i128* %p, i128 %bits release
-  store i128 %val, i128* @var, align 16
-  ret void
-}
-
-define void @fetch_and_or(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_or:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK-DAG: orr    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
-; CHECK-DAG: orr    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
-; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
-  %val = atomicrmw or i128* %p, i128 %bits seq_cst
-  store i128 %val, i128* @var, align 16
-  ret void
-}
-
-define void @fetch_and_add(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_add:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: adds   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
-; CHECK: adcs   [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
-; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
-  %val = atomicrmw add i128* %p, i128 %bits seq_cst
-  store i128 %val, i128* @var, align 16
-  ret void
-}
-
-define void @fetch_and_sub(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_sub:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: subs   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
-; CHECK: sbcs    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
-; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
-  %val = atomicrmw sub i128* %p, i128 %bits seq_cst
-  store i128 %val, i128* @var, align 16
-  ret void
-}
-
-define void @fetch_and_min(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_min:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp   [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: cmp     [[DEST_REGLO]], x2
-; CHECK: cset    [[LOCMP:w[0-9]+]], ls
-; CHECK: cmp     [[DEST_REGHI:x[0-9]+]], x3
-; CHECK: cset    [[HICMP:w[0-9]+]], le
-; CHECK: csel    [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
-; CHECK: cmp     [[CMP]], #0
-; CHECK-DAG: csel    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
-; CHECK-DAG: csel    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
-; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
-  %val = atomicrmw min i128* %p, i128 %bits seq_cst
-  store i128 %val, i128* @var, align 16
-  ret void
-}
-
-define void @fetch_and_max(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_max:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: cmp     [[DEST_REGLO]], x2
-; CHECK: cset    [[LOCMP:w[0-9]+]], hi
-; CHECK: cmp     [[DEST_REGHI:x[0-9]+]], x3
-; CHECK: cset    [[HICMP:w[0-9]+]], gt
-; CHECK: csel    [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
-; CHECK: cmp     [[CMP]], #0
-; CHECK-DAG: csel    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
-; CHECK-DAG: csel    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
-; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
-  %val = atomicrmw max i128* %p, i128 %bits seq_cst
-  store i128 %val, i128* @var, align 16
-  ret void
-}
-
-define void @fetch_and_umin(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_umin:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: cmp     [[DEST_REGLO]], x2
-; CHECK: cset    [[LOCMP:w[0-9]+]], ls
-; CHECK: cmp     [[DEST_REGHI:x[0-9]+]], x3
-; CHECK: cset    [[HICMP:w[0-9]+]], ls
-; CHECK: csel    [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
-; CHECK: cmp     [[CMP]], #0
-; CHECK-DAG: csel    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
-; CHECK-DAG: csel    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
-; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
-  %val = atomicrmw umin i128* %p, i128 %bits seq_cst
-  store i128 %val, i128* @var, align 16
-  ret void
-}
-
-define void @fetch_and_umax(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_umax:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: cmp     [[DEST_REGLO]], x2
-; CHECK: cset    [[LOCMP:w[0-9]+]], hi
-; CHECK: cmp     [[DEST_REGHI:x[0-9]+]], x3
-; CHECK: cset    [[HICMP:w[0-9]+]], hi
-; CHECK: csel    [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
-; CHECK: cmp     [[CMP]], #0
-; CHECK-DAG: csel    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
-; CHECK-DAG: csel    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
-; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
-  %val = atomicrmw umax i128* %p, i128 %bits seq_cst
-  store i128 %val, i128* @var, align 16
-  ret void
-}
-
-define i128 @atomic_load_seq_cst(i128* %p) {
-; CHECK-LABEL: atomic_load_seq_cst:
-; CHECK-NOT: dmb
-; CHECK: ldaxp
-; CHECK-NOT: dmb
-   %r = load atomic i128* %p seq_cst, align 16
-   ret i128 %r
-}
-
-define i128 @atomic_load_relaxed(i128* %p) {
-; CHECK-LABEL: atomic_load_relaxed:
-; CHECK-NOT: dmb
-; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0]
-; CHECK-NOT: dmb
-   %r = load atomic i128* %p monotonic, align 16
-   ret i128 %r
-}
-
-
-define void @atomic_store_seq_cst(i128 %in, i128* %p) {
-; CHECK-LABEL: atomic_store_seq_cst:
-; CHECK-NOT: dmb
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp xzr, xzr, [x2]
-; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
-; CHECK: cbnz [[SUCCESS]], [[LABEL]]
-; CHECK-NOT: dmb
-   store atomic i128 %in, i128* %p seq_cst, align 16
-   ret void
-}
-
-define void @atomic_store_release(i128 %in, i128* %p) {
-; CHECK-LABEL: atomic_store_release:
-; CHECK-NOT: dmb
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxp xzr, xzr, [x2]
-; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
-; CHECK: cbnz [[SUCCESS]], [[LABEL]]
-; CHECK-NOT: dmb
-   store atomic i128 %in, i128* %p release, align 16
-   ret void
-}
-
-define void @atomic_store_relaxed(i128 %in, i128* %p) {
-; CHECK-LABEL: atomic_store_relaxed:
-; CHECK-NOT: dmb
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxp xzr, xzr, [x2]
-; CHECK: stxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
-; CHECK: cbnz [[SUCCESS]], [[LABEL]]
-; CHECK-NOT: dmb
-   store atomic i128 %in, i128* %p unordered, align 16
-   ret void
-}

Removed: llvm/trunk/test/CodeGen/ARM64/atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/atomic.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/atomic.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/atomic.ll (removed)
@@ -1,331 +0,0 @@
-; RUN: llc < %s -march=arm64 -verify-machineinstrs -mcpu=cyclone | FileCheck %s
-
-define i32 @val_compare_and_swap(i32* %p) {
-; CHECK-LABEL: val_compare_and_swap:
-; CHECK: orr    [[NEWVAL_REG:w[0-9]+]], wzr, #0x4
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr   [[RESULT:w[0-9]+]], [x0]
-; CHECK: cmp    [[RESULT]], #7
-; CHECK: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK: stxr   [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0]
-; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
-; CHECK: [[LABEL2]]:
-  %val = cmpxchg i32* %p, i32 7, i32 4 acquire acquire
-  ret i32 %val
-}
-
-define i64 @val_compare_and_swap_64(i64* %p) {
-; CHECK-LABEL: val_compare_and_swap_64:
-; CHECK: orr    w[[NEWVAL_REG:[0-9]+]], wzr, #0x4
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxr   [[RESULT:x[0-9]+]], [x0]
-; CHECK: cmp    [[RESULT]], #7
-; CHECK: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK-NOT: stxr x[[NEWVAL_REG]], x[[NEWVAL_REG]]
-; CHECK: stxr   [[SCRATCH_REG:w[0-9]+]], x[[NEWVAL_REG]], [x0]
-; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
-; CHECK: [[LABEL2]]:
-  %val = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic
-  ret i64 %val
-}
-
-define i32 @fetch_and_nand(i32* %p) {
-; CHECK-LABEL: fetch_and_nand:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxr   w[[DEST_REG:[0-9]+]], [x0]
-; CHECK: and    [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], #0xfffffff8
-; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
-; CHECK: stlxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
-; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
-; CHECK: mov    x0, x[[DEST_REG]]
-  %val = atomicrmw nand i32* %p, i32 7 release
-  ret i32 %val
-}
-
-define i64 @fetch_and_nand_64(i64* %p) {
-; CHECK-LABEL: fetch_and_nand_64:
-; CHECK: mov    x[[ADDR:[0-9]+]], x0
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr   [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
-; CHECK: and    [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0xfffffffffffffff8
-; CHECK: stlxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
-; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
-
-  %val = atomicrmw nand i64* %p, i64 7 acq_rel
-  ret i64 %val
-}
-
-define i32 @fetch_and_or(i32* %p) {
-; CHECK-LABEL: fetch_and_or:
-; CHECK: movz   [[OLDVAL_REG:w[0-9]+]], #0x5
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr   w[[DEST_REG:[0-9]+]], [x0]
-; CHECK: orr    [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]]
-; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
-; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
-; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
-; CHECK: mov    x0, x[[DEST_REG]]
-  %val = atomicrmw or i32* %p, i32 5 seq_cst
-  ret i32 %val
-}
-
-define i64 @fetch_and_or_64(i64* %p) {
-; CHECK-LABEL: fetch_and_or_64:
-; CHECK: mov    x[[ADDR:[0-9]+]], x0
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxr   [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
-; CHECK: orr    [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0x7
-; CHECK: stxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
-; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
-  %val = atomicrmw or i64* %p, i64 7 monotonic
-  ret i64 %val
-}
-
-define void @acquire_fence() {
-   fence acquire
-   ret void
-   ; CHECK-LABEL: acquire_fence:
-   ; CHECK: dmb ishld
-}
-
-define void @release_fence() {
-   fence release
-   ret void
-   ; CHECK-LABEL: release_fence:
-   ; CHECK: dmb ish{{$}}
-}
-
-define void @seq_cst_fence() {
-   fence seq_cst
-   ret void
-   ; CHECK-LABEL: seq_cst_fence:
-   ; CHECK: dmb ish{{$}}
-}
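-
-; An acquire fence only has to order later accesses against earlier loads, so
-; it lowers to the cheaper "dmb ishld"; release and seq_cst fences both take
-; the full inner-shareable barrier "dmb ish".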
-
-define i32 @atomic_load(i32* %p) {
-   %r = load atomic i32* %p seq_cst, align 4
-   ret i32 %r
-   ; CHECK-LABEL: atomic_load:
-   ; CHECK: ldar
-}
-
-define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) {
-; CHECK-LABEL: atomic_load_relaxed_8:
-  %ptr_unsigned = getelementptr i8* %p, i32 4095
-  %val_unsigned = load atomic i8* %ptr_unsigned monotonic, align 1
-; CHECK: ldrb {{w[0-9]+}}, [x0, #4095]
-
-  %ptr_regoff = getelementptr i8* %p, i32 %off32
-  %val_regoff = load atomic i8* %ptr_regoff unordered, align 1
-  %tot1 = add i8 %val_unsigned, %val_regoff
-; CHECK: ldrb {{w[0-9]+}}, [x0, w1, sxtw]
-
-  %ptr_unscaled = getelementptr i8* %p, i32 -256
-  %val_unscaled = load atomic i8* %ptr_unscaled monotonic, align 1
-  %tot2 = add i8 %tot1, %val_unscaled
-; CHECK: ldurb {{w[0-9]+}}, [x0, #-256]
-
-  %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
-  %val_random = load atomic i8* %ptr_random unordered, align 1
-  %tot3 = add i8 %tot2, %val_random
-; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
-; CHECK: ldrb {{w[0-9]+}}, [x[[ADDR]]]
-
-  ret i8 %tot3
-}
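-
-; The four loads above cover each addressing mode: a scaled unsigned 12-bit
-; immediate (#4095), a sign-extended register offset (w1, sxtw), an unscaled
-; signed 9-bit immediate (ldurb with #-256), and an offset too large for any
-; of those (1191936 = 291 << 12 = 0x123000), which is materialized with
-; "add ..., #291, lsl #12" before a plain register-indirect load. The 16-, 32-
-; and 64-bit variants below repeat the pattern with scaled offsets.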
-
-define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) {
-; CHECK-LABEL: atomic_load_relaxed_16:
-  %ptr_unsigned = getelementptr i16* %p, i32 4095
-  %val_unsigned = load atomic i16* %ptr_unsigned monotonic, align 2
-; CHECK: ldrh {{w[0-9]+}}, [x0, #8190]
-
-  %ptr_regoff = getelementptr i16* %p, i32 %off32
-  %val_regoff = load atomic i16* %ptr_regoff unordered, align 2
-  %tot1 = add i16 %val_unsigned, %val_regoff
-; CHECK: ldrh {{w[0-9]+}}, [x0, w1, sxtw #1]
-
-  %ptr_unscaled = getelementptr i16* %p, i32 -128
-  %val_unscaled = load atomic i16* %ptr_unscaled monotonic, align 2
-  %tot2 = add i16 %tot1, %val_unscaled
-; CHECK: ldurh {{w[0-9]+}}, [x0, #-256]
-
-  %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
-  %val_random = load atomic i16* %ptr_random unordered, align 2
-  %tot3 = add i16 %tot2, %val_random
-; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
-; CHECK: ldrh {{w[0-9]+}}, [x[[ADDR]]]
-
-  ret i16 %tot3
-}
-
-define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) {
-; CHECK-LABEL: atomic_load_relaxed_32:
-  %ptr_unsigned = getelementptr i32* %p, i32 4095
-  %val_unsigned = load atomic i32* %ptr_unsigned monotonic, align 4
-; CHECK: ldr {{w[0-9]+}}, [x0, #16380]
-
-  %ptr_regoff = getelementptr i32* %p, i32 %off32
-  %val_regoff = load atomic i32* %ptr_regoff unordered, align 4
-  %tot1 = add i32 %val_unsigned, %val_regoff
-; CHECK: ldr {{w[0-9]+}}, [x0, w1, sxtw #2]
-
-  %ptr_unscaled = getelementptr i32* %p, i32 -64
-  %val_unscaled = load atomic i32* %ptr_unscaled monotonic, align 4
-  %tot2 = add i32 %tot1, %val_unscaled
-; CHECK: ldur {{w[0-9]+}}, [x0, #-256]
-
-  %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
-  %val_random = load atomic i32* %ptr_random unordered, align 4
-  %tot3 = add i32 %tot2, %val_random
-; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
-; CHECK: ldr {{w[0-9]+}}, [x[[ADDR]]]
-
-  ret i32 %tot3
-}
-
-define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) {
-; CHECK-LABEL: atomic_load_relaxed_64:
-  %ptr_unsigned = getelementptr i64* %p, i32 4095
-  %val_unsigned = load atomic i64* %ptr_unsigned monotonic, align 8
-; CHECK: ldr {{x[0-9]+}}, [x0, #32760]
-
-  %ptr_regoff = getelementptr i64* %p, i32 %off32
-  %val_regoff = load atomic i64* %ptr_regoff unordered, align 8
-  %tot1 = add i64 %val_unsigned, %val_regoff
-; CHECK: ldr {{x[0-9]+}}, [x0, w1, sxtw #3]
-
-  %ptr_unscaled = getelementptr i64* %p, i32 -32
-  %val_unscaled = load atomic i64* %ptr_unscaled monotonic, align 8
-  %tot2 = add i64 %tot1, %val_unscaled
-; CHECK: ldur {{x[0-9]+}}, [x0, #-256]
-
-  %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
-  %val_random = load atomic i64* %ptr_random unordered, align 8
-  %tot3 = add i64 %tot2, %val_random
-; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
-; CHECK: ldr {{x[0-9]+}}, [x[[ADDR]]]
-
-  ret i64 %tot3
-}
-
-
-define void @atomic_store(i32* %p) {
-   store atomic i32 4, i32* %p seq_cst, align 4
-   ret void
-   ; CHECK-LABEL: atomic_store:
-   ; CHECK: stlr
-}
-
-define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) {
-; CHECK-LABEL: atomic_store_relaxed_8:
-  %ptr_unsigned = getelementptr i8* %p, i32 4095
-  store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1
-; CHECK: strb {{w[0-9]+}}, [x0, #4095]
-
-  %ptr_regoff = getelementptr i8* %p, i32 %off32
-  store atomic i8 %val, i8* %ptr_regoff unordered, align 1
-; CHECK: strb {{w[0-9]+}}, [x0, w1, sxtw]
-
-  %ptr_unscaled = getelementptr i8* %p, i32 -256
-  store atomic i8 %val, i8* %ptr_unscaled monotonic, align 1
-; CHECK: sturb {{w[0-9]+}}, [x0, #-256]
-
-  %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
-  store atomic i8 %val, i8* %ptr_random unordered, align 1
-; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
-; CHECK: strb {{w[0-9]+}}, [x[[ADDR]]]
-
-  ret void
-}
-
-define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) {
-; CHECK-LABEL: atomic_store_relaxed_16:
-  %ptr_unsigned = getelementptr i16* %p, i32 4095
-  store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2
-; CHECK: strh {{w[0-9]+}}, [x0, #8190]
-
-  %ptr_regoff = getelementptr i16* %p, i32 %off32
-  store atomic i16 %val, i16* %ptr_regoff unordered, align 2
-; CHECK: strh {{w[0-9]+}}, [x0, w1, sxtw #1]
-
-  %ptr_unscaled = getelementptr i16* %p, i32 -128
-  store atomic i16 %val, i16* %ptr_unscaled monotonic, align 2
-; CHECK: sturh {{w[0-9]+}}, [x0, #-256]
-
-  %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
-  store atomic i16 %val, i16* %ptr_random unordered, align 2
-; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
-; CHECK: strh {{w[0-9]+}}, [x[[ADDR]]]
-
-  ret void
-}
-
-define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) {
-; CHECK-LABEL: atomic_store_relaxed_32:
-  %ptr_unsigned = getelementptr i32* %p, i32 4095
-  store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4
-; CHECK: str {{w[0-9]+}}, [x0, #16380]
-
-  %ptr_regoff = getelementptr i32* %p, i32 %off32
-  store atomic i32 %val, i32* %ptr_regoff unordered, align 4
-; CHECK: str {{w[0-9]+}}, [x0, w1, sxtw #2]
-
-  %ptr_unscaled = getelementptr i32* %p, i32 -64
-  store atomic i32 %val, i32* %ptr_unscaled monotonic, align 4
-; CHECK: stur {{w[0-9]+}}, [x0, #-256]
-
-  %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
-  store atomic i32 %val, i32* %ptr_random unordered, align 4
-; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
-; CHECK: str {{w[0-9]+}}, [x[[ADDR]]]
-
-  ret void
-}
-
-define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) {
-; CHECK-LABEL: atomic_store_relaxed_64:
-  %ptr_unsigned = getelementptr i64* %p, i32 4095
-  store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8
-; CHECK: str {{x[0-9]+}}, [x0, #32760]
-
-  %ptr_regoff = getelementptr i64* %p, i32 %off32
-  store atomic i64 %val, i64* %ptr_regoff unordered, align 8
-; CHECK: str {{x[0-9]+}}, [x0, w1, sxtw #3]
-
-  %ptr_unscaled = getelementptr i64* %p, i32 -32
-  store atomic i64 %val, i64* %ptr_unscaled monotonic, align 8
-; CHECK: stur {{x[0-9]+}}, [x0, #-256]
-
-  %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
-  store atomic i64 %val, i64* %ptr_random unordered, align 8
-; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
-; CHECK: str {{x[0-9]+}}, [x[[ADDR]]]
-
-  ret void
-}
-
-; rdar://11531169
-; rdar://11531308
-
-%"class.X::Atomic" = type { %struct.x_atomic_t }
-%struct.x_atomic_t = type { i32 }
-
- at counter = external hidden global %"class.X::Atomic", align 4
-
-define i32 @next_id() nounwind optsize ssp align 2 {
-entry:
-  %0 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst
-  %add.i = add i32 %0, 1
-  %tobool = icmp eq i32 %add.i, 0
-  br i1 %tobool, label %if.else, label %return
-
-if.else:                                          ; preds = %entry
-  %1 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst
-  %add.i2 = add i32 %1, 1
-  br label %return
-
-return:                                           ; preds = %if.else, %entry
-  %retval.0 = phi i32 [ %add.i2, %if.else ], [ %add.i, %entry ]
-  ret i32 %retval.0
-}

Removed: llvm/trunk/test/CodeGen/ARM64/basic-pic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/basic-pic.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/basic-pic.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/basic-pic.ll (removed)
@@ -1,54 +0,0 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s
-
- at var = global i32 0
-
-define i32 @get_globalvar() {
-; CHECK-LABEL: get_globalvar:
-
-  %val = load i32* @var
-; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
-; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], :got_lo12:var]
-; CHECK: ldr w0, [x[[GOTLOC]]]
-
-  ret i32 %val
-}
-
-define i32* @get_globalvaraddr() {
-; CHECK-LABEL: get_globalvaraddr:
-
-  %val = load i32* @var
-; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
-; CHECK: ldr x0, [x[[GOTHI]], :got_lo12:var]
-
-  ret i32* @var
-}
-
- at hiddenvar = hidden global i32 0
-
-define i32 @get_hiddenvar() {
-; CHECK-LABEL: get_hiddenvar:
-
-  %val = load i32* @hiddenvar
-; CHECK: adrp x[[HI:[0-9]+]], hiddenvar
-; CHECK: ldr w0, [x[[HI]], :lo12:hiddenvar]
-
-  ret i32 %val
-}
-
-define i32* @get_hiddenvaraddr() {
-; CHECK-LABEL: get_hiddenvaraddr:
-
-  %val = load i32* @hiddenvar
-; CHECK: adrp [[HI:x[0-9]+]], hiddenvar
-; CHECK: add x0, [[HI]], :lo12:hiddenvar
-
-  ret i32* @hiddenvar
-}
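-
-; Default-visibility globals may be preempted at link time, so PIC code loads
-; their address from the GOT (adrp of :got: plus ldr of :got_lo12:); hidden
-; symbols are known to be local, so they are addressed directly with adrp plus
-; a :lo12: low part, skipping the GOT load.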
-
-define void()* @get_func() {
-; CHECK-LABEL: get_func:
-
-  ret void()* bitcast(void()*()* @get_func to void()*)
-; CHECK: adrp x[[GOTHI:[0-9]+]], :got:get_func
-; CHECK: ldr x0, [x[[GOTHI]], :got_lo12:get_func]
-}

Removed: llvm/trunk/test/CodeGen/ARM64/big-endian-bitconverts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/big-endian-bitconverts.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/big-endian-bitconverts.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/big-endian-bitconverts.ll (removed)
@@ -1,1101 +0,0 @@
-; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -O1 -o - | FileCheck %s
-; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -O0 -fast-isel=true -o - | FileCheck %s
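-
-; On big-endian targets, ld1/st1 keep lanes in element order while scalar
-; ldr/str treat the whole value as one unit, so a bitcast between types with
-; different element sizes needs a REV at the granularity of the larger
-; element: rev64 between 64-bit values and 32/16/8-bit lanes, rev32 between
-; 32-bit and 16/8-bit lanes, rev16 between 16-bit and 8-bit lanes. Casts that
-; preserve the element size (or stay scalar) need no rev, per the checks
-; below.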
-
-; CHECK-LABEL: test_i64_f64:
-define void @test_i64_f64(double* %p, i64* %q) {
-; CHECK: ldr
-; CHECK: str
-    %1 = load double* %p
-    %2 = fadd double %1, %1
-    %3 = bitcast double %2 to i64
-    %4 = add i64 %3, %3
-    store i64 %4, i64* %q
-    ret void
-}
-
-; CHECK-LABEL: test_i64_v1i64:
-define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
-; CHECK: ldr
-; CHECK: str
-    %1 = load <1 x i64>* %p
-    %2 = add <1 x i64> %1, %1
-    %3 = bitcast <1 x i64> %2 to i64
-    %4 = add i64 %3, %3
-    store i64 %4, i64* %q
-    ret void
-}
-
-; CHECK-LABEL: test_i64_v2f32:
-define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2s }
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: str
-    %1 = load <2 x float>* %p
-    %2 = fadd <2 x float> %1, %1
-    %3 = bitcast <2 x float> %2 to i64
-    %4 = add i64 %3, %3
-    store i64 %4, i64* %q
-    ret void
-}
-
-; CHECK-LABEL: test_i64_v2i32:
-define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2s }
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: str
-    %1 = load <2 x i32>* %p
-    %2 = add <2 x i32> %1, %1
-    %3 = bitcast <2 x i32> %2 to i64
-    %4 = add i64 %3, %3
-    store i64 %4, i64* %q
-    ret void
-}
-
-; CHECK-LABEL: test_i64_v4i16:
-define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.4h }
-; CHECK: rev64 v{{[0-9]+}}.4h
-; CHECK: str
-    %1 = load <4 x i16>* %p
-    %2 = add <4 x i16> %1, %1
-    %3 = bitcast <4 x i16> %2 to i64
-    %4 = add i64 %3, %3
-    store i64 %4, i64* %q
-    ret void
-}
-
-; CHECK-LABEL: test_i64_v8i8:
-define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.8b }
-; CHECK: rev64 v{{[0-9]+}}.8b
-; CHECK: str
-    %1 = load <8 x i8>* %p
-    %2 = add <8 x i8> %1, %1
-    %3 = bitcast <8 x i8> %2 to i64
-    %4 = add i64 %3, %3
-    store i64 %4, i64* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f64_i64:
-define void @test_f64_i64(i64* %p, double* %q) {
-; CHECK: ldr
-; CHECK: str
-    %1 = load i64* %p
-    %2 = add i64 %1, %1
-    %3 = bitcast i64 %2 to double
-    %4 = fadd double %3, %3
-    store double %4, double* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f64_v1i64:
-define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
-; CHECK: ldr
-; CHECK: str
-    %1 = load <1 x i64>* %p
-    %2 = add <1 x i64> %1, %1
-    %3 = bitcast <1 x i64> %2 to double
-    %4 = fadd double %3, %3
-    store double %4, double* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f64_v2f32:
-define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2s }
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: str
-    %1 = load <2 x float>* %p
-    %2 = fadd <2 x float> %1, %1
-    %3 = bitcast <2 x float> %2 to double
-    %4 = fadd double %3, %3
-    store double %4, double* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f64_v2i32:
-define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2s }
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: str
-    %1 = load <2 x i32>* %p
-    %2 = add <2 x i32> %1, %1
-    %3 = bitcast <2 x i32> %2 to double
-    %4 = fadd double %3, %3
-    store double %4, double* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f64_v4i16:
-define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.4h }
-; CHECK: rev64 v{{[0-9]+}}.4h
-; CHECK: str
-    %1 = load <4 x i16>* %p
-    %2 = add <4 x i16> %1, %1
-    %3 = bitcast <4 x i16> %2 to double
-    %4 = fadd double %3, %3
-    store double %4, double* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f64_v8i8:
-define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.8b }
-; CHECK: rev64 v{{[0-9]+}}.8b
-; CHECK: str
-    %1 = load <8 x i8>* %p
-    %2 = add <8 x i8> %1, %1
-    %3 = bitcast <8 x i8> %2 to double
-    %4 = fadd double %3, %3
-    store double %4, double* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v1i64_i64:
-define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
-; CHECK: ldr
-; CHECK: str
-    %1 = load i64* %p
-    %2 = add i64 %1, %1
-    %3 = bitcast i64 %2 to <1 x i64>
-    %4 = add <1 x i64> %3, %3
-    store <1 x i64> %4, <1 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v1i64_f64:
-define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
-; CHECK: ldr
-; CHECK: str
-    %1 = load double* %p
-    %2 = fadd double %1, %1
-    %3 = bitcast double %2 to <1 x i64>
-    %4 = add <1 x i64> %3, %3
-    store <1 x i64> %4, <1 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v1i64_v2f32:
-define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2s }
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: str
-    %1 = load <2 x float>* %p
-    %2 = fadd <2 x float> %1, %1
-    %3 = bitcast <2 x float> %2 to <1 x i64>
-    %4 = add <1 x i64> %3, %3
-    store <1 x i64> %4, <1 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v1i64_v2i32:
-define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2s }
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: str
-    %1 = load <2 x i32>* %p
-    %2 = add <2 x i32> %1, %1
-    %3 = bitcast <2 x i32> %2 to <1 x i64>
-    %4 = add <1 x i64> %3, %3
-    store <1 x i64> %4, <1 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v1i64_v4i16:
-define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.4h }
-; CHECK: rev64 v{{[0-9]+}}.4h
-; CHECK: str
-    %1 = load <4 x i16>* %p
-    %2 = add <4 x i16> %1, %1
-    %3 = bitcast <4 x i16> %2 to <1 x i64>
-    %4 = add <1 x i64> %3, %3
-    store <1 x i64> %4, <1 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v1i64_v8i8:
-define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.8b }
-; CHECK: rev64 v{{[0-9]+}}.8b
-; CHECK: str
-    %1 = load <8 x i8>* %p
-    %2 = add <8 x i8> %1, %1
-    %3 = bitcast <8 x i8> %2 to <1 x i64>
-    %4 = add <1 x i64> %3, %3
-    store <1 x i64> %4, <1 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f32_i64:
-define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load i64* %p
-    %2 = add i64 %1, %1
-    %3 = bitcast i64 %2 to <2 x float>
-    %4 = fadd <2 x float> %3, %3
-    store <2 x float> %4, <2 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f32_f64:
-define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load double* %p
-    %2 = fadd double %1, %1
-    %3 = bitcast double %2 to <2 x float>
-    %4 = fadd <2 x float> %3, %3
-    store <2 x float> %4, <2 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f32_v1i64:
-define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <1 x i64>* %p
-    %2 = add <1 x i64> %1, %1
-    %3 = bitcast <1 x i64> %2 to <2 x float>
-    %4 = fadd <2 x float> %3, %3
-    store <2 x float> %4, <2 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f32_v2i32:
-define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2s }
-; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <2 x i32>* %p
-    %2 = add <2 x i32> %1, %1
-    %3 = bitcast <2 x i32> %2 to <2 x float>
-    %4 = fadd <2 x float> %3, %3
-    store <2 x float> %4, <2 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f32_v4i16:
-define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.4h }
-; CHECK: rev32 v{{[0-9]+}}.4h
-; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <4 x i16>* %p
-    %2 = add <4 x i16> %1, %1
-    %3 = bitcast <4 x i16> %2 to <2 x float>
-    %4 = fadd <2 x float> %3, %3
-    store <2 x float> %4, <2 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f32_v8i8:
-define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.8b }
-; CHECK: rev32 v{{[0-9]+}}.8b
-; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <8 x i8>* %p
-    %2 = add <8 x i8> %1, %1
-    %3 = bitcast <8 x i8> %2 to <2 x float>
-    %4 = fadd <2 x float> %3, %3
-    store <2 x float> %4, <2 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i32_i64:
-define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load i64* %p
-    %2 = add i64 %1, %1
-    %3 = bitcast i64 %2 to <2 x i32>
-    %4 = add <2 x i32> %3, %3
-    store <2 x i32> %4, <2 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i32_f64:
-define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load double* %p
-    %2 = fadd double %1, %1
-    %3 = bitcast double %2 to <2 x i32>
-    %4 = add <2 x i32> %3, %3
-    store <2 x i32> %4, <2 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i32_v1i64:
-define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <1 x i64>* %p
-    %2 = add <1 x i64> %1, %1
-    %3 = bitcast <1 x i64> %2 to <2 x i32>
-    %4 = add <2 x i32> %3, %3
-    store <2 x i32> %4, <2 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i32_v2f32:
-define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2s }
-; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <2 x float>* %p
-    %2 = fadd <2 x float> %1, %1
-    %3 = bitcast <2 x float> %2 to <2 x i32>
-    %4 = add <2 x i32> %3, %3
-    store <2 x i32> %4, <2 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i32_v4i16:
-define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.4h }
-; CHECK: rev32 v{{[0-9]+}}.4h
-; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <4 x i16>* %p
-    %2 = add <4 x i16> %1, %1
-    %3 = bitcast <4 x i16> %2 to <2 x i32>
-    %4 = add <2 x i32> %3, %3
-    store <2 x i32> %4, <2 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i32_v8i8:
-define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.8b }
-; CHECK: rev32 v{{[0-9]+}}.8b
-; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <8 x i8>* %p
-    %2 = add <8 x i8> %1, %1
-    %3 = bitcast <8 x i8> %2 to <2 x i32>
-    %4 = add <2 x i32> %3, %3
-    store <2 x i32> %4, <2 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i16_i64:
-define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.4h
-; CHECK: st1 { v{{[0-9]+}}.4h }
-    %1 = load i64* %p
-    %2 = add i64 %1, %1
-    %3 = bitcast i64 %2 to <4 x i16>
-    %4 = add <4 x i16> %3, %3
-    store <4 x i16> %4, <4 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i16_f64:
-define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.4h
-; CHECK: st1 { v{{[0-9]+}}.4h }
-    %1 = load double* %p
-    %2 = fadd double %1, %1
-    %3 = bitcast double %2 to <4 x i16>
-    %4 = add <4 x i16> %3, %3
-    store <4 x i16> %4, <4 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i16_v1i64:
-define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.4h
-; CHECK: st1 { v{{[0-9]+}}.4h }
-    %1 = load <1 x i64>* %p
-    %2 = add <1 x i64> %1, %1
-    %3 = bitcast <1 x i64> %2 to <4 x i16>
-    %4 = add <4 x i16> %3, %3
-    store <4 x i16> %4, <4 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i16_v2f32:
-define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2s }
-; CHECK: rev32 v{{[0-9]+}}.4h
-; CHECK: st1 { v{{[0-9]+}}.4h }
-    %1 = load <2 x float>* %p
-    %2 = fadd <2 x float> %1, %1
-    %3 = bitcast <2 x float> %2 to <4 x i16>
-    %4 = add <4 x i16> %3, %3
-    store <4 x i16> %4, <4 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i16_v2i32:
-define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2s }
-; CHECK: rev32 v{{[0-9]+}}.4h
-; CHECK: st1 { v{{[0-9]+}}.4h }
-    %1 = load <2 x i32>* %p
-    %2 = add <2 x i32> %1, %1
-    %3 = bitcast <2 x i32> %2 to <4 x i16>
-    %4 = add <4 x i16> %3, %3
-    store <4 x i16> %4, <4 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i16_v8i8:
-define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.8b }
-; CHECK: rev16 v{{[0-9]+}}.8b
-; CHECK: st1 { v{{[0-9]+}}.4h }
-    %1 = load <8 x i8>* %p
-    %2 = add <8 x i8> %1, %1
-    %3 = bitcast <8 x i8> %2 to <4 x i16>
-    %4 = add <4 x i16> %3, %3
-    store <4 x i16> %4, <4 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i8_i64:
-define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.8b
-; CHECK: st1 { v{{[0-9]+}}.8b }
-    %1 = load i64* %p
-    %2 = add i64 %1, %1
-    %3 = bitcast i64 %2 to <8 x i8>
-    %4 = add <8 x i8> %3, %3
-    store <8 x i8> %4, <8 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i8_f64:
-define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.8b
-; CHECK: st1 { v{{[0-9]+}}.8b }
-    %1 = load double* %p
-    %2 = fadd double %1, %1
-    %3 = bitcast double %2 to <8 x i8>
-    %4 = add <8 x i8> %3, %3
-    store <8 x i8> %4, <8 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i8_v1i64:
-define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.8b
-; CHECK: st1 { v{{[0-9]+}}.8b }
-    %1 = load <1 x i64>* %p
-    %2 = add <1 x i64> %1, %1
-    %3 = bitcast <1 x i64> %2 to <8 x i8>
-    %4 = add <8 x i8> %3, %3
-    store <8 x i8> %4, <8 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i8_v2f32:
-define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2s }
-; CHECK: rev32 v{{[0-9]+}}.8b
-; CHECK: st1 { v{{[0-9]+}}.8b }
-    %1 = load <2 x float>* %p
-    %2 = fadd <2 x float> %1, %1
-    %3 = bitcast <2 x float> %2 to <8 x i8>
-    %4 = add <8 x i8> %3, %3
-    store <8 x i8> %4, <8 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i8_v2i32:
-define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2s }
-; CHECK: rev32 v{{[0-9]+}}.8b
-; CHECK: st1 { v{{[0-9]+}}.8b }
-    %1 = load <2 x i32>* %p
-    %2 = add <2 x i32> %1, %1
-    %3 = bitcast <2 x i32> %2 to <8 x i8>
-    %4 = add <8 x i8> %3, %3
-    store <8 x i8> %4, <8 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i8_v4i16:
-define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.4h }
-; CHECK: rev16 v{{[0-9]+}}.8b
-; CHECK: st1 { v{{[0-9]+}}.8b }
-    %1 = load <4 x i16>* %p
-    %2 = add <4 x i16> %1, %1
-    %3 = bitcast <4 x i16> %2 to <8 x i8>
-    %4 = add <8 x i8> %3, %3
-    store <8 x i8> %4, <8 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f128_v2f64:
-define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: ext
-; CHECK: str
-    %1 = load <2 x double>* %p
-    %2 = fadd <2 x double> %1, %1
-    %3 = bitcast <2 x double> %2 to fp128
-    %4 = fadd fp128 %3, %3
-    store fp128 %4, fp128* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f128_v2i64:
-define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: ext
-; CHECK: str
-    %1 = load <2 x i64>* %p
-    %2 = add <2 x i64> %1, %1
-    %3 = bitcast <2 x i64> %2 to fp128
-    %4 = fadd fp128 %3, %3
-    store fp128 %4, fp128* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f128_v4f32:
-define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: str q
-    %1 = load <4 x float>* %p
-    %2 = fadd <4 x float> %1, %1
-    %3 = bitcast <4 x float> %2 to fp128
-    %4 = fadd fp128 %3, %3
-    store fp128 %4, fp128* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f128_v4i32:
-define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.4s }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: str
-    %1 = load <4 x i32>* %p
-    %2 = add <4 x i32> %1, %1
-    %3 = bitcast <4 x i32> %2 to fp128
-    %4 = fadd fp128 %3, %3
-    store fp128 %4, fp128* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f128_v8i16:
-define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.8h }
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-; CHECK: str
-    %1 = load <8 x i16>* %p
-    %2 = add <8 x i16> %1, %1
-    %3 = bitcast <8 x i16> %2 to fp128
-    %4 = fadd fp128 %3, %3
-    store fp128 %4, fp128* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f128_v16i8:
-define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.16b }
-; CHECK: ext
-; CHECK: str q
-    %1 = load <16 x i8>* %p
-    %2 = add <16 x i8> %1, %1
-    %3 = bitcast <16 x i8> %2 to fp128
-    %4 = fadd fp128 %3, %3
-    store fp128 %4, fp128* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f64_f128:
-define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
-; CHECK: ldr
-; CHECK: ext
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load fp128* %p
-    %2 = fadd fp128 %1, %1
-    %3 = bitcast fp128 %2 to <2 x double>
-    %4 = fadd <2 x double> %3, %3
-    store <2 x double> %4, <2 x double>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f64_v2i64:
-define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <2 x i64>* %p
-    %2 = add <2 x i64> %1, %1
-    %3 = bitcast <2 x i64> %2 to <2 x double>
-    %4 = fadd <2 x double> %3, %3
-    store <2 x double> %4, <2 x double>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f64_v4f32:
-define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <4 x float>* %p
-    %2 = fadd <4 x float> %1, %1
-    %3 = bitcast <4 x float> %2 to <2 x double>
-    %4 = fadd <2 x double> %3, %3
-    store <2 x double> %4, <2 x double>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f64_v4i32:
-define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.4s }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <4 x i32>* %p
-    %2 = add <4 x i32> %1, %1
-    %3 = bitcast <4 x i32> %2 to <2 x double>
-    %4 = fadd <2 x double> %3, %3
-    store <2 x double> %4, <2 x double>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f64_v8i16:
-define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.8h }
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <8 x i16>* %p
-    %2 = add <8 x i16> %1, %1
-    %3 = bitcast <8 x i16> %2 to <2 x double>
-    %4 = fadd <2 x double> %3, %3
-    store <2 x double> %4, <2 x double>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f64_v16i8:
-define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.16b }
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <16 x i8>* %p
-    %2 = add <16 x i8> %1, %1
-    %3 = bitcast <16 x i8> %2 to <2 x double>
-    %4 = fadd <2 x double> %3, %3
-    store <2 x double> %4, <2 x double>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i64_f128:
-define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
-; CHECK: ldr
-; CHECK: ext
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load fp128* %p
-    %2 = fadd fp128 %1, %1
-    %3 = bitcast fp128 %2 to <2 x i64>
-    %4 = add <2 x i64> %3, %3
-    store <2 x i64> %4, <2 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i64_v2f64:
-define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <2 x double>* %p
-    %2 = fadd <2 x double> %1, %1
-    %3 = bitcast <2 x double> %2 to <2 x i64>
-    %4 = add <2 x i64> %3, %3
-    store <2 x i64> %4, <2 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i64_v4f32:
-define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <4 x float>* %p
-    %2 = fadd <4 x float> %1, %1
-    %3 = bitcast <4 x float> %2 to <2 x i64>
-    %4 = add <2 x i64> %3, %3
-    store <2 x i64> %4, <2 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i64_v4i32:
-define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.4s }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <4 x i32>* %p
-    %2 = add <4 x i32> %1, %1
-    %3 = bitcast <4 x i32> %2 to <2 x i64>
-    %4 = add <2 x i64> %3, %3
-    store <2 x i64> %4, <2 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i64_v8i16:
-define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.8h }
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <8 x i16>* %p
-    %2 = add <8 x i16> %1, %1
-    %3 = bitcast <8 x i16> %2 to <2 x i64>
-    %4 = add <2 x i64> %3, %3
-    store <2 x i64> %4, <2 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i64_v16i8:
-define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.16b }
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <16 x i8>* %p
-    %2 = add <16 x i8> %1, %1
-    %3 = bitcast <16 x i8> %2 to <2 x i64>
-    %4 = add <2 x i64> %3, %3
-    store <2 x i64> %4, <2 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4f32_f128:
-define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
-; CHECK: ldr q
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load fp128* %p
-    %2 = fadd fp128 %1, %1
-    %3 = bitcast fp128 %2 to <4 x float>
-    %4 = fadd <4 x float> %3, %3
-    store <4 x float> %4, <4 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4f32_v2f64:
-define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <2 x double>* %p
-    %2 = fadd <2 x double> %1, %1
-    %3 = bitcast <2 x double> %2 to <4 x float>
-    %4 = fadd <4 x float> %3, %3
-    store <4 x float> %4, <4 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4f32_v2i64:
-define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <2 x i64>* %p
-    %2 = add <2 x i64> %1, %1
-    %3 = bitcast <2 x i64> %2 to <4 x float>
-    %4 = fadd <4 x float> %3, %3
-    store <4 x float> %4, <4 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4f32_v4i32:
-define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.4s }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <4 x i32>* %p
-    %2 = add <4 x i32> %1, %1
-    %3 = bitcast <4 x i32> %2 to <4 x float>
-    %4 = fadd <4 x float> %3, %3
-    store <4 x float> %4, <4 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4f32_v8i16:
-define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.8h }
-; CHECK: rev32 v{{[0-9]+}}.8h
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <8 x i16>* %p
-    %2 = add <8 x i16> %1, %1
-    %3 = bitcast <8 x i16> %2 to <4 x float>
-    %4 = fadd <4 x float> %3, %3
-    store <4 x float> %4, <4 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4f32_v16i8:
-define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.16b }
-; CHECK: rev32 v{{[0-9]+}}.16b
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <16 x i8>* %p
-    %2 = add <16 x i8> %1, %1
-    %3 = bitcast <16 x i8> %2 to <4 x float>
-    %4 = fadd <4 x float> %3, %3
-    store <4 x float> %4, <4 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i32_f128:
-define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: st1 { v{{[0-9]+}}.4s }
-    %1 = load fp128* %p
-    %2 = fadd fp128 %1, %1
-    %3 = bitcast fp128 %2 to <4 x i32>
-    %4 = add <4 x i32> %3, %3
-    store <4 x i32> %4, <4 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i32_v2f64:
-define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.4s }
-    %1 = load <2 x double>* %p
-    %2 = fadd <2 x double> %1, %1
-    %3 = bitcast <2 x double> %2 to <4 x i32>
-    %4 = add <4 x i32> %3, %3
-    store <4 x i32> %4, <4 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i32_v2i64:
-define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.4s }
-    %1 = load <2 x i64>* %p
-    %2 = add <2 x i64> %1, %1
-    %3 = bitcast <2 x i64> %2 to <4 x i32>
-    %4 = add <4 x i32> %3, %3
-    store <4 x i32> %4, <4 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i32_v4f32:
-define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: st1 { v{{[0-9]+}}.4s }
-    %1 = load <4 x float>* %p
-    %2 = fadd <4 x float> %1, %1
-    %3 = bitcast <4 x float> %2 to <4 x i32>
-    %4 = add <4 x i32> %3, %3
-    store <4 x i32> %4, <4 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i32_v8i16:
-define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.8h }
-; CHECK: rev32 v{{[0-9]+}}.8h
-; CHECK: st1 { v{{[0-9]+}}.4s }
-    %1 = load <8 x i16>* %p
-    %2 = add <8 x i16> %1, %1
-    %3 = bitcast <8 x i16> %2 to <4 x i32>
-    %4 = add <4 x i32> %3, %3
-    store <4 x i32> %4, <4 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i32_v16i8:
-define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.16b }
-; CHECK: rev32 v{{[0-9]+}}.16b
-; CHECK: st1 { v{{[0-9]+}}.4s }
-    %1 = load <16 x i8>* %p
-    %2 = add <16 x i8> %1, %1
-    %3 = bitcast <16 x i8> %2 to <4 x i32>
-    %4 = add <4 x i32> %3, %3
-    store <4 x i32> %4, <4 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i16_f128:
-define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
-; CHECK: ldr
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-; CHECK: st1 { v{{[0-9]+}}.8h }
-    %1 = load fp128* %p
-    %2 = fadd fp128 %1, %1
-    %3 = bitcast fp128 %2 to <8 x i16>
-    %4 = add <8 x i16> %3, %3
-    store <8 x i16> %4, <8 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i16_v2f64:
-define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: st1 { v{{[0-9]+}}.8h }
-    %1 = load <2 x double>* %p
-    %2 = fadd <2 x double> %1, %1
-    %3 = bitcast <2 x double> %2 to <8 x i16>
-    %4 = add <8 x i16> %3, %3
-    store <8 x i16> %4, <8 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i16_v2i64:
-define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: st1 { v{{[0-9]+}}.8h }
-    %1 = load <2 x i64>* %p
-    %2 = add <2 x i64> %1, %1
-    %3 = bitcast <2 x i64> %2 to <8 x i16>
-    %4 = add <8 x i16> %3, %3
-    store <8 x i16> %4, <8 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i16_v4f32:
-define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: rev32 v{{[0-9]+}}.8h
-; CHECK: st1 { v{{[0-9]+}}.8h }
-    %1 = load <4 x float>* %p
-    %2 = fadd <4 x float> %1, %1
-    %3 = bitcast <4 x float> %2 to <8 x i16>
-    %4 = add <8 x i16> %3, %3
-    store <8 x i16> %4, <8 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i16_v4i32:
-define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.4s }
-; CHECK: rev32 v{{[0-9]+}}.8h
-; CHECK: st1 { v{{[0-9]+}}.8h }
-    %1 = load <4 x i32>* %p
-    %2 = add <4 x i32> %1, %1
-    %3 = bitcast <4 x i32> %2 to <8 x i16>
-    %4 = add <8 x i16> %3, %3
-    store <8 x i16> %4, <8 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i16_v16i8:
-define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.16b }
-; CHECK: rev16 v{{[0-9]+}}.16b
-; CHECK: st1 { v{{[0-9]+}}.8h }
-    %1 = load <16 x i8>* %p
-    %2 = add <16 x i8> %1, %1
-    %3 = bitcast <16 x i8> %2 to <8 x i16>
-    %4 = add <8 x i16> %3, %3
-    store <8 x i16> %4, <8 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v16i8_f128:
-define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
-; CHECK: ldr q
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-; CHECK: st1 { v{{[0-9]+}}.16b }
-    %1 = load fp128* %p
-    %2 = fadd fp128 %1, %1
-    %3 = bitcast fp128 %2 to <16 x i8>
-    %4 = add <16 x i8> %3, %3
-    store <16 x i8> %4, <16 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v16i8_v2f64:
-define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: st1 { v{{[0-9]+}}.16b }
-    %1 = load <2 x double>* %p
-    %2 = fadd <2 x double> %1, %1
-    %3 = bitcast <2 x double> %2 to <16 x i8>
-    %4 = add <16 x i8> %3, %3
-    store <16 x i8> %4, <16 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v16i8_v2i64:
-define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: st1 { v{{[0-9]+}}.16b }
-    %1 = load <2 x i64>* %p
-    %2 = add <2 x i64> %1, %1
-    %3 = bitcast <2 x i64> %2 to <16 x i8>
-    %4 = add <16 x i8> %3, %3
-    store <16 x i8> %4, <16 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v16i8_v4f32:
-define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.2d }
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: rev32 v{{[0-9]+}}.16b
-; CHECK: st1 { v{{[0-9]+}}.16b }
-    %1 = load <4 x float>* %p
-    %2 = fadd <4 x float> %1, %1
-    %3 = bitcast <4 x float> %2 to <16 x i8>
-    %4 = add <16 x i8> %3, %3
-    store <16 x i8> %4, <16 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v16i8_v4i32:
-define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.4s }
-; CHECK: rev32 v{{[0-9]+}}.16b
-; CHECK: st1 { v{{[0-9]+}}.16b }
-    %1 = load <4 x i32>* %p
-    %2 = add <4 x i32> %1, %1
-    %3 = bitcast <4 x i32> %2 to <16 x i8>
-    %4 = add <16 x i8> %3, %3
-    store <16 x i8> %4, <16 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v16i8_v8i16:
-define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
-; CHECK: ld1 { v{{[0-9]+}}.8h }
-; CHECK: rev16 v{{[0-9]+}}.16b
-; CHECK: st1 { v{{[0-9]+}}.16b }
-    %1 = load <8 x i16>* %p
-    %2 = add <8 x i16> %1, %1
-    %3 = bitcast <8 x i16> %2 to <16 x i8>
-    %4 = add <16 x i8> %3, %3
-    store <16 x i8> %4, <16 x i8>* %q
-    ret void
-}
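
The functions above all instantiate one pattern: ld1 loads the source vector in array order, the bitcast reinterprets it at a different lane width, and st1 stores the result. On big-endian AArch64 the in-register lane layouts of the two types differ, so llc has to insert a REV between them, with the region width of the larger lane size applied at the granularity of the smaller one (rev64 v.8h for <2 x i64> <-> <8 x i16>, rev32 v.8h for <4 x i32> <-> <8 x i16>, and so on, matching the CHECK lines). A minimal sketch of the pattern, using a hypothetical function name and the same pointer-typed IR syntax as the tests:

; Hypothetical sketch, not part of the removed test: under a big-endian
; triple, the bitcast below should force a rev64 v.8h between the .8h
; layout of the load and the .2d layout of the store.
define void @example_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
    %1 = load <8 x i16>* %p
    %2 = bitcast <8 x i16> %1 to <2 x i64>
    store <2 x i64> %2, <2 x i64>* %q
    ret void
}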

Removed: llvm/trunk/test/CodeGen/ARM64/big-endian-eh.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/big-endian-eh.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/big-endian-eh.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/big-endian-eh.ll (removed)
@@ -1,73 +0,0 @@
-; RUN: llc -mtriple arm64_be-linux-gnu -filetype obj < %s | llvm-objdump -s - | FileCheck %s
-
-; ARM EHABI for big endian
-; This test case checks whether the CIE length record is laid out in big-endian format.
-;
-; This is the LLVM assembly generated from the following C++ code:
-;
-; extern void foo(int);
-; void test(int a, int b) {
-;   try {
-;     foo(a);
-;   } catch (...) {
-;     foo(b);
-;   }
-; }
-
-define void @_Z4testii(i32 %a, i32 %b) #0 {
-entry:
-  invoke void @_Z3fooi(i32 %a)
-          to label %try.cont unwind label %lpad
-
-lpad:                                             ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          catch i8* null
-  %1 = extractvalue { i8*, i32 } %0, 0
-  %2 = tail call i8* @__cxa_begin_catch(i8* %1) #2
-  invoke void @_Z3fooi(i32 %b)
-          to label %invoke.cont2 unwind label %lpad1
-
-invoke.cont2:                                     ; preds = %lpad
-  tail call void @__cxa_end_catch()
-  br label %try.cont
-
-try.cont:                                         ; preds = %entry, %invoke.cont2
-  ret void
-
-lpad1:                                            ; preds = %lpad
-  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          cleanup
-  invoke void @__cxa_end_catch()
-          to label %eh.resume unwind label %terminate.lpad
-
-eh.resume:                                        ; preds = %lpad1
-  resume { i8*, i32 } %3
-
-terminate.lpad:                                   ; preds = %lpad1
-  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          catch i8* null
-  %5 = extractvalue { i8*, i32 } %4, 0
-  tail call void @__clang_call_terminate(i8* %5) #3
-  unreachable
-}
-
-declare void @_Z3fooi(i32) #0
-
-declare i32 @__gxx_personality_v0(...)
-
-declare i8* @__cxa_begin_catch(i8*)
-
-declare void @__cxa_end_catch()
-
-; Function Attrs: noinline noreturn nounwind
-define linkonce_odr hidden void @__clang_call_terminate(i8*) #1 {
-  %2 = tail call i8* @__cxa_begin_catch(i8* %0) #2
-  tail call void @_ZSt9terminatev() #3
-  unreachable
-}
-
-declare void @_ZSt9terminatev()
-
-; CHECK-LABEL: Contents of section .eh_frame:
-; CHECK-NEXT: 0000 0000001c
-
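
The two CHECK lines above are the whole point of this test: llvm-objdump -s prints each line of a section dump as an offset followed by the raw bytes, so "0000 0000001c" asserts that the CIE's initial 4-byte length field (28, i.e. 0x1c) is emitted most-significant-byte first. A wrong-endian emission would instead dump as "0000 1c000000", so this single byte comparison is enough to catch the bug:

    big-endian:    00 00 00 1c
    little-endian: 1c 00 00 00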

Removed: llvm/trunk/test/CodeGen/ARM64/big-endian-varargs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/big-endian-varargs.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/big-endian-varargs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/big-endian-varargs.ll (removed)
@@ -1,58 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-; Vararg saving must store the Q registers using the equivalent of STR/STP.
-
-target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
-target triple = "arm64_be-arm-none-eabi"
-
-%struct.__va_list = type { i8*, i8*, i8*, i32, i32 }
-
-declare void @llvm.va_start(i8*) nounwind
-declare void @llvm.va_end(i8*) nounwind
-
-define double @callee(i32 %a, ...) {
-; CHECK: stp
-; CHECK: stp
-; CHECK: stp
-; CHECK: stp
-; CHECK: stp
-; CHECK: stp
-entry:
-  %vl = alloca %struct.__va_list, align 8
-  %vl1 = bitcast %struct.__va_list* %vl to i8*
-  call void @llvm.va_start(i8* %vl1)
-  %vr_offs_p = getelementptr inbounds %struct.__va_list* %vl, i64 0, i32 4
-  %vr_offs = load i32* %vr_offs_p, align 4
-  %0 = icmp sgt i32 %vr_offs, -1
-  br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
-
-vaarg.maybe_reg:                                  ; preds = %entry
-  %new_reg_offs = add i32 %vr_offs, 16
-  store i32 %new_reg_offs, i32* %vr_offs_p, align 4
-  %inreg = icmp slt i32 %new_reg_offs, 1
-  br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
-
-vaarg.in_reg:                                     ; preds = %vaarg.maybe_reg
-  %reg_top_p = getelementptr inbounds %struct.__va_list* %vl, i64 0, i32 2
-  %reg_top = load i8** %reg_top_p, align 8
-  %1 = sext i32 %vr_offs to i64
-  %2 = getelementptr i8* %reg_top, i64 %1
-  %3 = ptrtoint i8* %2 to i64
-  %align_be = add i64 %3, 8
-  %4 = inttoptr i64 %align_be to i8*
-  br label %vaarg.end
-
-vaarg.on_stack:                                   ; preds = %vaarg.maybe_reg, %entry
-  %stack_p = getelementptr inbounds %struct.__va_list* %vl, i64 0, i32 0
-  %stack = load i8** %stack_p, align 8
-  %new_stack = getelementptr i8* %stack, i64 8
-  store i8* %new_stack, i8** %stack_p, align 8
-  br label %vaarg.end
-
-vaarg.end:                                        ; preds = %vaarg.on_stack, %vaarg.in_reg
-  %.sink = phi i8* [ %4, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
-  %5 = bitcast i8* %.sink to double*
-  %6 = load double* %5, align 8
-  call void @llvm.va_end(i8* %vl1)
-  ret double %6
-}
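
The genuinely big-endian step in the hand-expanded va_arg sequence above is "%align_be = add i64 %3, 8". Each Q register is saved as a full 16-byte slot, and a double occupies the low 64 bits of the register; under a big-endian layout those least-significant bytes land in the high-addressed half of the slot, so the load address needs a +8 adjustment. A self-contained sketch of just that adjustment, with a hypothetical function name:

; Hypothetical sketch: read an 8-byte double out of a 16-byte big-endian
; Q-register save slot. The +8 skips the slot's unused low-addressed half.
define double @read_q_slot_be(i8* %slot) {
    %1 = getelementptr i8* %slot, i64 8
    %2 = bitcast i8* %1 to double*
    %3 = load double* %2
    ret double %3
}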

Removed: llvm/trunk/test/CodeGen/ARM64/big-endian-vector-callee.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/big-endian-vector-callee.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/big-endian-vector-callee.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/big-endian-vector-callee.ll (removed)
@@ -1,848 +0,0 @@
-; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -o - | FileCheck %s
-; RUN: llc -mtriple arm64_be < %s -fast-isel=true -arm64-load-store-opt=false -o - | FileCheck %s
-
-; CHECK-LABEL: test_i64_f64:
-define i64 @test_i64_f64(double %p) {
-; CHECK-NOT: rev
-    %1 = fadd double %p, %p
-    %2 = bitcast double %1 to i64
-    %3 = add i64 %2, %2
-    ret i64 %3
-}
-
-; CHECK-LABEL: test_i64_v1i64:
-define i64 @test_i64_v1i64(<1 x i64> %p) {
-; CHECK-NOT: rev
-    %1 = add <1 x i64> %p, %p
-    %2 = bitcast <1 x i64> %1 to i64
-    %3 = add i64 %2, %2
-    ret i64 %3
-}
-
-; CHECK-LABEL: test_i64_v2f32:
-define i64 @test_i64_v2f32(<2 x float> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = fadd <2 x float> %p, %p
-    %2 = bitcast <2 x float> %1 to i64
-    %3 = add i64 %2, %2
-    ret i64 %3
-}
-
-; CHECK-LABEL: test_i64_v2i32:
-define i64 @test_i64_v2i32(<2 x i32> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = add <2 x i32> %p, %p
-    %2 = bitcast <2 x i32> %1 to i64
-    %3 = add i64 %2, %2
-    ret i64 %3
-}
-
-; CHECK-LABEL: test_i64_v4i16:
-define i64 @test_i64_v4i16(<4 x i16> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = add <4 x i16> %p, %p
-    %2 = bitcast <4 x i16> %1 to i64
-    %3 = add i64 %2, %2
-    ret i64 %3
-}
-
-; CHECK-LABEL: test_i64_v8i8:
-define i64 @test_i64_v8i8(<8 x i8> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = add <8 x i8> %p, %p
-    %2 = bitcast <8 x i8> %1 to i64
-    %3 = add i64 %2, %2
-    ret i64 %3
-}
-
-; CHECK-LABEL: test_f64_i64:
-define double @test_f64_i64(i64 %p) {
-; CHECK-NOT: rev
-    %1 = add i64 %p, %p
-    %2 = bitcast i64 %1 to double
-    %3 = fadd double %2, %2
-    ret double %3
-}
-
-; CHECK-LABEL: test_f64_v1i64:
-define double @test_f64_v1i64(<1 x i64> %p) {
-; CHECK-NOT: rev
-    %1 = add <1 x i64> %p, %p
-    %2 = bitcast <1 x i64> %1 to double
-    %3 = fadd double %2, %2
-    ret double %3
-}
-
-; CHECK-LABEL: test_f64_v2f32:
-define double @test_f64_v2f32(<2 x float> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = fadd <2 x float> %p, %p
-    %2 = bitcast <2 x float> %1 to double
-    %3 = fadd double %2, %2
-    ret double %3
-}
-
-; CHECK-LABEL: test_f64_v2i32:
-define double @test_f64_v2i32(<2 x i32> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = add <2 x i32> %p, %p
-    %2 = bitcast <2 x i32> %1 to double
-    %3 = fadd double %2, %2
-    ret double %3
-}
-
-; CHECK-LABEL: test_f64_v4i16:
-define double @test_f64_v4i16(<4 x i16> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = add <4 x i16> %p, %p
-    %2 = bitcast <4 x i16> %1 to double
-    %3 = fadd double %2, %2
-    ret double %3
-}
-
-; CHECK-LABEL: test_f64_v8i8:
-define double @test_f64_v8i8(<8 x i8> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = add <8 x i8> %p, %p
-    %2 = bitcast <8 x i8> %1 to double
-    %3 = fadd double %2, %2
-    ret double %3
-}
-
-; CHECK-LABEL: test_v1i64_i64:
-define <1 x i64> @test_v1i64_i64(i64 %p) {
-; CHECK-NOT: rev
-    %1 = add i64 %p, %p
-    %2 = bitcast i64 %1 to <1 x i64>
-    %3 = add <1 x i64> %2, %2
-    ret <1 x i64> %3
-}
-
-; CHECK-LABEL: test_v1i64_f64:
-define <1 x i64> @test_v1i64_f64(double %p) {
-; CHECK-NOT: rev
-    %1 = fadd double %p, %p
-    %2 = bitcast double %1 to <1 x i64>
-    %3 = add <1 x i64> %2, %2
-    ret <1 x i64> %3
-}
-
-; CHECK-LABEL: test_v1i64_v2f32:
-define <1 x i64> @test_v1i64_v2f32(<2 x float> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = fadd <2 x float> %p, %p
-    %2 = bitcast <2 x float> %1 to <1 x i64>
-    %3 = add <1 x i64> %2, %2
-    ret <1 x i64> %3
-}
-
-; CHECK-LABEL: test_v1i64_v2i32:
-define <1 x i64> @test_v1i64_v2i32(<2 x i32> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = add <2 x i32> %p, %p
-    %2 = bitcast <2 x i32> %1 to <1 x i64>
-    %3 = add <1 x i64> %2, %2
-    ret <1 x i64> %3
-}
-
-; CHECK-LABEL: test_v1i64_v4i16:
-define <1 x i64> @test_v1i64_v4i16(<4 x i16> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = add <4 x i16> %p, %p
-    %2 = bitcast <4 x i16> %1 to <1 x i64>
-    %3 = add <1 x i64> %2, %2
-    ret <1 x i64> %3
-}
-
-; CHECK-LABEL: test_v1i64_v8i8:
-define <1 x i64> @test_v1i64_v8i8(<8 x i8> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = add <8 x i8> %p, %p
-    %2 = bitcast <8 x i8> %1 to <1 x i64>
-    %3 = add <1 x i64> %2, %2
-    ret <1 x i64> %3
-}
-
-; CHECK-LABEL: test_v2f32_i64:
-define <2 x float> @test_v2f32_i64(i64 %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = add i64 %p, %p
-    %2 = bitcast i64 %1 to <2 x float>
-    %3 = fadd <2 x float> %2, %2
-    ret <2 x float> %3
-}
-
-; CHECK-LABEL: test_v2f32_f64:
-define <2 x float> @test_v2f32_f64(double %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = fadd double %p, %p
-    %2 = bitcast double %1 to <2 x float>
-    %3 = fadd <2 x float> %2, %2
-    ret <2 x float> %3
-}
-
-; CHECK-LABEL: test_v2f32_v1i64:
-define <2 x float> @test_v2f32_v1i64(<1 x i64> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = add <1 x i64> %p, %p
-    %2 = bitcast <1 x i64> %1 to <2 x float>
-    %3 = fadd <2 x float> %2, %2
-    ret <2 x float> %3
-}
-
-; CHECK-LABEL: test_v2f32_v2i32:
-define <2 x float> @test_v2f32_v2i32(<2 x i32> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = add <2 x i32> %p, %p
-    %2 = bitcast <2 x i32> %1 to <2 x float>
-    %3 = fadd <2 x float> %2, %2
-    ret <2 x float> %3
-}
-
-; CHECK-LABEL: test_v2f32_v4i16:
-define <2 x float> @test_v2f32_v4i16(<4 x i16> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = add <4 x i16> %p, %p
-    %2 = bitcast <4 x i16> %1 to <2 x float>
-    %3 = fadd <2 x float> %2, %2
-    ret <2 x float> %3
-}
-
-; CHECK-LABEL: test_v2f32_v8i8:
-define <2 x float> @test_v2f32_v8i8(<8 x i8> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = add <8 x i8> %p, %p
-    %2 = bitcast <8 x i8> %1 to <2 x float>
-    %3 = fadd <2 x float> %2, %2
-    ret <2 x float> %3
-}
-
-; CHECK-LABEL: test_v2i32_i64:
-define <2 x i32> @test_v2i32_i64(i64 %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = add i64 %p, %p
-    %2 = bitcast i64 %1 to <2 x i32>
-    %3 = add <2 x i32> %2, %2
-    ret <2 x i32> %3
-}
-
-; CHECK-LABEL: test_v2i32_f64:
-define <2 x i32> @test_v2i32_f64(double %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = fadd double %p, %p
-    %2 = bitcast double %1 to <2 x i32>
-    %3 = add <2 x i32> %2, %2
-    ret <2 x i32> %3
-}
-
-; CHECK-LABEL: test_v2i32_v1i64:
-define <2 x i32> @test_v2i32_v1i64(<1 x i64> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = add <1 x i64> %p, %p
-    %2 = bitcast <1 x i64> %1 to <2 x i32>
-    %3 = add <2 x i32> %2, %2
-    ret <2 x i32> %3
-}
-
-; CHECK-LABEL: test_v2i32_v2f32:
-define <2 x i32> @test_v2i32_v2f32(<2 x float> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = fadd <2 x float> %p, %p
-    %2 = bitcast <2 x float> %1 to <2 x i32>
-    %3 = add <2 x i32> %2, %2
-    ret <2 x i32> %3
-}
-
-; CHECK-LABEL: test_v2i32_v4i16:
-define <2 x i32> @test_v2i32_v4i16(<4 x i16> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = add <4 x i16> %p, %p
-    %2 = bitcast <4 x i16> %1 to <2 x i32>
-    %3 = add <2 x i32> %2, %2
-    ret <2 x i32> %3
-}
-
-; CHECK-LABEL: test_v2i32_v8i8:
-define <2 x i32> @test_v2i32_v8i8(<8 x i8> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = add <8 x i8> %p, %p
-    %2 = bitcast <8 x i8> %1 to <2 x i32>
-    %3 = add <2 x i32> %2, %2
-    ret <2 x i32> %3
-}
-
-; CHECK-LABEL: test_v4i16_i64:
-define <4 x i16> @test_v4i16_i64(i64 %p) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = add i64 %p, %p
-    %2 = bitcast i64 %1 to <4 x i16>
-    %3 = add <4 x i16> %2, %2
-    ret <4 x i16> %3
-}
-
-; CHECK-LABEL: test_v4i16_f64:
-define <4 x i16> @test_v4i16_f64(double %p) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = fadd double %p, %p
-    %2 = bitcast double %1 to <4 x i16>
-    %3 = add <4 x i16> %2, %2
-    ret <4 x i16> %3
-}
-
-; CHECK-LABEL: test_v4i16_v1i64:
-define <4 x i16> @test_v4i16_v1i64(<1 x i64> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = add <1 x i64> %p, %p
-    %2 = bitcast <1 x i64> %1 to <4 x i16>
-    %3 = add <4 x i16> %2, %2
-    ret <4 x i16> %3
-}
-
-; CHECK-LABEL: test_v4i16_v2f32:
-define <4 x i16> @test_v4i16_v2f32(<2 x float> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = fadd <2 x float> %p, %p
-    %2 = bitcast <2 x float> %1 to <4 x i16>
-    %3 = add <4 x i16> %2, %2
-    ret <4 x i16> %3
-}
-
-; CHECK-LABEL: test_v4i16_v2i32:
-define <4 x i16> @test_v4i16_v2i32(<2 x i32> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = add <2 x i32> %p, %p
-    %2 = bitcast <2 x i32> %1 to <4 x i16>
-    %3 = add <4 x i16> %2, %2
-    ret <4 x i16> %3
-}
-
-; CHECK-LABEL: test_v4i16_v8i8:
-define <4 x i16> @test_v4i16_v8i8(<8 x i8> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = add <8 x i8> %p, %p
-    %2 = bitcast <8 x i8> %1 to <4 x i16>
-    %3 = add <4 x i16> %2, %2
-    ret <4 x i16> %3
-}
-
-; CHECK-LABEL: test_v8i8_i64:
-define <8 x i8> @test_v8i8_i64(i64 %p) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = add i64 %p, %p
-    %2 = bitcast i64 %1 to <8 x i8>
-    %3 = add <8 x i8> %2, %2
-    ret <8 x i8> %3
-}
-
-; CHECK-LABEL: test_v8i8_f64:
-define <8 x i8> @test_v8i8_f64(double %p) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = fadd double %p, %p
-    %2 = bitcast double %1 to <8 x i8>
-    %3 = add <8 x i8> %2, %2
-    ret <8 x i8> %3
-}
-
-; CHECK-LABEL: test_v8i8_v1i64:
-define <8 x i8> @test_v8i8_v1i64(<1 x i64> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = add <1 x i64> %p, %p
-    %2 = bitcast <1 x i64> %1 to <8 x i8>
-    %3 = add <8 x i8> %2, %2
-    ret <8 x i8> %3
-}
-
-; CHECK-LABEL: test_v8i8_v2f32:
-define <8 x i8> @test_v8i8_v2f32(<2 x float> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = fadd <2 x float> %p, %p
-    %2 = bitcast <2 x float> %1 to <8 x i8>
-    %3 = add <8 x i8> %2, %2
-    ret <8 x i8> %3
-}
-
-; CHECK-LABEL: test_v8i8_v2i32:
-define <8 x i8> @test_v8i8_v2i32(<2 x i32> %p) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = add <2 x i32> %p, %p
-    %2 = bitcast <2 x i32> %1 to <8 x i8>
-    %3 = add <8 x i8> %2, %2
-    ret <8 x i8> %3
-}
-
-; CHECK-LABEL: test_v8i8_v4i16:
-define <8 x i8> @test_v8i8_v4i16(<4 x i16> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = add <4 x i16> %p, %p
-    %2 = bitcast <4 x i16> %1 to <8 x i8>
-    %3 = add <8 x i8> %2, %2
-    ret <8 x i8> %3
-}
-
-; CHECK-LABEL: test_f128_v2f64:
-define fp128 @test_f128_v2f64(<2 x double> %p) {
-; CHECK: ext
-    %1 = fadd <2 x double> %p, %p
-    %2 = bitcast <2 x double> %1 to fp128
-    %3 = fadd fp128 %2, %2
-    ret fp128 %3
-}
-
-; CHECK-LABEL: test_f128_v2i64:
-define fp128 @test_f128_v2i64(<2 x i64> %p) {
-; CHECK: ext
-    %1 = add <2 x i64> %p, %p
-    %2 = bitcast <2 x i64> %1 to fp128
-    %3 = fadd fp128 %2, %2
-    ret fp128 %3
-}
-
-; CHECK-LABEL: test_f128_v4f32:
-define fp128 @test_f128_v4f32(<4 x float> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = fadd <4 x float> %p, %p
-    %2 = bitcast <4 x float> %1 to fp128
-    %3 = fadd fp128 %2, %2
-    ret fp128 %3
-}
-
-; CHECK-LABEL: test_f128_v4i32:
-define fp128 @test_f128_v4i32(<4 x i32> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = add <4 x i32> %p, %p
-    %2 = bitcast <4 x i32> %1 to fp128
-    %3 = fadd fp128 %2, %2
-    ret fp128 %3
-}
-
-; CHECK-LABEL: test_f128_v8i16:
-define fp128 @test_f128_v8i16(<8 x i16> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = add <8 x i16> %p, %p
-    %2 = bitcast <8 x i16> %1 to fp128
-    %3 = fadd fp128 %2, %2
-    ret fp128 %3
-}
-
-; CHECK-LABEL: test_f128_v16i8:
-define fp128 @test_f128_v16i8(<16 x i8> %p) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = add <16 x i8> %p, %p
-    %2 = bitcast <16 x i8> %1 to fp128
-    %3 = fadd fp128 %2, %2
-    ret fp128 %3
-}
-
-; CHECK-LABEL: test_v2f64_f128:
-define <2 x double> @test_v2f64_f128(fp128 %p) {
-; CHECK: ext
-    %1 = fadd fp128 %p, %p
-    %2 = bitcast fp128 %1 to <2 x double>
-    %3 = fadd <2 x double> %2, %2
-    ret <2 x double> %3
-}
-
-; CHECK-LABEL: test_v2f64_v2i64:
-define <2 x double> @test_v2f64_v2i64(<2 x i64> %p) {
-; CHECK: ext
-; CHECK: ext
-    %1 = add <2 x i64> %p, %p
-    %2 = bitcast <2 x i64> %1 to <2 x double>
-    %3 = fadd <2 x double> %2, %2
-    ret <2 x double> %3
-}
-
-; CHECK-LABEL: test_v2f64_v4f32:
-define <2 x double> @test_v2f64_v4f32(<4 x float> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: ext
-    %1 = fadd <4 x float> %p, %p
-    %2 = bitcast <4 x float> %1 to <2 x double>
-    %3 = fadd <2 x double> %2, %2
-    ret <2 x double> %3
-}
-
-; CHECK-LABEL: test_v2f64_v4i32:
-define <2 x double> @test_v2f64_v4i32(<4 x i32> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: ext
-    %1 = add <4 x i32> %p, %p
-    %2 = bitcast <4 x i32> %1 to <2 x double>
-    %3 = fadd <2 x double> %2, %2
-    ret <2 x double> %3
-}
-
-; CHECK-LABEL: test_v2f64_v8i16:
-define <2 x double> @test_v2f64_v8i16(<8 x i16> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-; CHECK: ext
-    %1 = add <8 x i16> %p, %p
-    %2 = bitcast <8 x i16> %1 to <2 x double>
-    %3 = fadd <2 x double> %2, %2
-    ret <2 x double> %3
-}
-
-; CHECK-LABEL: test_v2f64_v16i8:
-define <2 x double> @test_v2f64_v16i8(<16 x i8> %p) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-; CHECK: ext
-    %1 = add <16 x i8> %p, %p
-    %2 = bitcast <16 x i8> %1 to <2 x double>
-    %3 = fadd <2 x double> %2, %2
-    ret <2 x double> %3
-}
-
-; CHECK-LABEL: test_v2i64_f128:
-define <2 x i64> @test_v2i64_f128(fp128 %p) {
-; CHECK: ext
-    %1 = fadd fp128 %p, %p
-    %2 = bitcast fp128 %1 to <2 x i64>
-    %3 = add <2 x i64> %2, %2
-    ret <2 x i64> %3
-}
-
-; CHECK-LABEL: test_v2i64_v2f64:
-define <2 x i64> @test_v2i64_v2f64(<2 x double> %p) {
-; CHECK: ext
-; CHECK: ext
-    %1 = fadd <2 x double> %p, %p
-    %2 = bitcast <2 x double> %1 to <2 x i64>
-    %3 = add <2 x i64> %2, %2
-    ret <2 x i64> %3
-}
-
-; CHECK-LABEL: test_v2i64_v4f32:
-define <2 x i64> @test_v2i64_v4f32(<4 x float> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: ext
-    %1 = fadd <4 x float> %p, %p
-    %2 = bitcast <4 x float> %1 to <2 x i64>
-    %3 = add <2 x i64> %2, %2
-    ret <2 x i64> %3
-}
-
-; CHECK-LABEL: test_v2i64_v4i32:
-define <2 x i64> @test_v2i64_v4i32(<4 x i32> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: ext
-    %1 = add <4 x i32> %p, %p
-    %2 = bitcast <4 x i32> %1 to <2 x i64>
-    %3 = add <2 x i64> %2, %2
-    ret <2 x i64> %3
-}
-
-; CHECK-LABEL: test_v2i64_v8i16:
-define <2 x i64> @test_v2i64_v8i16(<8 x i16> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-; CHECK: ext
-    %1 = add <8 x i16> %p, %p
-    %2 = bitcast <8 x i16> %1 to <2 x i64>
-    %3 = add <2 x i64> %2, %2
-    ret <2 x i64> %3
-}
-
-; CHECK-LABEL: test_v2i64_v16i8:
-define <2 x i64> @test_v2i64_v16i8(<16 x i8> %p) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-; CHECK: ext
-    %1 = add <16 x i8> %p, %p
-    %2 = bitcast <16 x i8> %1 to <2 x i64>
-    %3 = add <2 x i64> %2, %2
-    ret <2 x i64> %3
-}
-
-; CHECK-LABEL: test_v4f32_f128:
-define <4 x float> @test_v4f32_f128(fp128 %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = fadd fp128 %p, %p
-    %2 = bitcast fp128 %1 to <4 x float>
-    %3 = fadd <4 x float> %2, %2
-    ret <4 x float> %3
-}
-
-; CHECK-LABEL: test_v4f32_v2f64:
-define <4 x float> @test_v4f32_v2f64(<2 x double> %p) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = fadd <2 x double> %p, %p
-    %2 = bitcast <2 x double> %1 to <4 x float>
-    %3 = fadd <4 x float> %2, %2
-    ret <4 x float> %3
-}
-
-; CHECK-LABEL: test_v4f32_v2i64:
-define <4 x float> @test_v4f32_v2i64(<2 x i64> %p) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = add <2 x i64> %p, %p
-    %2 = bitcast <2 x i64> %1 to <4 x float>
-    %3 = fadd <4 x float> %2, %2
-    ret <4 x float> %3
-}
-
-; CHECK-LABEL: test_v4f32_v4i32:
-define <4 x float> @test_v4f32_v4i32(<4 x i32> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = add <4 x i32> %p, %p
-    %2 = bitcast <4 x i32> %1 to <4 x float>
-    %3 = fadd <4 x float> %2, %2
-    ret <4 x float> %3
-}
-
-; CHECK-LABEL: test_v4f32_v8i16:
-define <4 x float> @test_v4f32_v8i16(<8 x i16> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = add <8 x i16> %p, %p
-    %2 = bitcast <8 x i16> %1 to <4 x float>
-    %3 = fadd <4 x float> %2, %2
-    ret <4 x float> %3
-}
-
-; CHECK-LABEL: test_v4f32_v16i8:
-define <4 x float> @test_v4f32_v16i8(<16 x i8> %p) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = add <16 x i8> %p, %p
-    %2 = bitcast <16 x i8> %1 to <4 x float>
-    %3 = fadd <4 x float> %2, %2
-    ret <4 x float> %3
-}
-
-; CHECK-LABEL: test_v4i32_f128:
-define <4 x i32> @test_v4i32_f128(fp128 %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = fadd fp128 %p, %p
-    %2 = bitcast fp128 %1 to <4 x i32>
-    %3 = add <4 x i32> %2, %2
-    ret <4 x i32> %3
-}
-
-; CHECK-LABEL: test_v4i32_v2f64:
-define <4 x i32> @test_v4i32_v2f64(<2 x double> %p) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = fadd <2 x double> %p, %p
-    %2 = bitcast <2 x double> %1 to <4 x i32>
-    %3 = add <4 x i32> %2, %2
-    ret <4 x i32> %3
-}
-
-; CHECK-LABEL: test_v4i32_v2i64:
-define <4 x i32> @test_v4i32_v2i64(<2 x i64> %p) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = add <2 x i64> %p, %p
-    %2 = bitcast <2 x i64> %1 to <4 x i32>
-    %3 = add <4 x i32> %2, %2
-    ret <4 x i32> %3
-}
-
-; CHECK-LABEL: test_v4i32_v4f32:
-define <4 x i32> @test_v4i32_v4f32(<4 x float> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = fadd <4 x float> %p, %p
-    %2 = bitcast <4 x float> %1 to <4 x i32>
-    %3 = add <4 x i32> %2, %2
-    ret <4 x i32> %3
-}
-
-; CHECK-LABEL: test_v4i32_v8i16:
-define <4 x i32> @test_v4i32_v8i16(<8 x i16> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = add <8 x i16> %p, %p
-    %2 = bitcast <8 x i16> %1 to <4 x i32>
-    %3 = add <4 x i32> %2, %2
-    ret <4 x i32> %3
-}
-
-; CHECK-LABEL: test_v4i32_v16i8:
-define <4 x i32> @test_v4i32_v16i8(<16 x i8> %p) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = add <16 x i8> %p, %p
-    %2 = bitcast <16 x i8> %1 to <4 x i32>
-    %3 = add <4 x i32> %2, %2
-    ret <4 x i32> %3
-}
-
-; CHECK-LABEL: test_v8i16_f128:
-define <8 x i16> @test_v8i16_f128(fp128 %p) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = fadd fp128 %p, %p
-    %2 = bitcast fp128 %1 to <8 x i16>
-    %3 = add <8 x i16> %2, %2
-    ret <8 x i16> %3
-}
-
-; CHECK-LABEL: test_v8i16_v2f64:
-define <8 x i16> @test_v8i16_v2f64(<2 x double> %p) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = fadd <2 x double> %p, %p
-    %2 = bitcast <2 x double> %1 to <8 x i16>
-    %3 = add <8 x i16> %2, %2
-    ret <8 x i16> %3
-}
-
-; CHECK-LABEL: test_v8i16_v2i64:
-define <8 x i16> @test_v8i16_v2i64(<2 x i64> %p) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = add <2 x i64> %p, %p
-    %2 = bitcast <2 x i64> %1 to <8 x i16>
-    %3 = add <8 x i16> %2, %2
-    ret <8 x i16> %3
-}
-
-; CHECK-LABEL: test_v8i16_v4f32:
-define <8 x i16> @test_v8i16_v4f32(<4 x float> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = fadd <4 x float> %p, %p
-    %2 = bitcast <4 x float> %1 to <8 x i16>
-    %3 = add <8 x i16> %2, %2
-    ret <8 x i16> %3
-}
-
-; CHECK-LABEL: test_v8i16_v4i32:
-define <8 x i16> @test_v8i16_v4i32(<4 x i32> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = add <4 x i32> %p, %p
-    %2 = bitcast <4 x i32> %1 to <8 x i16>
-    %3 = add <8 x i16> %2, %2
-    ret <8 x i16> %3
-}
-
-; CHECK-LABEL: test_v8i16_v16i8:
-define <8 x i16> @test_v8i16_v16i8(<16 x i8> %p) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = add <16 x i8> %p, %p
-    %2 = bitcast <16 x i8> %1 to <8 x i16>
-    %3 = add <8 x i16> %2, %2
-    ret <8 x i16> %3
-}
-
-; CHECK-LABEL: test_v16i8_f128:
-define <16 x i8> @test_v16i8_f128(fp128 %p) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = fadd fp128 %p, %p
-    %2 = bitcast fp128 %1 to <16 x i8>
-    %3 = add <16 x i8> %2, %2
-    ret <16 x i8> %3
-}
-
-; CHECK-LABEL: test_v16i8_v2f64:
-define <16 x i8> @test_v16i8_v2f64(<2 x double> %p) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = fadd <2 x double> %p, %p
-    %2 = bitcast <2 x double> %1 to <16 x i8>
-    %3 = add <16 x i8> %2, %2
-    ret <16 x i8> %3
-}
-
-; CHECK-LABEL: test_v16i8_v2i64:
-define <16 x i8> @test_v16i8_v2i64(<2 x i64> %p) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = add <2 x i64> %p, %p
-    %2 = bitcast <2 x i64> %1 to <16 x i8>
-    %3 = add <16 x i8> %2, %2
-    ret <16 x i8> %3
-}
-
-; CHECK-LABEL: test_v16i8_v4f32:
-define <16 x i8> @test_v16i8_v4f32(<4 x float> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = fadd <4 x float> %p, %p
-    %2 = bitcast <4 x float> %1 to <16 x i8>
-    %3 = add <16 x i8> %2, %2
-    ret <16 x i8> %3
-}
-
-; CHECK-LABEL: test_v16i8_v4i32:
-define <16 x i8> @test_v16i8_v4i32(<4 x i32> %p) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = add <4 x i32> %p, %p
-    %2 = bitcast <4 x i32> %1 to <16 x i8>
-    %3 = add <16 x i8> %2, %2
-    ret <16 x i8> %3
-}
-
-; CHECK-LABEL: test_v16i8_v8i16:
-define <16 x i8> @test_v16i8_v8i16(<8 x i16> %p) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = add <8 x i16> %p, %p
-    %2 = bitcast <8 x i16> %1 to <16 x i8>
-    %3 = add <16 x i8> %2, %2
-    ret <16 x i8> %3
-}
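
This callee-side test covers the same conversion matrix as the bitconvert tests, but for values arriving and leaving in registers rather than through ld1/st1. The 64-bit cases need at most REVs, while every 128-bit case also checks for ext: the two 64-bit halves of a Q register trade places between the big-endian memory layout and the register layout, and an EXT by 8 bytes is the instruction that swaps them. A minimal sketch of a 128-bit case, with a hypothetical name:

; Hypothetical sketch: bitcasting between two 128-bit types held in
; registers should need only the halves-swapping ext on a big-endian
; target, as in the test_f128_v2i64 checks above.
define fp128 @example_f128_v2i64(<2 x i64> %p) {
    %1 = add <2 x i64> %p, %p
    %2 = bitcast <2 x i64> %1 to fp128
    ret fp128 %2
}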

Removed: llvm/trunk/test/CodeGen/ARM64/big-endian-vector-caller.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/big-endian-vector-caller.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/big-endian-vector-caller.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/big-endian-vector-caller.ll (removed)
@@ -1,1100 +0,0 @@
-; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -o - | FileCheck %s
-; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -fast-isel=true -O0 -o - | FileCheck %s
-
-; CHECK-LABEL: test_i64_f64:
-declare i64 @test_i64_f64_helper(double %p)
-define void @test_i64_f64(double* %p, i64* %q) {
-; CHECK-NOT: rev
-    %1 = load double* %p
-    %2 = fadd double %1, %1
-    %3 = call i64 @test_i64_f64_helper(double %2)
-    %4 = add i64 %3, %3
-    store i64 %4, i64* %q
-    ret void
-}
-
-; CHECK-LABEL: test_i64_v1i64:
-declare i64 @test_i64_v1i64_helper(<1 x i64> %p)
-define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
-; CHECK-NOT: rev
-    %1 = load <1 x i64>* %p
-    %2 = add <1 x i64> %1, %1
-    %3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
-    %4 = add i64 %3, %3
-    store i64 %4, i64* %q
-    ret void
-}
-
-; CHECK-LABEL: test_i64_v2f32:
-declare i64 @test_i64_v2f32_helper(<2 x float> %p)
-define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x float>* %p
-    %2 = fadd <2 x float> %1, %1
-    %3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
-    %4 = add i64 %3, %3
-    store i64 %4, i64* %q
-    ret void
-}
-
-; CHECK-LABEL: test_i64_v2i32:
-declare i64 @test_i64_v2i32_helper(<2 x i32> %p)
-define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x i32>* %p
-    %2 = add <2 x i32> %1, %1
-    %3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
-    %4 = add i64 %3, %3
-    store i64 %4, i64* %q
-    ret void
-}
-
-; CHECK-LABEL: test_i64_v4i16:
-declare i64 @test_i64_v4i16_helper(<4 x i16> %p)
-define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <4 x i16>* %p
-    %2 = add <4 x i16> %1, %1
-    %3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
-    %4 = add i64 %3, %3
-    store i64 %4, i64* %q
-    ret void
-}
-
-; CHECK-LABEL: test_i64_v8i8:
-declare i64 @test_i64_v8i8_helper(<8 x i8> %p)
-define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <8 x i8>* %p
-    %2 = add <8 x i8> %1, %1
-    %3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
-    %4 = add i64 %3, %3
-    store i64 %4, i64* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f64_i64:
-declare double @test_f64_i64_helper(i64 %p)
-define void @test_f64_i64(i64* %p, double* %q) {
-; CHECK-NOT: rev
-    %1 = load i64* %p
-    %2 = add i64 %1, %1
-    %3 = call double @test_f64_i64_helper(i64 %2)
-    %4 = fadd double %3, %3
-    store double %4, double* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f64_v1i64:
-declare double @test_f64_v1i64_helper(<1 x i64> %p)
-define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
-; CHECK-NOT: rev
-    %1 = load <1 x i64>* %p
-    %2 = add <1 x i64> %1, %1
-    %3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
-    %4 = fadd double %3, %3
-    store double %4, double* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f64_v2f32:
-declare double @test_f64_v2f32_helper(<2 x float> %p)
-define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x float>* %p
-    %2 = fadd <2 x float> %1, %1
-    %3 = call double @test_f64_v2f32_helper(<2 x float> %2)
-    %4 = fadd double %3, %3
-    store double %4, double* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f64_v2i32:
-declare double @test_f64_v2i32_helper(<2 x i32> %p)
-define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x i32>* %p
-    %2 = add <2 x i32> %1, %1
-    %3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
-    %4 = fadd double %3, %3
-    store double %4, double* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f64_v4i16:
-declare double @test_f64_v4i16_helper(<4 x i16> %p)
-define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <4 x i16>* %p
-    %2 = add <4 x i16> %1, %1
-    %3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
-    %4 = fadd double %3, %3
-    store double %4, double* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f64_v8i8:
-declare double @test_f64_v8i8_helper(<8 x i8> %p)
-define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <8 x i8>* %p
-    %2 = add <8 x i8> %1, %1
-    %3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
-    %4 = fadd double %3, %3
-    store double %4, double* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v1i64_i64:
-declare <1 x i64> @test_v1i64_i64_helper(i64 %p)
-define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
-; CHECK-NOT: rev
-    %1 = load i64* %p
-    %2 = add i64 %1, %1
-    %3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
-    %4 = add <1 x i64> %3, %3
-    store <1 x i64> %4, <1 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v1i64_f64:
-declare <1 x i64> @test_v1i64_f64_helper(double %p)
-define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
-; CHECK-NOT: rev
-    %1 = load double* %p
-    %2 = fadd double %1, %1
-    %3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
-    %4 = add <1 x i64> %3, %3
-    store <1 x i64> %4, <1 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v1i64_v2f32:
-declare <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %p)
-define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x float>* %p
-    %2 = fadd <2 x float> %1, %1
-    %3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
-    %4 = add <1 x i64> %3, %3
-    store <1 x i64> %4, <1 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v1i64_v2i32:
-declare <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %p)
-define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x i32>* %p
-    %2 = add <2 x i32> %1, %1
-    %3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
-    %4 = add <1 x i64> %3, %3
-    store <1 x i64> %4, <1 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v1i64_v4i16:
-declare <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %p)
-define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <4 x i16>* %p
-    %2 = add <4 x i16> %1, %1
-    %3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
-    %4 = add <1 x i64> %3, %3
-    store <1 x i64> %4, <1 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v1i64_v8i8:
-declare <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %p)
-define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <8 x i8>* %p
-    %2 = add <8 x i8> %1, %1
-    %3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
-    %4 = add <1 x i64> %3, %3
-    store <1 x i64> %4, <1 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f32_i64:
-declare <2 x float> @test_v2f32_i64_helper(i64 %p)
-define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load i64* %p
-    %2 = add i64 %1, %1
-    %3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
-    %4 = fadd <2 x float> %3, %3
-    store <2 x float> %4, <2 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f32_f64:
-declare <2 x float> @test_v2f32_f64_helper(double %p)
-define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load double* %p
-    %2 = fadd double %1, %1
-    %3 = call <2 x float> @test_v2f32_f64_helper(double %2)
-    %4 = fadd <2 x float> %3, %3
-    store <2 x float> %4, <2 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f32_v1i64:
-declare <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %p)
-define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <1 x i64>* %p
-    %2 = add <1 x i64> %1, %1
-    %3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
-    %4 = fadd <2 x float> %3, %3
-    store <2 x float> %4, <2 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f32_v2i32:
-declare <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %p)
-define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x i32>* %p
-    %2 = add <2 x i32> %1, %1
-    %3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
-    %4 = fadd <2 x float> %3, %3
-    store <2 x float> %4, <2 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f32_v4i16:
-declare <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %p)
-define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <4 x i16>* %p
-    %2 = add <4 x i16> %1, %1
-    %3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
-    %4 = fadd <2 x float> %3, %3
-    store <2 x float> %4, <2 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f32_v8i8:
-declare <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %p)
-define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <8 x i8>* %p
-    %2 = add <8 x i8> %1, %1
-    %3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
-    %4 = fadd <2 x float> %3, %3
-    store <2 x float> %4, <2 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i32_i64:
-declare <2 x i32> @test_v2i32_i64_helper(i64 %p)
-define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load i64* %p
-    %2 = add i64 %1, %1
-    %3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
-    %4 = add <2 x i32> %3, %3
-    store <2 x i32> %4, <2 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i32_f64:
-declare <2 x i32> @test_v2i32_f64_helper(double %p)
-define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load double* %p
-    %2 = fadd double %1, %1
-    %3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
-    %4 = add <2 x i32> %3, %3
-    store <2 x i32> %4, <2 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i32_v1i64:
-declare <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %p)
-define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <1 x i64>* %p
-    %2 = add <1 x i64> %1, %1
-    %3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
-    %4 = add <2 x i32> %3, %3
-    store <2 x i32> %4, <2 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i32_v2f32:
-declare <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %p)
-define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x float>* %p
-    %2 = fadd <2 x float> %1, %1
-    %3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
-    %4 = add <2 x i32> %3, %3
-    store <2 x i32> %4, <2 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i32_v4i16:
-declare <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %p)
-define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <4 x i16>* %p
-    %2 = add <4 x i16> %1, %1
-    %3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
-    %4 = add <2 x i32> %3, %3
-    store <2 x i32> %4, <2 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i32_v8i8:
-declare <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %p)
-define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <8 x i8>* %p
-    %2 = add <8 x i8> %1, %1
-    %3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
-    %4 = add <2 x i32> %3, %3
-    store <2 x i32> %4, <2 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i16_i64:
-declare <4 x i16> @test_v4i16_i64_helper(i64 %p)
-define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load i64* %p
-    %2 = add i64 %1, %1
-    %3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
-    %4 = add <4 x i16> %3, %3
-    store <4 x i16> %4, <4 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i16_f64:
-declare <4 x i16> @test_v4i16_f64_helper(double %p)
-define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load double* %p
-    %2 = fadd double %1, %1
-    %3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
-    %4 = add <4 x i16> %3, %3
-    store <4 x i16> %4, <4 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i16_v1i64:
-declare <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %p)
-define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <1 x i64>* %p
-    %2 = add <1 x i64> %1, %1
-    %3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
-    %4 = add <4 x i16> %3, %3
-    store <4 x i16> %4, <4 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i16_v2f32:
-declare <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %p)
-define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <2 x float>* %p
-    %2 = fadd <2 x float> %1, %1
-    %3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
-    %4 = add <4 x i16> %3, %3
-    store <4 x i16> %4, <4 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i16_v2i32:
-declare <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %p)
-define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <2 x i32>* %p
-    %2 = add <2 x i32> %1, %1
-    %3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
-    %4 = add <4 x i16> %3, %3
-    store <4 x i16> %4, <4 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i16_v8i8:
-declare <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %p)
-define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <8 x i8>* %p
-    %2 = add <8 x i8> %1, %1
-    %3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
-    %4 = add <4 x i16> %3, %3
-    store <4 x i16> %4, <4 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i8_i64:
-declare <8 x i8> @test_v8i8_i64_helper(i64 %p)
-define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load i64* %p
-    %2 = add i64 %1, %1
-    %3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
-    %4 = add <8 x i8> %3, %3
-    store <8 x i8> %4, <8 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i8_f64:
-declare <8 x i8> @test_v8i8_f64_helper(double %p)
-define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load double* %p
-    %2 = fadd double %1, %1
-    %3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
-    %4 = add <8 x i8> %3, %3
-    store <8 x i8> %4, <8 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i8_v1i64:
-declare <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %p)
-define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <1 x i64>* %p
-    %2 = add <1 x i64> %1, %1
-    %3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
-    %4 = add <8 x i8> %3, %3
-    store <8 x i8> %4, <8 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i8_v2f32:
-declare <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %p)
-define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <2 x float>* %p
-    %2 = fadd <2 x float> %1, %1
-    %3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
-    %4 = add <8 x i8> %3, %3
-    store <8 x i8> %4, <8 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i8_v2i32:
-declare <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %p)
-define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.2s
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <2 x i32>* %p
-    %2 = add <2 x i32> %1, %1
-    %3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
-    %4 = add <8 x i8> %3, %3
-    store <8 x i8> %4, <8 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i8_v4i16:
-declare <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %p)
-define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4h
-; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <4 x i16>* %p
-    %2 = add <4 x i16> %1, %1
-    %3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
-    %4 = add <8 x i8> %3, %3
-    store <8 x i8> %4, <8 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f128_v2f64:
-declare fp128 @test_f128_v2f64_helper(<2 x double> %p)
-define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
-; CHECK: ext
-    %1 = load <2 x double>* %p
-    %2 = fadd <2 x double> %1, %1
-    %3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
-    %4 = fadd fp128 %3, %3
-    store fp128 %4, fp128* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f128_v2i64:
-declare fp128 @test_f128_v2i64_helper(<2 x i64> %p)
-define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
-; CHECK: ext
-    %1 = load <2 x i64>* %p
-    %2 = add <2 x i64> %1, %1
-    %3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
-    %4 = fadd fp128 %3, %3
-    store fp128 %4, fp128* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f128_v4f32:
-declare fp128 @test_f128_v4f32_helper(<4 x float> %p)
-define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load <4 x float>* %p
-    %2 = fadd <4 x float> %1, %1
-    %3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
-    %4 = fadd fp128 %3, %3
-    store fp128 %4, fp128* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f128_v4i32:
-declare fp128 @test_f128_v4i32_helper(<4 x i32> %p)
-define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load <4 x i32>* %p
-    %2 = add <4 x i32> %1, %1
-    %3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
-    %4 = fadd fp128 %3, %3
-    store fp128 %4, fp128* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f128_v8i16:
-declare fp128 @test_f128_v8i16_helper(<8 x i16> %p)
-define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = load <8 x i16>* %p
-    %2 = add <8 x i16> %1, %1
-    %3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
-    %4 = fadd fp128 %3, %3
-    store fp128 %4, fp128* %q
-    ret void
-}
-
-; CHECK-LABEL: test_f128_v16i8:
-declare fp128 @test_f128_v16i8_helper(<16 x i8> %p)
-define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = load <16 x i8>* %p
-    %2 = add <16 x i8> %1, %1
-    %3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
-    %4 = fadd fp128 %3, %3
-    store fp128 %4, fp128* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f64_f128:
-declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
-define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
-; CHECK: ext
-    %1 = load fp128* %p
-    %2 = fadd fp128 %1, %1
-    %3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
-    %4 = fadd <2 x double> %3, %3
-    store <2 x double> %4, <2 x double>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f64_v2i64:
-declare <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %p)
-define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
-; CHECK: ext
-; CHECK: ext
-    %1 = load <2 x i64>* %p
-    %2 = add <2 x i64> %1, %1
-    %3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
-    %4 = fadd <2 x double> %3, %3
-    store <2 x double> %4, <2 x double>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f64_v4f32:
-declare <2 x double> @test_v2f64_v4f32_helper(<4 x float> %p)
-define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: ext
-    %1 = load <4 x float>* %p
-    %2 = fadd <4 x float> %1, %1
-    %3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
-    %4 = fadd <2 x double> %3, %3
-    store <2 x double> %4, <2 x double>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f64_v4i32:
-declare <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %p)
-define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: ext
-    %1 = load <4 x i32>* %p
-    %2 = add <4 x i32> %1, %1
-    %3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
-    %4 = fadd <2 x double> %3, %3
-    store <2 x double> %4, <2 x double>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f64_v8i16:
-declare <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %p)
-define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-; CHECK: ext
-    %1 = load <8 x i16>* %p
-    %2 = add <8 x i16> %1, %1
-    %3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
-    %4 = fadd <2 x double> %3, %3
-    store <2 x double> %4, <2 x double>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2f64_v16i8:
-declare <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %p)
-define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-; CHECK: ext
-    %1 = load <16 x i8>* %p
-    %2 = add <16 x i8> %1, %1
-    %3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
-    %4 = fadd <2 x double> %3, %3
-    store <2 x double> %4, <2 x double>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i64_f128:
-declare <2 x i64> @test_v2i64_f128_helper(fp128 %p)
-define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
-; CHECK: ext
-    %1 = load fp128* %p
-    %2 = fadd fp128 %1, %1
-    %3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
-    %4 = add <2 x i64> %3, %3
-    store <2 x i64> %4, <2 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i64_v2f64:
-declare <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %p)
-define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
-; CHECK: ext
-; CHECK: ext
-    %1 = load <2 x double>* %p
-    %2 = fadd <2 x double> %1, %1
-    %3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
-    %4 = add <2 x i64> %3, %3
-    store <2 x i64> %4, <2 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i64_v4f32:
-declare <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %p)
-define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: ext
-    %1 = load <4 x float>* %p
-    %2 = fadd <4 x float> %1, %1
-    %3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
-    %4 = add <2 x i64> %3, %3
-    store <2 x i64> %4, <2 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i64_v4i32:
-declare <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %p)
-define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: ext
-    %1 = load <4 x i32>* %p
-    %2 = add <4 x i32> %1, %1
-    %3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
-    %4 = add <2 x i64> %3, %3
-    store <2 x i64> %4, <2 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i64_v8i16:
-declare <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %p)
-define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-; CHECK: ext
-    %1 = load <8 x i16>* %p
-    %2 = add <8 x i16> %1, %1
-    %3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
-    %4 = add <2 x i64> %3, %3
-    store <2 x i64> %4, <2 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v2i64_v16i8:
-declare <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %p)
-define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-; CHECK: ext
-    %1 = load <16 x i8>* %p
-    %2 = add <16 x i8> %1, %1
-    %3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
-    %4 = add <2 x i64> %3, %3
-    store <2 x i64> %4, <2 x i64>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4f32_f128:
-declare <4 x float> @test_v4f32_f128_helper(fp128 %p)
-define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load fp128* %p
-    %2 = fadd fp128 %1, %1
-    %3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
-    %4 = fadd <4 x float> %3, %3
-    store <4 x float> %4, <4 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4f32_v2f64:
-declare <4 x float> @test_v4f32_v2f64_helper(<2 x double> %p)
-define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load <2 x double>* %p
-    %2 = fadd <2 x double> %1, %1
-    %3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
-    %4 = fadd <4 x float> %3, %3
-    store <4 x float> %4, <4 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4f32_v2i64:
-declare <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %p)
-define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load <2 x i64>* %p
-    %2 = add <2 x i64> %1, %1
-    %3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
-    %4 = fadd <4 x float> %3, %3
-    store <4 x float> %4, <4 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4f32_v4i32:
-declare <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %p)
-define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load <4 x i32>* %p
-    %2 = add <4 x i32> %1, %1
-    %3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
-    %4 = fadd <4 x float> %3, %3
-    store <4 x float> %4, <4 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4f32_v8i16:
-declare <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %p)
-define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load <8 x i16>* %p
-    %2 = add <8 x i16> %1, %1
-    %3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
-    %4 = fadd <4 x float> %3, %3
-    store <4 x float> %4, <4 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4f32_v16i8:
-declare <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %p)
-define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load <16 x i8>* %p
-    %2 = add <16 x i8> %1, %1
-    %3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
-    %4 = fadd <4 x float> %3, %3
-    store <4 x float> %4, <4 x float>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i32_f128:
-declare <4 x i32> @test_v4i32_f128_helper(fp128 %p)
-define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load fp128* %p
-    %2 = fadd fp128 %1, %1
-    %3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
-    %4 = add <4 x i32> %3, %3
-    store <4 x i32> %4, <4 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i32_v2f64:
-declare <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %p)
-define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load <2 x double>* %p
-    %2 = fadd <2 x double> %1, %1
-    %3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
-    %4 = add <4 x i32> %3, %3
-    store <4 x i32> %4, <4 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i32_v2i64:
-declare <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %p)
-define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load <2 x i64>* %p
-    %2 = add <2 x i64> %1, %1
-    %3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
-    %4 = add <4 x i32> %3, %3
-    store <4 x i32> %4, <4 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i32_v4f32:
-declare <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %p)
-define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load <4 x float>* %p
-    %2 = fadd <4 x float> %1, %1
-    %3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
-    %4 = add <4 x i32> %3, %3
-    store <4 x i32> %4, <4 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i32_v8i16:
-declare <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %p)
-define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load <8 x i16>* %p
-    %2 = add <8 x i16> %1, %1
-    %3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
-    %4 = add <4 x i32> %3, %3
-    store <4 x i32> %4, <4 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v4i32_v16i8:
-declare <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %p)
-define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-    %1 = load <16 x i8>* %p
-    %2 = add <16 x i8> %1, %1
-    %3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
-    %4 = add <4 x i32> %3, %3
-    store <4 x i32> %4, <4 x i32>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i16_f128:
-declare <8 x i16> @test_v8i16_f128_helper(fp128 %p)
-define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = load fp128* %p
-    %2 = fadd fp128 %1, %1
-    %3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
-    %4 = add <8 x i16> %3, %3
-    store <8 x i16> %4, <8 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i16_v2f64:
-declare <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %p)
-define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = load <2 x double>* %p
-    %2 = fadd <2 x double> %1, %1
-    %3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
-    %4 = add <8 x i16> %3, %3
-    store <8 x i16> %4, <8 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i16_v2i64:
-declare <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %p)
-define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = load <2 x i64>* %p
-    %2 = add <2 x i64> %1, %1
-    %3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
-    %4 = add <8 x i16> %3, %3
-    store <8 x i16> %4, <8 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i16_v4f32:
-declare <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %p)
-define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = load <4 x float>* %p
-    %2 = fadd <4 x float> %1, %1
-    %3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
-    %4 = add <8 x i16> %3, %3
-    store <8 x i16> %4, <8 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i16_v4i32:
-declare <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %p)
-define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = load <4 x i32>* %p
-    %2 = add <4 x i32> %1, %1
-    %3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
-    %4 = add <8 x i16> %3, %3
-    store <8 x i16> %4, <8 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v8i16_v16i8:
-declare <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %p)
-define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-    %1 = load <16 x i8>* %p
-    %2 = add <16 x i8> %1, %1
-    %3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
-    %4 = add <8 x i16> %3, %3
-    store <8 x i16> %4, <8 x i16>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v16i8_f128:
-declare <16 x i8> @test_v16i8_f128_helper(fp128 %p)
-define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = load fp128* %p
-    %2 = fadd fp128 %1, %1
-    %3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
-    %4 = add <16 x i8> %3, %3
-    store <16 x i8> %4, <16 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v16i8_v2f64:
-declare <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %p)
-define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = load <2 x double>* %p
-    %2 = fadd <2 x double> %1, %1
-    %3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
-    %4 = add <16 x i8> %3, %3
-    store <16 x i8> %4, <16 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v16i8_v2i64:
-declare <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %p)
-define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = load <2 x i64>* %p
-    %2 = add <2 x i64> %1, %1
-    %3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
-    %4 = add <16 x i8> %3, %3
-    store <16 x i8> %4, <16 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v16i8_v4f32:
-declare <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %p)
-define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = load <4 x float>* %p
-    %2 = fadd <4 x float> %1, %1
-    %3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
-    %4 = add <16 x i8> %3, %3
-    store <16 x i8> %4, <16 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v16i8_v4i32:
-declare <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %p)
-define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.4s
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = load <4 x i32>* %p
-    %2 = add <4 x i32> %1, %1
-    %3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
-    %4 = add <16 x i8> %3, %3
-    store <16 x i8> %4, <16 x i8>* %q
-    ret void
-}
-
-; CHECK-LABEL: test_v16i8_v8i16:
-declare <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %p)
-define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
-; CHECK: rev64 v{{[0-9]+}}.8h
-; CHECK: ext
-; CHECK: rev64 v{{[0-9]+}}.16b
-; CHECK: ext
-    %1 = load <8 x i16>* %p
-    %2 = add <8 x i16> %1, %1
-    %3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
-    %4 = add <16 x i8> %3, %3
-    store <16 x i8> %4, <16 x i8>* %q
-    ret void
-}
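
All of the removed tests above share one shape: a vector is loaded, doubled, passed through an external helper, and the result is doubled and stored, with the CHECK lines pinning down the rev64/ext shuffles that a lane-reordering bitconvert across a call boundary requires. A minimal sketch of that shape (hypothetical helper name, not one of the tests above), which under this kind of big-endian-style lowering would be expected to produce a rev64 on the argument and another on the result:

declare <8 x i16> @sketch_helper(<4 x i32>)

define void @sketch(<4 x i32>* %p, <8 x i16>* %q) {
  %1 = load <4 x i32>* %p                          ; load the source vector
  %2 = call <8 x i16> @sketch_helper(<4 x i32> %1) ; cross a call boundary
  store <8 x i16> %2, <8 x i16>* %q                ; store the reinterpreted lanes
  ret void
}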

Removed: llvm/trunk/test/CodeGen/ARM64/big-imm-offsets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/big-imm-offsets.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/big-imm-offsets.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/big-imm-offsets.ll (removed)
@@ -1,14 +0,0 @@
-; RUN: llc -march=arm64 < %s
-
-
-; Make sure large offsets aren't mistaken for valid immediate offsets.
-; <rdar://problem/13190511>
-define void @f(i32* nocapture %p) {
-entry:
-  %a = ptrtoint i32* %p to i64
-  %ao = add i64 %a, 25769803792
-  %b = inttoptr i64 %ao to i32*
-  store volatile i32 0, i32* %b, align 4
-  store volatile i32 0, i32* %b, align 4
-  ret void
-}
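
For context, a hedged sketch (not part of the commit): AArch64 load/store addressing takes only a small scaled unsigned immediate, so small offsets fold into the instruction while an offset such as 25769803792 above must first be materialized in a register. An illustrative counterpart to the removed test, with a hypothetical function name; a small offset like this one should fold into something like str wzr, [x0, #16]:

define void @small_offset(i32* %p) {
entry:
  %a = getelementptr i32* %p, i64 4   ; byte offset 16, within the imm12 range
  store volatile i32 0, i32* %a, align 4
  ret void
}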

Removed: llvm/trunk/test/CodeGen/ARM64/big-stack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/big-stack.ll?rev=209576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/big-stack.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/big-stack.ll (removed)
@@ -1,21 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-target triple = "arm64-apple-macosx10"
-
-; Check that big stacks are generated correctly.
-; Currently, this is done by a sequence of sub instructions,
-; which can encode an immediate with a 12-bit mask and an optional
-; left shift (up to 12); i.e., 16773120 is the biggest value.
-; <rdar://12513931>
-; CHECK-LABEL: foo:
-; CHECK: sub sp, sp, #4095, lsl #12
-; CHECK: sub sp, sp, #4095, lsl #12
-; CHECK: sub sp, sp, #2, lsl #12
-define void @foo() nounwind ssp {
-entry:
-  %buffer = alloca [33554432 x i8], align 1
-  %arraydecay = getelementptr inbounds [33554432 x i8]* %buffer, i64 0, i64 0
-  call void @doit(i8* %arraydecay) nounwind
-  ret void
-}
-
-declare void @doit(i8*)
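
A quick arithmetic check of the CHECK'd sequence above (assuming the 32 MiB buffer dominates the frame size):

  4095 << 12           = 16773120
  16773120 + 16773120  = 33546240
  33546240 + (2 << 12) = 33546240 + 8192 = 33554432

which is exactly the 33554432-byte alloca, so two maximal shifted sub immediates plus one small shifted immediate cover the whole adjustment.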

More information about the llvm-commits mailing list