r276728 - Update for LLVM changes

David Majnemer via cfe-commits <cfe-commits at lists.llvm.org>
Mon Jul 25 22:52:37 PDT 2016


Modified: cfe/trunk/test/CodeGen/aarch64-neon-misc.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-misc.c?rev=276728&r1=276727&r2=276728&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-misc.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-misc.c Tue Jul 26 00:52:37 2016
@@ -6,7 +6,7 @@
 
 #include <arm_neon.h>
 
-// CHECK-LABEL: define <8 x i8> @test_vceqz_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vceqz_s8(
 // CHECK:   [[TMP0:%.*]] = icmp eq <8 x i8> %a, zeroinitializer
 // CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK:   ret <8 x i8> [[VCEQZ_I]]
@@ -14,57 +14,52 @@ uint8x8_t test_vceqz_s8(int8x8_t a) {
   return vceqz_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vceqz_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vceqz_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp eq <4 x i16> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
 // CHECK:   ret <4 x i16> [[VCEQZ_I]]
 uint16x4_t test_vceqz_s16(int16x4_t a) {
   return vceqz_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vceqz_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vceqz_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK:   [[TMP1:%.*]] = icmp eq <2 x i32> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 // CHECK:   ret <2 x i32> [[VCEQZ_I]]
 uint32x2_t test_vceqz_s32(int32x2_t a) {
   return vceqz_s32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vceqz_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vceqz_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp eq <1 x i64> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VCEQZ_I]]
 uint64x1_t test_vceqz_s64(int64x1_t a) {
   return vceqz_s64(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vceqz_u64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vceqz_u64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp eq <1 x i64> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VCEQZ_I]]
 uint64x1_t test_vceqz_u64(uint64x1_t a) {
   return vceqz_u64(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vceqz_p64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vceqz_p64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp eq <1 x i64> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VCEQZ_I]]
 uint64x1_t test_vceqz_p64(poly64x1_t a) {
   return vceqz_p64(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vceqzq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_s8(
 // CHECK:   [[TMP0:%.*]] = icmp eq <16 x i8> %a, zeroinitializer
 // CHECK:   [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
 // CHECK:   ret <16 x i8> [[VCEQZ_I]]
@@ -72,37 +67,34 @@ uint8x16_t test_vceqzq_s8(int8x16_t a) {
   return vceqzq_s8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vceqzq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp eq <8 x i16> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
 // CHECK:   ret <8 x i16> [[VCEQZ_I]]
 uint16x8_t test_vceqzq_s16(int16x8_t a) {
   return vceqzq_s16(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vceqzq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK:   [[TMP1:%.*]] = icmp eq <4 x i32> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
 // CHECK:   ret <4 x i32> [[VCEQZ_I]]
 uint32x4_t test_vceqzq_s32(int32x4_t a) {
   return vceqzq_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vceqzq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp eq <2 x i64> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VCEQZ_I]]
 uint64x2_t test_vceqzq_s64(int64x2_t a) {
   return vceqzq_s64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vceqz_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vceqz_u8(
 // CHECK:   [[TMP0:%.*]] = icmp eq <8 x i8> %a, zeroinitializer
 // CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK:   ret <8 x i8> [[VCEQZ_I]]
@@ -110,27 +102,25 @@ uint8x8_t test_vceqz_u8(uint8x8_t a) {
   return vceqz_u8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vceqz_u16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vceqz_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp eq <4 x i16> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
 // CHECK:   ret <4 x i16> [[VCEQZ_I]]
 uint16x4_t test_vceqz_u16(uint16x4_t a) {
   return vceqz_u16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vceqz_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vceqz_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK:   [[TMP1:%.*]] = icmp eq <2 x i32> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 // CHECK:   ret <2 x i32> [[VCEQZ_I]]
 uint32x2_t test_vceqz_u32(uint32x2_t a) {
   return vceqz_u32(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vceqzq_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_u8(
 // CHECK:   [[TMP0:%.*]] = icmp eq <16 x i8> %a, zeroinitializer
 // CHECK:   [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
 // CHECK:   ret <16 x i8> [[VCEQZ_I]]
@@ -138,67 +128,61 @@ uint8x16_t test_vceqzq_u8(uint8x16_t a)
   return vceqzq_u8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vceqzq_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp eq <8 x i16> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
 // CHECK:   ret <8 x i16> [[VCEQZ_I]]
 uint16x8_t test_vceqzq_u16(uint16x8_t a) {
   return vceqzq_u16(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vceqzq_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK:   [[TMP1:%.*]] = icmp eq <4 x i32> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
 // CHECK:   ret <4 x i32> [[VCEQZ_I]]
 uint32x4_t test_vceqzq_u32(uint32x4_t a) {
   return vceqzq_u32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vceqzq_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_u64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp eq <2 x i64> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VCEQZ_I]]
 uint64x2_t test_vceqzq_u64(uint64x2_t a) {
   return vceqzq_u64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vceqz_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vceqz_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[TMP2:%.*]] = fcmp oeq <2 x float> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK:   [[TMP1:%.*]] = fcmp oeq <2 x float> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 // CHECK:   ret <2 x i32> [[VCEQZ_I]]
 uint32x2_t test_vceqz_f32(float32x2_t a) {
   return vceqz_f32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vceqz_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vceqz_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK:   [[TMP2:%.*]] = fcmp oeq <1 x double> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK:   [[TMP1:%.*]] = fcmp oeq <1 x double> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VCEQZ_I]]
 uint64x1_t test_vceqz_f64(float64x1_t a) {
   return vceqz_f64(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vceqzq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[TMP2:%.*]] = fcmp oeq <4 x float> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK:   [[TMP1:%.*]] = fcmp oeq <4 x float> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
 // CHECK:   ret <4 x i32> [[VCEQZ_I]]
 uint32x4_t test_vceqzq_f32(float32x4_t a) {
   return vceqzq_f32(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vceqz_p8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vceqz_p8(
 // CHECK:   [[TMP0:%.*]] = icmp eq <8 x i8> %a, zeroinitializer
 // CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK:   ret <8 x i8> [[VCEQZ_I]]
@@ -206,7 +190,7 @@ uint8x8_t test_vceqz_p8(poly8x8_t a) {
   return vceqz_p8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vceqzq_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_p8(
 // CHECK:   [[TMP0:%.*]] = icmp eq <16 x i8> %a, zeroinitializer
 // CHECK:   [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
 // CHECK:   ret <16 x i8> [[VCEQZ_I]]
@@ -214,47 +198,43 @@ uint8x16_t test_vceqzq_p8(poly8x16_t a)
   return vceqzq_p8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vceqz_p16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vceqz_p16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp eq <4 x i16> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
 // CHECK:   ret <4 x i16> [[VCEQZ_I]]
 uint16x4_t test_vceqz_p16(poly16x4_t a) {
   return vceqz_p16(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vceqzq_p16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_p16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp eq <8 x i16> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
 // CHECK:   ret <8 x i16> [[VCEQZ_I]]
 uint16x8_t test_vceqzq_p16(poly16x8_t a) {
   return vceqzq_p16(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vceqzq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[TMP2:%.*]] = fcmp oeq <2 x double> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK:   [[TMP1:%.*]] = fcmp oeq <2 x double> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VCEQZ_I]]
 uint64x2_t test_vceqzq_f64(float64x2_t a) {
   return vceqzq_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vceqzq_p64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_p64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp eq <2 x i64> %a, zeroinitializer
+// CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VCEQZ_I]]
 uint64x2_t test_vceqzq_p64(poly64x2_t a) {
   return vceqzq_p64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vcgez_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcgez_s8(
 // CHECK:   [[TMP0:%.*]] = icmp sge <8 x i8> %a, zeroinitializer
 // CHECK:   [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK:   ret <8 x i8> [[VCGEZ_I]]
@@ -262,37 +242,34 @@ uint8x8_t test_vcgez_s8(int8x8_t a) {
   return vcgez_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vcgez_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vcgez_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp sge <4 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp sge <4 x i16> %a, zeroinitializer
+// CHECK:   [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
 // CHECK:   ret <4 x i16> [[VCGEZ_I]]
 uint16x4_t test_vcgez_s16(int16x4_t a) {
   return vcgez_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcgez_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcgez_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[TMP2:%.*]] = icmp sge <2 x i32> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK:   [[TMP1:%.*]] = icmp sge <2 x i32> %a, zeroinitializer
+// CHECK:   [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 // CHECK:   ret <2 x i32> [[VCGEZ_I]]
 uint32x2_t test_vcgez_s32(int32x2_t a) {
   return vcgez_s32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vcgez_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcgez_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp sge <1 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp sge <1 x i64> %a, zeroinitializer
+// CHECK:   [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VCGEZ_I]]
 uint64x1_t test_vcgez_s64(int64x1_t a) {
   return vcgez_s64(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vcgezq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcgezq_s8(
 // CHECK:   [[TMP0:%.*]] = icmp sge <16 x i8> %a, zeroinitializer
 // CHECK:   [[VCGEZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
 // CHECK:   ret <16 x i8> [[VCGEZ_I]]
@@ -300,77 +277,70 @@ uint8x16_t test_vcgezq_s8(int8x16_t a) {
   return vcgezq_s8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vcgezq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vcgezq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp sge <8 x i16> %a, zeroinitializer
+// CHECK:   [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
 // CHECK:   ret <8 x i16> [[VCGEZ_I]]
 uint16x8_t test_vcgezq_s16(int16x8_t a) {
   return vcgezq_s16(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcgezq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcgezq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK:   [[TMP1:%.*]] = icmp sge <4 x i32> %a, zeroinitializer
+// CHECK:   [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
 // CHECK:   ret <4 x i32> [[VCGEZ_I]]
 uint32x4_t test_vcgezq_s32(int32x4_t a) {
   return vcgezq_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcgezq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcgezq_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp sge <2 x i64> %a, zeroinitializer
+// CHECK:   [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VCGEZ_I]]
 uint64x2_t test_vcgezq_s64(int64x2_t a) {
   return vcgezq_s64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcgez_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcgez_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[TMP2:%.*]] = fcmp oge <2 x float> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK:   [[TMP1:%.*]] = fcmp oge <2 x float> %a, zeroinitializer
+// CHECK:   [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 // CHECK:   ret <2 x i32> [[VCGEZ_I]]
 uint32x2_t test_vcgez_f32(float32x2_t a) {
   return vcgez_f32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vcgez_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcgez_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK:   [[TMP2:%.*]] = fcmp oge <1 x double> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK:   [[TMP1:%.*]] = fcmp oge <1 x double> %a, zeroinitializer
+// CHECK:   [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VCGEZ_I]]
 uint64x1_t test_vcgez_f64(float64x1_t a) {
   return vcgez_f64(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcgezq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcgezq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[TMP2:%.*]] = fcmp oge <4 x float> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK:   [[TMP1:%.*]] = fcmp oge <4 x float> %a, zeroinitializer
+// CHECK:   [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
 // CHECK:   ret <4 x i32> [[VCGEZ_I]]
 uint32x4_t test_vcgezq_f32(float32x4_t a) {
   return vcgezq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcgezq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcgezq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[TMP2:%.*]] = fcmp oge <2 x double> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK:   [[TMP1:%.*]] = fcmp oge <2 x double> %a, zeroinitializer
+// CHECK:   [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VCGEZ_I]]
 uint64x2_t test_vcgezq_f64(float64x2_t a) {
   return vcgezq_f64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vclez_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vclez_s8(
 // CHECK:   [[TMP0:%.*]] = icmp sle <8 x i8> %a, zeroinitializer
 // CHECK:   [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK:   ret <8 x i8> [[VCLEZ_I]]
@@ -378,37 +348,34 @@ uint8x8_t test_vclez_s8(int8x8_t a) {
   return vclez_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vclez_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vclez_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp sle <4 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp sle <4 x i16> %a, zeroinitializer
+// CHECK:   [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
 // CHECK:   ret <4 x i16> [[VCLEZ_I]]
 uint16x4_t test_vclez_s16(int16x4_t a) {
   return vclez_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vclez_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vclez_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[TMP2:%.*]] = icmp sle <2 x i32> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK:   [[TMP1:%.*]] = icmp sle <2 x i32> %a, zeroinitializer
+// CHECK:   [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 // CHECK:   ret <2 x i32> [[VCLEZ_I]]
 uint32x2_t test_vclez_s32(int32x2_t a) {
   return vclez_s32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vclez_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vclez_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp sle <1 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp sle <1 x i64> %a, zeroinitializer
+// CHECK:   [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VCLEZ_I]]
 uint64x1_t test_vclez_s64(int64x1_t a) {
   return vclez_s64(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vclezq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vclezq_s8(
 // CHECK:   [[TMP0:%.*]] = icmp sle <16 x i8> %a, zeroinitializer
 // CHECK:   [[VCLEZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
 // CHECK:   ret <16 x i8> [[VCLEZ_I]]
@@ -416,77 +383,70 @@ uint8x16_t test_vclezq_s8(int8x16_t a) {
   return vclezq_s8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vclezq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vclezq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp sle <8 x i16> %a, zeroinitializer
+// CHECK:   [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
 // CHECK:   ret <8 x i16> [[VCLEZ_I]]
 uint16x8_t test_vclezq_s16(int16x8_t a) {
   return vclezq_s16(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vclezq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vclezq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK:   [[TMP1:%.*]] = icmp sle <4 x i32> %a, zeroinitializer
+// CHECK:   [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
 // CHECK:   ret <4 x i32> [[VCLEZ_I]]
 uint32x4_t test_vclezq_s32(int32x4_t a) {
   return vclezq_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vclezq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vclezq_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp sle <2 x i64> %a, zeroinitializer
+// CHECK:   [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VCLEZ_I]]
 uint64x2_t test_vclezq_s64(int64x2_t a) {
   return vclezq_s64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vclez_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vclez_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[TMP2:%.*]] = fcmp ole <2 x float> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK:   [[TMP1:%.*]] = fcmp ole <2 x float> %a, zeroinitializer
+// CHECK:   [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 // CHECK:   ret <2 x i32> [[VCLEZ_I]]
 uint32x2_t test_vclez_f32(float32x2_t a) {
   return vclez_f32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vclez_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vclez_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK:   [[TMP2:%.*]] = fcmp ole <1 x double> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK:   [[TMP1:%.*]] = fcmp ole <1 x double> %a, zeroinitializer
+// CHECK:   [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VCLEZ_I]]
 uint64x1_t test_vclez_f64(float64x1_t a) {
   return vclez_f64(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vclezq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vclezq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[TMP2:%.*]] = fcmp ole <4 x float> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK:   [[TMP1:%.*]] = fcmp ole <4 x float> %a, zeroinitializer
+// CHECK:   [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
 // CHECK:   ret <4 x i32> [[VCLEZ_I]]
 uint32x4_t test_vclezq_f32(float32x4_t a) {
   return vclezq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vclezq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vclezq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[TMP2:%.*]] = fcmp ole <2 x double> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK:   [[TMP1:%.*]] = fcmp ole <2 x double> %a, zeroinitializer
+// CHECK:   [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VCLEZ_I]]
 uint64x2_t test_vclezq_f64(float64x2_t a) {
   return vclezq_f64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vcgtz_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcgtz_s8(
 // CHECK:   [[TMP0:%.*]] = icmp sgt <8 x i8> %a, zeroinitializer
 // CHECK:   [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK:   ret <8 x i8> [[VCGTZ_I]]
@@ -494,37 +454,34 @@ uint8x8_t test_vcgtz_s8(int8x8_t a) {
   return vcgtz_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vcgtz_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vcgtz_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp sgt <4 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp sgt <4 x i16> %a, zeroinitializer
+// CHECK:   [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
 // CHECK:   ret <4 x i16> [[VCGTZ_I]]
 uint16x4_t test_vcgtz_s16(int16x4_t a) {
   return vcgtz_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcgtz_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcgtz_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[TMP2:%.*]] = icmp sgt <2 x i32> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK:   [[TMP1:%.*]] = icmp sgt <2 x i32> %a, zeroinitializer
+// CHECK:   [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 // CHECK:   ret <2 x i32> [[VCGTZ_I]]
 uint32x2_t test_vcgtz_s32(int32x2_t a) {
   return vcgtz_s32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vcgtz_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcgtz_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp sgt <1 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp sgt <1 x i64> %a, zeroinitializer
+// CHECK:   [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VCGTZ_I]]
 uint64x1_t test_vcgtz_s64(int64x1_t a) {
   return vcgtz_s64(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vcgtzq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcgtzq_s8(
 // CHECK:   [[TMP0:%.*]] = icmp sgt <16 x i8> %a, zeroinitializer
 // CHECK:   [[VCGTZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
 // CHECK:   ret <16 x i8> [[VCGTZ_I]]
@@ -532,77 +489,70 @@ uint8x16_t test_vcgtzq_s8(int8x16_t a) {
   return vcgtzq_s8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vcgtzq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vcgtzq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp sgt <8 x i16> %a, zeroinitializer
+// CHECK:   [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
 // CHECK:   ret <8 x i16> [[VCGTZ_I]]
 uint16x8_t test_vcgtzq_s16(int16x8_t a) {
   return vcgtzq_s16(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcgtzq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcgtzq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK:   [[TMP1:%.*]] = icmp sgt <4 x i32> %a, zeroinitializer
+// CHECK:   [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
 // CHECK:   ret <4 x i32> [[VCGTZ_I]]
 uint32x4_t test_vcgtzq_s32(int32x4_t a) {
   return vcgtzq_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcgtzq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcgtzq_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp sgt <2 x i64> %a, zeroinitializer
+// CHECK:   [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VCGTZ_I]]
 uint64x2_t test_vcgtzq_s64(int64x2_t a) {
   return vcgtzq_s64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcgtz_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcgtz_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[TMP2:%.*]] = fcmp ogt <2 x float> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK:   [[TMP1:%.*]] = fcmp ogt <2 x float> %a, zeroinitializer
+// CHECK:   [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 // CHECK:   ret <2 x i32> [[VCGTZ_I]]
 uint32x2_t test_vcgtz_f32(float32x2_t a) {
   return vcgtz_f32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vcgtz_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcgtz_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK:   [[TMP2:%.*]] = fcmp ogt <1 x double> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK:   [[TMP1:%.*]] = fcmp ogt <1 x double> %a, zeroinitializer
+// CHECK:   [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VCGTZ_I]]
 uint64x1_t test_vcgtz_f64(float64x1_t a) {
   return vcgtz_f64(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcgtzq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcgtzq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[TMP2:%.*]] = fcmp ogt <4 x float> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK:   [[TMP1:%.*]] = fcmp ogt <4 x float> %a, zeroinitializer
+// CHECK:   [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
 // CHECK:   ret <4 x i32> [[VCGTZ_I]]
 uint32x4_t test_vcgtzq_f32(float32x4_t a) {
   return vcgtzq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcgtzq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcgtzq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[TMP2:%.*]] = fcmp ogt <2 x double> [[TMP1]], zeroinitializer
-// CHECK:   [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK:   [[TMP1:%.*]] = fcmp ogt <2 x double> %a, zeroinitializer
+// CHECK:   [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VCGTZ_I]]
 uint64x2_t test_vcgtzq_f64(float64x2_t a) {
   return vcgtzq_f64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vcltz_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcltz_s8(
 // CHECK:   [[TMP0:%.*]] = icmp slt <8 x i8> %a, zeroinitializer
 // CHECK:   [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK:   ret <8 x i8> [[VCLTZ_I]]
@@ -610,37 +560,34 @@ uint8x8_t test_vcltz_s8(int8x8_t a) {
   return vcltz_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vcltz_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vcltz_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp slt <4 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp slt <4 x i16> %a, zeroinitializer
+// CHECK:   [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
 // CHECK:   ret <4 x i16> [[VCLTZ_I]]
 uint16x4_t test_vcltz_s16(int16x4_t a) {
   return vcltz_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcltz_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcltz_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK:   [[TMP1:%.*]] = icmp slt <2 x i32> %a, zeroinitializer
+// CHECK:   [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 // CHECK:   ret <2 x i32> [[VCLTZ_I]]
 uint32x2_t test_vcltz_s32(int32x2_t a) {
   return vcltz_s32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcltz_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp slt <1 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp slt <1 x i64> %a, zeroinitializer
+// CHECK:   [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VCLTZ_I]]
 uint64x1_t test_vcltz_s64(int64x1_t a) {
   return vcltz_s64(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vcltzq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcltzq_s8(
 // CHECK:   [[TMP0:%.*]] = icmp slt <16 x i8> %a, zeroinitializer
 // CHECK:   [[VCLTZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
 // CHECK:   ret <16 x i8> [[VCLTZ_I]]
@@ -648,1593 +595,1454 @@ uint8x16_t test_vcltzq_s8(int8x16_t a) {
   return vcltzq_s8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vcltzq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vcltzq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK:   [[TMP1:%.*]] = icmp slt <8 x i16> %a, zeroinitializer
+// CHECK:   [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
 // CHECK:   ret <8 x i16> [[VCLTZ_I]]
 uint16x8_t test_vcltzq_s16(int16x8_t a) {
   return vcltzq_s16(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcltzq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcltzq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK:   [[TMP1:%.*]] = icmp slt <4 x i32> %a, zeroinitializer
+// CHECK:   [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
 // CHECK:   ret <4 x i32> [[VCLTZ_I]]
 uint32x4_t test_vcltzq_s32(int32x4_t a) {
   return vcltzq_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcltzq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcltzq_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK:   [[TMP1:%.*]] = icmp slt <2 x i64> %a, zeroinitializer
+// CHECK:   [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VCLTZ_I]]
 uint64x2_t test_vcltzq_s64(int64x2_t a) {
   return vcltzq_s64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcltz_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcltz_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[TMP2:%.*]] = fcmp olt <2 x float> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK:   [[TMP1:%.*]] = fcmp olt <2 x float> %a, zeroinitializer
+// CHECK:   [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 // CHECK:   ret <2 x i32> [[VCLTZ_I]]
 uint32x2_t test_vcltz_f32(float32x2_t a) {
   return vcltz_f32(a);
 }
- 
-// CHECK-LABEL: define <1 x i64> @test_vcltz_f64(<1 x double> %a) #0 {
+
+// CHECK-LABEL: @test_vcltz_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK:   [[TMP2:%.*]] = fcmp olt <1 x double> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK:   [[TMP1:%.*]] = fcmp olt <1 x double> %a, zeroinitializer
+// CHECK:   [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VCLTZ_I]]
 uint64x1_t test_vcltz_f64(float64x1_t a) {
   return vcltz_f64(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcltzq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcltzq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[TMP2:%.*]] = fcmp olt <4 x float> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK:   [[TMP1:%.*]] = fcmp olt <4 x float> %a, zeroinitializer
+// CHECK:   [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
 // CHECK:   ret <4 x i32> [[VCLTZ_I]]
 uint32x4_t test_vcltzq_f32(float32x4_t a) {
   return vcltzq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcltzq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcltzq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[TMP2:%.*]] = fcmp olt <2 x double> [[TMP1]], zeroinitializer
-// CHECK:   [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK:   [[TMP1:%.*]] = fcmp olt <2 x double> %a, zeroinitializer
+// CHECK:   [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VCLTZ_I]]
 uint64x2_t test_vcltzq_f64(float64x2_t a) {
   return vcltzq_f64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev16_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vrev16_s8(int8x8_t a) {
   return vrev16_s8(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrev16_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev16_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vrev16_u8(uint8x8_t a) {
   return vrev16_u8(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrev16_p8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev16_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vrev16_p8(poly8x8_t a) {
   return vrev16_p8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev16q_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vrev16q_s8(int8x16_t a) {
   return vrev16q_s8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrev16q_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev16q_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vrev16q_u8(uint8x16_t a) {
   return vrev16q_u8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrev16q_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev16q_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vrev16q_p8(poly8x16_t a) {
   return vrev16q_p8(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev32_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vrev32_s8(int8x8_t a) {
   return vrev32_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrev32_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vrev32_s16(int16x4_t a) {
   return vrev32_s16(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrev32_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev32_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vrev32_u8(uint8x8_t a) {
   return vrev32_u8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vrev32_u16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrev32_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 uint16x4_t test_vrev32_u16(uint16x4_t a) {
   return vrev32_u16(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrev32_p8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev32_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vrev32_p8(poly8x8_t a) {
   return vrev32_p8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vrev32_p16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrev32_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 poly16x4_t test_vrev32_p16(poly16x4_t a) {
   return vrev32_p16(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev32q_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vrev32q_s8(int8x16_t a) {
   return vrev32q_s8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrev32q_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vrev32q_s16(int16x8_t a) {
   return vrev32q_s16(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrev32q_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev32q_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vrev32q_u8(uint8x16_t a) {
   return vrev32q_u8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vrev32q_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrev32q_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vrev32q_u16(uint16x8_t a) {
   return vrev32q_u16(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrev32q_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev32q_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vrev32q_p8(poly8x16_t a) {
   return vrev32q_p8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vrev32q_p16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrev32q_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 poly16x8_t test_vrev32q_p16(poly16x8_t a) {
   return vrev32q_p16(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev64_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vrev64_s8(int8x8_t a) {
   return vrev64_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrev64_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vrev64_s16(int16x4_t a) {
   return vrev64_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrev64_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 int32x2_t test_vrev64_s32(int32x2_t a) {
   return vrev64_s32(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrev64_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev64_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vrev64_u8(uint8x8_t a) {
   return vrev64_u8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vrev64_u16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrev64_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 uint16x4_t test_vrev64_u16(uint16x4_t a) {
   return vrev64_u16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vrev64_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrev64_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 uint32x2_t test_vrev64_u32(uint32x2_t a) {
   return vrev64_u32(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrev64_p8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev64_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vrev64_p8(poly8x8_t a) {
   return vrev64_p8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vrev64_p16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrev64_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 poly16x4_t test_vrev64_p16(poly16x4_t a) {
   return vrev64_p16(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrev64_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 float32x2_t test_vrev64_f32(float32x2_t a) {
   return vrev64_f32(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev64q_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vrev64q_s8(int8x16_t a) {
   return vrev64q_s8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrev64q_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vrev64q_s16(int16x8_t a) {
   return vrev64q_s16(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrev64q_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vrev64q_s32(int32x4_t a) {
   return vrev64q_s32(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrev64q_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev64q_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vrev64q_u8(uint8x16_t a) {
   return vrev64q_u8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vrev64q_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrev64q_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vrev64q_u16(uint16x8_t a) {
   return vrev64q_u16(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vrev64q_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrev64q_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vrev64q_u32(uint32x4_t a) {
   return vrev64q_u32(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrev64q_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrev64q_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vrev64q_p8(poly8x16_t a) {
   return vrev64q_p8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vrev64q_p16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrev64q_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 poly16x8_t test_vrev64q_p16(poly16x8_t a) {
   return vrev64q_p16(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrev64q_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 float32x4_t test_vrev64q_f32(float32x4_t a) {
   return vrev64q_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vpaddl_s8(
 // CHECK:   [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %a) #2
 // CHECK:   ret <4 x i16> [[VPADDL_I]]
 int16x4_t test_vpaddl_s8(int8x8_t a) {
   return vpaddl_s8(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vpaddl_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #2
+// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %a) #2
 // CHECK:   ret <2 x i32> [[VPADDL1_I]]
 int32x2_t test_vpaddl_s16(int16x4_t a) {
   return vpaddl_s16(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vpaddl_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #2
+// CHECK:   [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %a) #2
 // CHECK:   ret <1 x i64> [[VPADDL1_I]]
 int64x1_t test_vpaddl_s32(int32x2_t a) {
   return vpaddl_s32(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vpaddl_u8(
 // CHECK:   [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %a) #2
 // CHECK:   ret <4 x i16> [[VPADDL_I]]
 uint16x4_t test_vpaddl_u8(uint8x8_t a) {
   return vpaddl_u8(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vpaddl_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #2
+// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %a) #2
 // CHECK:   ret <2 x i32> [[VPADDL1_I]]
 uint32x2_t test_vpaddl_u16(uint16x4_t a) {
   return vpaddl_u16(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vpaddl_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #2
+// CHECK:   [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %a) #2
 // CHECK:   ret <1 x i64> [[VPADDL1_I]]
 uint64x1_t test_vpaddl_u32(uint32x2_t a) {
   return vpaddl_u32(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vpaddlq_s8(
 // CHECK:   [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %a) #2
 // CHECK:   ret <8 x i16> [[VPADDL_I]]
 int16x8_t test_vpaddlq_s8(int8x16_t a) {
   return vpaddlq_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vpaddlq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #2
+// CHECK:   [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %a) #2
 // CHECK:   ret <4 x i32> [[VPADDL1_I]]
 int32x4_t test_vpaddlq_s16(int16x8_t a) {
   return vpaddlq_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vpaddlq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #2
+// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %a) #2
 // CHECK:   ret <2 x i64> [[VPADDL1_I]]
 int64x2_t test_vpaddlq_s32(int32x4_t a) {
   return vpaddlq_s32(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vpaddlq_u8(
 // CHECK:   [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %a) #2
 // CHECK:   ret <8 x i16> [[VPADDL_I]]
 uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
   return vpaddlq_u8(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vpaddlq_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #2
+// CHECK:   [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %a) #2
 // CHECK:   ret <4 x i32> [[VPADDL1_I]]
 uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
   return vpaddlq_u16(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vpaddlq_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #2
+// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %a) #2
 // CHECK:   ret <2 x i64> [[VPADDL1_I]]
 uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
   return vpaddlq_u32(a);
 }
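[Editor's aside, not part of the diff: the shortened CHECK lines make it clearer that vpaddl/vpaddlq now lower straight to the [su]addlp intrinsics on the original operand, with the redundant bitcast chain gone. For reference, a minimal sketch of the assumed pairwise add-long semantics, written with plain NEON lane intrinsics; the helper name is ours and only illustrates the expected per-lane result.]

#include <arm_neon.h>

/* Reference for vpaddl_s16: adjacent 16-bit lanes are added into 32-bit lanes. */
static int32x2_t pairwise_add_long_ref(int16x4_t a) {
  int32_t lo = (int32_t)vget_lane_s16(a, 0) + (int32_t)vget_lane_s16(a, 1);
  int32_t hi = (int32_t)vget_lane_s16(a, 2) + (int32_t)vget_lane_s16(a, 3);
  int32x2_t r = vdup_n_s32(lo);
  return vset_lane_s32(hi, r, 1);   /* expected to match vpaddl_s16(a) */
}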
 
-// CHECK-LABEL: define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vpadal_s8(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %b) #2
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[TMP2:%.*]] = add <4 x i16> [[VPADAL_I]], [[TMP1]]
-// CHECK:   ret <4 x i16> [[TMP2]]
+// CHECK:   [[TMP1:%.*]] = add <4 x i16> [[VPADAL_I]], %a
+// CHECK:   ret <4 x i16> [[TMP1]]
 int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
   return vpadal_s8(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vpadal_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK:   [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[VPADAL_I]]) #2
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[TMP3:%.*]] = add <2 x i32> [[VPADAL1_I]], [[TMP2]]
-// CHECK:   ret <2 x i32> [[TMP3]]
+// CHECK:   [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %b) #2
+// CHECK:   [[TMP2:%.*]] = add <2 x i32> [[VPADAL1_I]], %a
+// CHECK:   ret <2 x i32> [[TMP2]]
 int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
   return vpadal_s16(a, b);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vpadal_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK:   [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK:   [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[VPADAL_I]]) #2
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK:   [[TMP3:%.*]] = add <1 x i64> [[VPADAL1_I]], [[TMP2]]
-// CHECK:   ret <1 x i64> [[TMP3]]
+// CHECK:   [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %b) #2
+// CHECK:   [[TMP2:%.*]] = add <1 x i64> [[VPADAL1_I]], %a
+// CHECK:   ret <1 x i64> [[TMP2]]
 int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
   return vpadal_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vpadal_u8(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %b) #2
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[TMP2:%.*]] = add <4 x i16> [[VPADAL_I]], [[TMP1]]
-// CHECK:   ret <4 x i16> [[TMP2]]
+// CHECK:   [[TMP1:%.*]] = add <4 x i16> [[VPADAL_I]], %a
+// CHECK:   ret <4 x i16> [[TMP1]]
 uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
   return vpadal_u8(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vpadal_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK:   [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[VPADAL_I]]) #2
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[TMP3:%.*]] = add <2 x i32> [[VPADAL1_I]], [[TMP2]]
-// CHECK:   ret <2 x i32> [[TMP3]]
+// CHECK:   [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %b) #2
+// CHECK:   [[TMP2:%.*]] = add <2 x i32> [[VPADAL1_I]], %a
+// CHECK:   ret <2 x i32> [[TMP2]]
 uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
   return vpadal_u16(a, b);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vpadal_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK:   [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK:   [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[VPADAL_I]]) #2
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK:   [[TMP3:%.*]] = add <1 x i64> [[VPADAL1_I]], [[TMP2]]
-// CHECK:   ret <1 x i64> [[TMP3]]
+// CHECK:   [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %b) #2
+// CHECK:   [[TMP2:%.*]] = add <1 x i64> [[VPADAL1_I]], %a
+// CHECK:   ret <1 x i64> [[TMP2]]
 uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
   return vpadal_u32(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vpadalq_s8(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %b) #2
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[TMP2:%.*]] = add <8 x i16> [[VPADAL_I]], [[TMP1]]
-// CHECK:   ret <8 x i16> [[TMP2]]
+// CHECK:   [[TMP1:%.*]] = add <8 x i16> [[VPADAL_I]], %a
+// CHECK:   ret <8 x i16> [[TMP1]]
 int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
   return vpadalq_s8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vpadalq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK:   [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[VPADAL_I]]) #2
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[TMP3:%.*]] = add <4 x i32> [[VPADAL1_I]], [[TMP2]]
-// CHECK:   ret <4 x i32> [[TMP3]]
+// CHECK:   [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %b) #2
+// CHECK:   [[TMP2:%.*]] = add <4 x i32> [[VPADAL1_I]], %a
+// CHECK:   ret <4 x i32> [[TMP2]]
 int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
   return vpadalq_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vpadalq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK:   [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK:   [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[VPADAL_I]]) #2
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[TMP3:%.*]] = add <2 x i64> [[VPADAL1_I]], [[TMP2]]
-// CHECK:   ret <2 x i64> [[TMP3]]
+// CHECK:   [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %b) #2
+// CHECK:   [[TMP2:%.*]] = add <2 x i64> [[VPADAL1_I]], %a
+// CHECK:   ret <2 x i64> [[TMP2]]
 int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
   return vpadalq_s32(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vpadalq_u8(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %b) #2
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[TMP2:%.*]] = add <8 x i16> [[VPADAL_I]], [[TMP1]]
-// CHECK:   ret <8 x i16> [[TMP2]]
+// CHECK:   [[TMP1:%.*]] = add <8 x i16> [[VPADAL_I]], %a
+// CHECK:   ret <8 x i16> [[TMP1]]
 uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
   return vpadalq_u8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vpadalq_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK:   [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADAL_I]]) #2
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[TMP3:%.*]] = add <4 x i32> [[VPADAL1_I]], [[TMP2]]
-// CHECK:   ret <4 x i32> [[TMP3]]
+// CHECK:   [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %b) #2
+// CHECK:   [[TMP2:%.*]] = add <4 x i32> [[VPADAL1_I]], %a
+// CHECK:   ret <4 x i32> [[TMP2]]
 uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
   return vpadalq_u16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vpadalq_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK:   [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK:   [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADAL_I]]) #2
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[TMP3:%.*]] = add <2 x i64> [[VPADAL1_I]], [[TMP2]]
-// CHECK:   ret <2 x i64> [[TMP3]]
+// CHECK:   [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %b) #2
+// CHECK:   [[TMP2:%.*]] = add <2 x i64> [[VPADAL1_I]], %a
+// CHECK:   ret <2 x i64> [[TMP2]]
 uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
   return vpadalq_u32(a, b);
 }
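[Editor's aside, not part of the diff: with the dead bitcasts removed, the vpadal checks read as "pairwise add-long of the second operand, then an ordinary add with the accumulator". A hedged sketch of that equivalence using existing intrinsics; the helper name is ours.]

#include <arm_neon.h>

/* vpadal_s16(acc, b) is expected to equal acc + vpaddl_s16(b), lane for lane. */
static int32x2_t padal_ref(int32x2_t acc, int16x4_t b) {
  return vadd_s32(acc, vpaddl_s16(b));
}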
 
-// CHECK-LABEL: define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vqabs_s8(
 // CHECK:   [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %a) #2
 // CHECK:   ret <8 x i8> [[VQABS_V_I]]
 int8x8_t test_vqabs_s8(int8x8_t a) {
   return vqabs_s8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vqabsq_s8(
 // CHECK:   [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %a) #2
 // CHECK:   ret <16 x i8> [[VQABSQ_V_I]]
 int8x16_t test_vqabsq_s8(int8x16_t a) {
   return vqabsq_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vqabs_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[VQABS_V_I]]) #2
+// CHECK:   [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %a) #2
 // CHECK:   [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <4 x i16>
-// CHECK:   ret <4 x i16> [[TMP1]]
+// CHECK:   ret <4 x i16> [[VQABS_V1_I]]
 int16x4_t test_vqabs_s16(int16x4_t a) {
   return vqabs_s16(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vqabsq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> [[VQABSQ_V_I]]) #2
+// CHECK:   [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %a) #2
 // CHECK:   [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <8 x i16>
-// CHECK:   ret <8 x i16> [[TMP1]]
+// CHECK:   ret <8 x i16> [[VQABSQ_V1_I]]
 int16x8_t test_vqabsq_s16(int16x8_t a) {
   return vqabsq_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vqabs_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> [[VQABS_V_I]]) #2
+// CHECK:   [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %a) #2
 // CHECK:   [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <2 x i32>
-// CHECK:   ret <2 x i32> [[TMP1]]
+// CHECK:   ret <2 x i32> [[VQABS_V1_I]]
 int32x2_t test_vqabs_s32(int32x2_t a) {
   return vqabs_s32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vqabsq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> [[VQABSQ_V_I]]) #2
+// CHECK:   [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %a) #2
 // CHECK:   [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <4 x i32>
-// CHECK:   ret <4 x i32> [[TMP1]]
+// CHECK:   ret <4 x i32> [[VQABSQ_V1_I]]
 int32x4_t test_vqabsq_s32(int32x4_t a) {
   return vqabsq_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vqabsq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vqabsq_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VQABSQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqabs.v2i64(<2 x i64> [[VQABSQ_V_I]]) #2
+// CHECK:   [[VQABSQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqabs.v2i64(<2 x i64> %a) #2
 // CHECK:   [[VQABSQ_V2_I:%.*]] = bitcast <2 x i64> [[VQABSQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <2 x i64>
-// CHECK:   ret <2 x i64> [[TMP1]]
+// CHECK:   ret <2 x i64> [[VQABSQ_V1_I]]
 int64x2_t test_vqabsq_s64(int64x2_t a) {
   return vqabsq_s64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vqneg_s8(
 // CHECK:   [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %a) #2
 // CHECK:   ret <8 x i8> [[VQNEG_V_I]]
 int8x8_t test_vqneg_s8(int8x8_t a) {
   return vqneg_s8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vqnegq_s8(
 // CHECK:   [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %a) #2
 // CHECK:   ret <16 x i8> [[VQNEGQ_V_I]]
 int8x16_t test_vqnegq_s8(int8x16_t a) {
   return vqnegq_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vqneg_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[VQNEG_V_I]]) #2
+// CHECK:   [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %a) #2
 // CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <4 x i16>
-// CHECK:   ret <4 x i16> [[TMP1]]
+// CHECK:   ret <4 x i16> [[VQNEG_V1_I]]
 int16x4_t test_vqneg_s16(int16x4_t a) {
   return vqneg_s16(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vqnegq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> [[VQNEGQ_V_I]]) #2
+// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %a) #2
 // CHECK:   [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <8 x i16>
-// CHECK:   ret <8 x i16> [[TMP1]]
+// CHECK:   ret <8 x i16> [[VQNEGQ_V1_I]]
 int16x8_t test_vqnegq_s16(int16x8_t a) {
   return vqnegq_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vqneg_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> [[VQNEG_V_I]]) #2
+// CHECK:   [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %a) #2
 // CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <2 x i32>
-// CHECK:   ret <2 x i32> [[TMP1]]
+// CHECK:   ret <2 x i32> [[VQNEG_V1_I]]
 int32x2_t test_vqneg_s32(int32x2_t a) {
   return vqneg_s32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vqnegq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> [[VQNEGQ_V_I]]) #2
+// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %a) #2
 // CHECK:   [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <4 x i32>
-// CHECK:   ret <4 x i32> [[TMP1]]
+// CHECK:   ret <4 x i32> [[VQNEGQ_V1_I]]
 int32x4_t test_vqnegq_s32(int32x4_t a) {
   return vqnegq_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vqnegq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vqnegq_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqneg.v2i64(<2 x i64> [[VQNEGQ_V_I]]) #2
+// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqneg.v2i64(<2 x i64> %a) #2
 // CHECK:   [[VQNEGQ_V2_I:%.*]] = bitcast <2 x i64> [[VQNEGQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <2 x i64>
-// CHECK:   ret <2 x i64> [[TMP1]]
+// CHECK:   ret <2 x i64> [[VQNEGQ_V1_I]]
 int64x2_t test_vqnegq_s64(int64x2_t a) {
   return vqnegq_s64(a);
 }
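[Editor's aside, not part of the diff: unlike the plain vabs/vneg tests further down, sqabs/sqneg saturate the single overflowing input (INT_MIN). A scalar sketch of the assumed per-lane behaviour; the helper names are ours.]

#include <stdint.h>

/* One 32-bit lane of vqnegq_s32 / vqabsq_s32, assuming saturating semantics. */
static int32_t qneg32_ref(int32_t x) {
  return x == INT32_MIN ? INT32_MAX : -x;
}
static int32_t qabs32_ref(int32_t x) {
  return x == INT32_MIN ? INT32_MAX : (x < 0 ? -x : x);
}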
 
-// CHECK-LABEL: define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vneg_s8(
 // CHECK:   [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a
 // CHECK:   ret <8 x i8> [[SUB_I]]
 int8x8_t test_vneg_s8(int8x8_t a) {
   return vneg_s8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vnegq_s8(
 // CHECK:   [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a
 // CHECK:   ret <16 x i8> [[SUB_I]]
 int8x16_t test_vnegq_s8(int8x16_t a) {
   return vnegq_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vneg_s16(
 // CHECK:   [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a
 // CHECK:   ret <4 x i16> [[SUB_I]]
 int16x4_t test_vneg_s16(int16x4_t a) {
   return vneg_s16(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vnegq_s16(
 // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a
 // CHECK:   ret <8 x i16> [[SUB_I]]
 int16x8_t test_vnegq_s16(int16x8_t a) {
   return vnegq_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vneg_s32(
 // CHECK:   [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a
 // CHECK:   ret <2 x i32> [[SUB_I]]
 int32x2_t test_vneg_s32(int32x2_t a) {
   return vneg_s32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vnegq_s32(
 // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a
 // CHECK:   ret <4 x i32> [[SUB_I]]
 int32x4_t test_vnegq_s32(int32x4_t a) {
   return vnegq_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vnegq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vnegq_s64(
 // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> zeroinitializer, %a
 // CHECK:   ret <2 x i64> [[SUB_I]]
 int64x2_t test_vnegq_s64(int64x2_t a) {
   return vnegq_s64(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vneg_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vneg_f32(
 // CHECK:   [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
 // CHECK:   ret <2 x float> [[SUB_I]]
 float32x2_t test_vneg_f32(float32x2_t a) {
   return vneg_f32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vnegq_f32(
 // CHECK:   [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
 // CHECK:   ret <4 x float> [[SUB_I]]
 float32x4_t test_vnegq_f32(float32x4_t a) {
   return vnegq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vnegq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vnegq_f64(
 // CHECK:   [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
 // CHECK:   ret <2 x double> [[SUB_I]]
 float64x2_t test_vnegq_f64(float64x2_t a) {
   return vnegq_f64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vabs_s8(
 // CHECK:   [[VABS_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %a) #2
 // CHECK:   ret <8 x i8> [[VABS_I]]
 int8x8_t test_vabs_s8(int8x8_t a) {
   return vabs_s8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vabsq_s8(
 // CHECK:   [[VABS_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %a) #2
 // CHECK:   ret <16 x i8> [[VABS_I]]
 int8x16_t test_vabsq_s8(int8x16_t a) {
   return vabsq_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vabs_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[VABS1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> [[VABS_I]]) #2
+// CHECK:   [[VABS1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %a) #2
 // CHECK:   ret <4 x i16> [[VABS1_I]]
 int16x4_t test_vabs_s16(int16x4_t a) {
   return vabs_s16(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vabsq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VABS1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> [[VABS_I]]) #2
+// CHECK:   [[VABS1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %a) #2
 // CHECK:   ret <8 x i16> [[VABS1_I]]
 int16x8_t test_vabsq_s16(int16x8_t a) {
   return vabsq_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vabs_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[VABS1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> [[VABS_I]]) #2
+// CHECK:   [[VABS1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %a) #2
 // CHECK:   ret <2 x i32> [[VABS1_I]]
 int32x2_t test_vabs_s32(int32x2_t a) {
   return vabs_s32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vabsq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VABS1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> [[VABS_I]]) #2
+// CHECK:   [[VABS1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %a) #2
 // CHECK:   ret <4 x i32> [[VABS1_I]]
 int32x4_t test_vabsq_s32(int32x4_t a) {
   return vabsq_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vabsq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vabsq_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VABS1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.abs.v2i64(<2 x i64> [[VABS_I]]) #2
+// CHECK:   [[VABS1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.abs.v2i64(<2 x i64> %a) #2
 // CHECK:   ret <2 x i64> [[VABS1_I]]
 int64x2_t test_vabsq_s64(int64x2_t a) {
   return vabsq_s64(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vabs_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vabs_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[VABS_I]]) #2
+// CHECK:   [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x float> [[VABS1_I]]
 float32x2_t test_vabs_f32(float32x2_t a) {
   return vabs_f32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vabsq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vabsq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VABS_I]]) #2
+// CHECK:   [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x float> [[VABS1_I]]
 float32x4_t test_vabsq_f32(float32x4_t a) {
   return vabsq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vabsq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vabsq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VABS1_I:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[VABS_I]]) #2
+// CHECK:   [[VABS1_I:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x double> [[VABS1_I]]
 float64x2_t test_vabsq_f64(float64x2_t a) {
   return vabsq_f64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vuqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuqadd_s8(
 // CHECK:   [[VUQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #2
 // CHECK:   ret <8 x i8> [[VUQADD_I]]
 int8x8_t test_vuqadd_s8(int8x8_t a, int8x8_t b) {
   return vuqadd_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vuqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuqaddq_s8(
 // CHECK:   [[VUQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #2
 // CHECK:   ret <16 x i8> [[VUQADD_I]]
 int8x16_t test_vuqaddq_s8(int8x16_t a, int8x16_t b) {
   return vuqaddq_s8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vuqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuqadd_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK:   [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[VUQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[VUQADD_I]], <4 x i16> [[VUQADD1_I]]) #2
+// CHECK:   [[VUQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #2
 // CHECK:   ret <4 x i16> [[VUQADD2_I]]
 int16x4_t test_vuqadd_s16(int16x4_t a, int16x4_t b) {
   return vuqadd_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vuqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuqaddq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK:   [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[VUQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> [[VUQADD_I]], <8 x i16> [[VUQADD1_I]]) #2
+// CHECK:   [[VUQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #2
 // CHECK:   ret <8 x i16> [[VUQADD2_I]]
 int16x8_t test_vuqaddq_s16(int16x8_t a, int16x8_t b) {
   return vuqaddq_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vuqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuqadd_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK:   [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK:   [[VUQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> [[VUQADD_I]], <2 x i32> [[VUQADD1_I]]) #2
+// CHECK:   [[VUQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #2
 // CHECK:   ret <2 x i32> [[VUQADD2_I]]
 int32x2_t test_vuqadd_s32(int32x2_t a, int32x2_t b) {
   return vuqadd_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vuqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuqaddq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK:   [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK:   [[VUQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> [[VUQADD_I]], <4 x i32> [[VUQADD1_I]]) #2
+// CHECK:   [[VUQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #2
 // CHECK:   ret <4 x i32> [[VUQADD2_I]]
 int32x4_t test_vuqaddq_s32(int32x4_t a, int32x4_t b) {
   return vuqaddq_s32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vuqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vuqaddq_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK:   [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK:   [[VUQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> [[VUQADD_I]], <2 x i64> [[VUQADD1_I]]) #2
+// CHECK:   [[VUQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #2
 // CHECK:   ret <2 x i64> [[VUQADD2_I]]
 int64x2_t test_vuqaddq_s64(int64x2_t a, int64x2_t b) {
   return vuqaddq_s64(a, b);
 }
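[Editor's aside, not part of the diff: suqadd is understood to add an unsigned operand into a signed one with signed saturation. A scalar sketch of one 32-bit lane under that assumption; the helper name is ours.]

#include <stdint.h>

static int32_t suqadd32_ref(int32_t a, uint32_t b) {
  int64_t sum = (int64_t)a + (int64_t)b;   /* cannot underflow: b >= 0 */
  return sum > INT32_MAX ? INT32_MAX : (int32_t)sum;
}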
 
-// CHECK-LABEL: define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcls_s8(
 // CHECK:   [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %a) #2
 // CHECK:   ret <8 x i8> [[VCLS_V_I]]
 int8x8_t test_vcls_s8(int8x8_t a) {
   return vcls_s8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vclsq_s8(
 // CHECK:   [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %a) #2
 // CHECK:   ret <16 x i8> [[VCLSQ_V_I]]
 int8x16_t test_vclsq_s8(int8x16_t a) {
   return vclsq_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vcls_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> [[VCLS_V_I]]) #2
+// CHECK:   [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> %a) #2
 // CHECK:   [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <4 x i16>
-// CHECK:   ret <4 x i16> [[TMP1]]
+// CHECK:   ret <4 x i16> [[VCLS_V1_I]]
 int16x4_t test_vcls_s16(int16x4_t a) {
   return vcls_s16(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vclsq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> [[VCLSQ_V_I]]) #2
+// CHECK:   [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> %a) #2
 // CHECK:   [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <8 x i16>
-// CHECK:   ret <8 x i16> [[TMP1]]
+// CHECK:   ret <8 x i16> [[VCLSQ_V1_I]]
 int16x8_t test_vclsq_s16(int16x8_t a) {
   return vclsq_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcls_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> [[VCLS_V_I]]) #2
+// CHECK:   [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> %a) #2
 // CHECK:   [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <2 x i32>
-// CHECK:   ret <2 x i32> [[TMP1]]
+// CHECK:   ret <2 x i32> [[VCLS_V1_I]]
 int32x2_t test_vcls_s32(int32x2_t a) {
   return vcls_s32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vclsq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> [[VCLSQ_V_I]]) #2
+// CHECK:   [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> %a) #2
 // CHECK:   [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <4 x i32>
-// CHECK:   ret <4 x i32> [[TMP1]]
+// CHECK:   ret <4 x i32> [[VCLSQ_V1_I]]
 int32x4_t test_vclsq_s32(int32x4_t a) {
   return vclsq_s32(a);
 }
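[Editor's aside, not part of the diff: vcls counts leading sign bits, i.e. how many bits after the sign bit still match it. A scalar sketch of one 32-bit lane under that definition; the helper name is ours.]

#include <stdint.h>

static int cls32_ref(int32_t x) {
  uint32_t ux = (uint32_t)x, sign = ux >> 31;
  int n = 0;
  for (int bit = 30; bit >= 0; --bit) {
    if (((ux >> bit) & 1u) != sign) break;
    ++n;
  }
  return n;   /* cls(0) == 31, cls(-1) == 31, cls(1) == 30 */
}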
 
-// CHECK-LABEL: define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vclz_s8(
 // CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #2
 // CHECK:   ret <8 x i8> [[VCLZ_V_I]]
 int8x8_t test_vclz_s8(int8x8_t a) {
   return vclz_s8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vclzq_s8(
 // CHECK:   [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #2
 // CHECK:   ret <16 x i8> [[VCLZQ_V_I]]
 int8x16_t test_vclzq_s8(int8x16_t a) {
   return vclzq_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vclz_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #2
+// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #2
 // CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
-// CHECK:   ret <4 x i16> [[TMP1]]
+// CHECK:   ret <4 x i16> [[VCLZ_V1_I]]
 int16x4_t test_vclz_s16(int16x4_t a) {
   return vclz_s16(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vclzq_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #2
+// CHECK:   [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #2
 // CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
-// CHECK:   ret <8 x i16> [[TMP1]]
+// CHECK:   ret <8 x i16> [[VCLZQ_V1_I]]
 int16x8_t test_vclzq_s16(int16x8_t a) {
   return vclzq_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vclz_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #2
+// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #2
 // CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
-// CHECK:   ret <2 x i32> [[TMP1]]
+// CHECK:   ret <2 x i32> [[VCLZ_V1_I]]
 int32x2_t test_vclz_s32(int32x2_t a) {
   return vclz_s32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vclzq_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #2
+// CHECK:   [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #2
 // CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
-// CHECK:   ret <4 x i32> [[TMP1]]
+// CHECK:   ret <4 x i32> [[VCLZQ_V1_I]]
 int32x4_t test_vclzq_s32(int32x4_t a) {
   return vclzq_s32(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vclz_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vclz_u8(
 // CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #2
 // CHECK:   ret <8 x i8> [[VCLZ_V_I]]
 uint8x8_t test_vclz_u8(uint8x8_t a) {
   return vclz_u8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vclzq_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vclzq_u8(
 // CHECK:   [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #2
 // CHECK:   ret <16 x i8> [[VCLZQ_V_I]]
 uint8x16_t test_vclzq_u8(uint8x16_t a) {
   return vclzq_u8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vclz_u16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vclz_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #2
+// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #2
 // CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
-// CHECK:   ret <4 x i16> [[TMP1]]
+// CHECK:   ret <4 x i16> [[VCLZ_V1_I]]
 uint16x4_t test_vclz_u16(uint16x4_t a) {
   return vclz_u16(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vclzq_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vclzq_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #2
+// CHECK:   [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #2
 // CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
-// CHECK:   ret <8 x i16> [[TMP1]]
+// CHECK:   ret <8 x i16> [[VCLZQ_V1_I]]
 uint16x8_t test_vclzq_u16(uint16x8_t a) {
   return vclzq_u16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vclz_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vclz_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #2
+// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #2
 // CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
-// CHECK:   ret <2 x i32> [[TMP1]]
+// CHECK:   ret <2 x i32> [[VCLZ_V1_I]]
 uint32x2_t test_vclz_u32(uint32x2_t a) {
   return vclz_u32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vclzq_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vclzq_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #2
+// CHECK:   [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #2
 // CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
-// CHECK:   ret <4 x i32> [[TMP1]]
+// CHECK:   ret <4 x i32> [[VCLZQ_V1_I]]
 uint32x4_t test_vclzq_u32(uint32x4_t a) {
   return vclzq_u32(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcnt_s8(
 // CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #2
 // CHECK:   ret <8 x i8> [[VCNT_V_I]]
 int8x8_t test_vcnt_s8(int8x8_t a) {
   return vcnt_s8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcntq_s8(
 // CHECK:   [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #2
 // CHECK:   ret <16 x i8> [[VCNTQ_V_I]]
 int8x16_t test_vcntq_s8(int8x16_t a) {
   return vcntq_s8(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vcnt_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcnt_u8(
 // CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #2
 // CHECK:   ret <8 x i8> [[VCNT_V_I]]
 uint8x8_t test_vcnt_u8(uint8x8_t a) {
   return vcnt_u8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vcntq_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcntq_u8(
 // CHECK:   [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #2
 // CHECK:   ret <16 x i8> [[VCNTQ_V_I]]
 uint8x16_t test_vcntq_u8(uint8x16_t a) {
   return vcntq_u8(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vcnt_p8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcnt_p8(
 // CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #2
 // CHECK:   ret <8 x i8> [[VCNT_V_I]]
 poly8x8_t test_vcnt_p8(poly8x8_t a) {
   return vcnt_p8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vcntq_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcntq_p8(
 // CHECK:   [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #2
 // CHECK:   ret <16 x i8> [[VCNTQ_V_I]]
 poly8x16_t test_vcntq_p8(poly8x16_t a) {
   return vcntq_p8(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vmvn_s8(
 // CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 // CHECK:   ret <8 x i8> [[NEG_I]]
 int8x8_t test_vmvn_s8(int8x8_t a) {
   return vmvn_s8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vmvnq_s8(
 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 // CHECK:   ret <16 x i8> [[NEG_I]]
 int8x16_t test_vmvnq_s8(int8x16_t a) {
   return vmvnq_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vmvn_s16(
 // CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
 // CHECK:   ret <4 x i16> [[NEG_I]]
 int16x4_t test_vmvn_s16(int16x4_t a) {
   return vmvn_s16(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vmvnq_s16(
 // CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
 // CHECK:   ret <8 x i16> [[NEG_I]]
 int16x8_t test_vmvnq_s16(int16x8_t a) {
   return vmvnq_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vmvn_s32(
 // CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
 // CHECK:   ret <2 x i32> [[NEG_I]]
 int32x2_t test_vmvn_s32(int32x2_t a) {
   return vmvn_s32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vmvnq_s32(
 // CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
 // CHECK:   ret <4 x i32> [[NEG_I]]
 int32x4_t test_vmvnq_s32(int32x4_t a) {
   return vmvnq_s32(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vmvn_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vmvn_u8(
 // CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 // CHECK:   ret <8 x i8> [[NEG_I]]
 uint8x8_t test_vmvn_u8(uint8x8_t a) {
   return vmvn_u8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vmvnq_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vmvnq_u8(
 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 // CHECK:   ret <16 x i8> [[NEG_I]]
 uint8x16_t test_vmvnq_u8(uint8x16_t a) {
   return vmvnq_u8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmvn_u16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vmvn_u16(
 // CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
 // CHECK:   ret <4 x i16> [[NEG_I]]
 uint16x4_t test_vmvn_u16(uint16x4_t a) {
   return vmvn_u16(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmvnq_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vmvnq_u16(
 // CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
 // CHECK:   ret <8 x i16> [[NEG_I]]
 uint16x8_t test_vmvnq_u16(uint16x8_t a) {
   return vmvnq_u16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmvn_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vmvn_u32(
 // CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
 // CHECK:   ret <2 x i32> [[NEG_I]]
 uint32x2_t test_vmvn_u32(uint32x2_t a) {
   return vmvn_u32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmvnq_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vmvnq_u32(
 // CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
 // CHECK:   ret <4 x i32> [[NEG_I]]
 uint32x4_t test_vmvnq_u32(uint32x4_t a) {
   return vmvnq_u32(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vmvn_p8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vmvn_p8(
 // CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 // CHECK:   ret <8 x i8> [[NEG_I]]
 poly8x8_t test_vmvn_p8(poly8x8_t a) {
   return vmvn_p8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vmvnq_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vmvnq_p8(
 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 // CHECK:   ret <16 x i8> [[NEG_I]]
 poly8x16_t test_vmvnq_p8(poly8x16_t a) {
   return vmvnq_p8(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrbit_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrbit_s8(
 // CHECK:   [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #2
 // CHECK:   ret <8 x i8> [[VRBIT_I]]
 int8x8_t test_vrbit_s8(int8x8_t a) {
   return vrbit_s8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrbitq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrbitq_s8(
 // CHECK:   [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #2
 // CHECK:   ret <16 x i8> [[VRBIT_I]]
 int8x16_t test_vrbitq_s8(int8x16_t a) {
   return vrbitq_s8(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrbit_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrbit_u8(
 // CHECK:   [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #2
 // CHECK:   ret <8 x i8> [[VRBIT_I]]
 uint8x8_t test_vrbit_u8(uint8x8_t a) {
   return vrbit_u8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrbitq_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrbitq_u8(
 // CHECK:   [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #2
 // CHECK:   ret <16 x i8> [[VRBIT_I]]
 uint8x16_t test_vrbitq_u8(uint8x16_t a) {
   return vrbitq_u8(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrbit_p8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrbit_p8(
 // CHECK:   [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #2
 // CHECK:   ret <8 x i8> [[VRBIT_I]]
 poly8x8_t test_vrbit_p8(poly8x8_t a) {
   return vrbit_p8(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrbitq_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrbitq_p8(
 // CHECK:   [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #2
 // CHECK:   ret <16 x i8> [[VRBIT_I]]
 poly8x16_t test_vrbitq_p8(poly8x16_t a) {
   return vrbitq_p8(a);
 }
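[Editor's aside, not part of the diff: rbit.v8i8/v16i8 reverses the bit order within each byte lane. A scalar sketch of one lane, assuming that per-byte semantics; the helper name is ours.]

#include <stdint.h>

static uint8_t rbit8_ref(uint8_t x) {
  uint8_t r = 0;
  for (int i = 0; i < 8; ++i)
    r |= (uint8_t)(((x >> i) & 1u) << (7 - i));   /* bit i -> bit 7-i */
  return r;                                       /* e.g. 0x01 -> 0x80 */
}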
 
-// CHECK-LABEL: define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vmovn_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
+// CHECK:   [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
 // CHECK:   ret <8 x i8> [[VMOVN_I]]
 int8x8_t test_vmovn_s16(int16x8_t a) {
   return vmovn_s16(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vmovn_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+// CHECK:   [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
 // CHECK:   ret <4 x i16> [[VMOVN_I]]
 int16x4_t test_vmovn_s32(int32x4_t a) {
   return vmovn_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vmovn_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+// CHECK:   [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
 // CHECK:   ret <2 x i32> [[VMOVN_I]]
 int32x2_t test_vmovn_s64(int64x2_t a) {
   return vmovn_s64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vmovn_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vmovn_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
+// CHECK:   [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
 // CHECK:   ret <8 x i8> [[VMOVN_I]]
 uint8x8_t test_vmovn_u16(uint16x8_t a) {
   return vmovn_u16(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmovn_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vmovn_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+// CHECK:   [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
 // CHECK:   ret <4 x i16> [[VMOVN_I]]
 uint16x4_t test_vmovn_u32(uint32x4_t a) {
   return vmovn_u32(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmovn_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vmovn_u64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+// CHECK:   [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
 // CHECK:   ret <2 x i32> [[VMOVN_I]]
 uint32x2_t test_vmovn_u64(uint64x2_t a) {
   return vmovn_u64(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vmovn_high_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VMOVN_I_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
+// CHECK:   [[VMOVN_I_I:%.*]] = trunc <8 x i16> %b to <8 x i8>
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VMOVN_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
 int8x16_t test_vmovn_high_s16(int8x8_t a, int16x8_t b) {
   return vmovn_high_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vmovn_high_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VMOVN_I_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+// CHECK:   [[VMOVN_I_I:%.*]] = trunc <4 x i32> %b to <4 x i16>
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VMOVN_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
 int16x8_t test_vmovn_high_s32(int16x4_t a, int32x4_t b) {
   return vmovn_high_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vmovn_high_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VMOVN_I_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+// CHECK:   [[VMOVN_I_I:%.*]] = trunc <2 x i64> %b to <2 x i32>
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VMOVN_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
 int32x4_t test_vmovn_high_s64(int32x2_t a, int64x2_t b) {
   return vmovn_high_s64(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vmovn_high_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VMOVN_I_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
+// CHECK:   [[VMOVN_I_I:%.*]] = trunc <8 x i16> %b to <8 x i8>
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VMOVN_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
 int8x16_t test_vmovn_high_u16(int8x8_t a, int16x8_t b) {
   return vmovn_high_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vmovn_high_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VMOVN_I_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+// CHECK:   [[VMOVN_I_I:%.*]] = trunc <4 x i32> %b to <4 x i16>
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VMOVN_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
 int16x8_t test_vmovn_high_u32(int16x4_t a, int32x4_t b) {
   return vmovn_high_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vmovn_high_u64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VMOVN_I_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+// CHECK:   [[VMOVN_I_I:%.*]] = trunc <2 x i64> %b to <2 x i32>
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VMOVN_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
 int32x4_t test_vmovn_high_u64(int32x2_t a, int64x2_t b) {
   return vmovn_high_u64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vqmovun_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[VQMOVUN_V_I]]) #2
+// CHECK:   [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %a) #2
 // CHECK:   ret <8 x i8> [[VQMOVUN_V1_I]]
 int8x8_t test_vqmovun_s16(int16x8_t a) {
   return vqmovun_s16(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vqmovun_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[VQMOVUN_V_I]]) #2
+// CHECK:   [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %a) #2
 // CHECK:   [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <4 x i16>
-// CHECK:   ret <4 x i16> [[TMP1]]
+// CHECK:   ret <4 x i16> [[VQMOVUN_V1_I]]
 int16x4_t test_vqmovun_s32(int32x4_t a) {
   return vqmovun_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vqmovun_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[VQMOVUN_V_I]]) #2
+// CHECK:   [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %a) #2
 // CHECK:   [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <2 x i32>
-// CHECK:   ret <2 x i32> [[TMP1]]
+// CHECK:   ret <2 x i32> [[VQMOVUN_V1_I]]
 int32x2_t test_vqmovun_s64(int64x2_t a) {
   return vqmovun_s64(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqmovun_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqmovun_high_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK:   [[VQMOVUN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VQMOVUN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[VQMOVUN_V_I_I]]) #2
+// CHECK:   [[VQMOVUN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %b) #2
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVUN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
 int8x16_t test_vqmovun_high_s16(int8x8_t a, int16x8_t b) {
   return vqmovun_high_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vqmovun_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqmovun_high_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK:   [[VQMOVUN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VQMOVUN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[VQMOVUN_V_I_I]]) #2
+// CHECK:   [[VQMOVUN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %b) #2
 // CHECK:   [[VQMOVUN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I_I]] to <4 x i16>
-// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVUN_V1_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
 int16x8_t test_vqmovun_high_s32(int16x4_t a, int32x4_t b) {
   return vqmovun_high_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqmovun_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqmovun_high_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK:   [[VQMOVUN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VQMOVUN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[VQMOVUN_V_I_I]]) #2
+// CHECK:   [[VQMOVUN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %b) #2
 // CHECK:   [[VQMOVUN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I_I]] to <2 x i32>
-// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVUN_V1_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
 int32x4_t test_vqmovun_high_s64(int32x2_t a, int64x2_t b) {
   return vqmovun_high_s64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vqmovn_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[VQMOVN_V_I]]) #2
+// CHECK:   [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %a) #2
 // CHECK:   ret <8 x i8> [[VQMOVN_V1_I]]
 int8x8_t test_vqmovn_s16(int16x8_t a) {
   return vqmovn_s16(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vqmovn_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[VQMOVN_V_I]]) #2
+// CHECK:   [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %a) #2
 // CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
-// CHECK:   ret <4 x i16> [[TMP1]]
+// CHECK:   ret <4 x i16> [[VQMOVN_V1_I]]
 int16x4_t test_vqmovn_s32(int32x4_t a) {
   return vqmovn_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vqmovn_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[VQMOVN_V_I]]) #2
+// CHECK:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %a) #2
 // CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
-// CHECK:   ret <2 x i32> [[TMP1]]
+// CHECK:   ret <2 x i32> [[VQMOVN_V1_I]]
 int32x2_t test_vqmovn_s64(int64x2_t a) {
   return vqmovn_s64(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqmovn_high_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK:   [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[VQMOVN_V_I_I]]) #2
+// CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %b) #2
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
 int8x16_t test_vqmovn_high_s16(int8x8_t a, int16x8_t b) {
   return vqmovn_high_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vqmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqmovn_high_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK:   [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[VQMOVN_V_I_I]]) #2
+// CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %b) #2
 // CHECK:   [[VQMOVN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <4 x i16>
-// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVN_V1_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
 int16x8_t test_vqmovn_high_s32(int16x4_t a, int32x4_t b) {
   return vqmovn_high_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqmovn_high_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK:   [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[VQMOVN_V_I_I]]) #2
+// CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %b) #2
 // CHECK:   [[VQMOVN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <2 x i32>
-// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVN_V1_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
 int32x4_t test_vqmovn_high_s64(int32x2_t a, int64x2_t b) {
   return vqmovn_high_s64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vqmovn_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK:   [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[VQMOVN_V_I]]) #2
+// CHECK:   [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %a) #2
 // CHECK:   ret <8 x i8> [[VQMOVN_V1_I]]
 uint8x8_t test_vqmovn_u16(uint16x8_t a) {
   return vqmovn_u16(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vqmovn_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[VQMOVN_V_I]]) #2
+// CHECK:   [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %a) #2
 // CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
-// CHECK:   ret <4 x i16> [[TMP1]]
+// CHECK:   ret <4 x i16> [[VQMOVN_V1_I]]
 uint16x4_t test_vqmovn_u32(uint32x4_t a) {
   return vqmovn_u32(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vqmovn_u64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[VQMOVN_V_I]]) #2
+// CHECK:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %a) #2
 // CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
-// CHECK:   ret <2 x i32> [[TMP1]]
+// CHECK:   ret <2 x i32> [[VQMOVN_V1_I]]
 uint32x2_t test_vqmovn_u64(uint64x2_t a) {
   return vqmovn_u64(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqmovn_high_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK:   [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[VQMOVN_V_I_I]]) #2
+// CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %b) #2
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
 uint8x16_t test_vqmovn_high_u16(uint8x8_t a, uint16x8_t b) {
   return vqmovn_high_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vqmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqmovn_high_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK:   [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[VQMOVN_V_I_I]]) #2
+// CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %b) #2
 // CHECK:   [[VQMOVN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <4 x i16>
-// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVN_V1_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
 uint16x8_t test_vqmovn_high_u32(uint16x4_t a, uint32x4_t b) {
   return vqmovn_high_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqmovn_high_u64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK:   [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[VQMOVN_V_I_I]]) #2
+// CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %b) #2
 // CHECK:   [[VQMOVN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <2 x i32>
-// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVN_V1_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
 uint32x4_t test_vqmovn_high_u64(uint32x2_t a, uint64x2_t b) {
   return vqmovn_high_u64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vshll_n_s8(
 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
 // CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
 // CHECK:   ret <8 x i16> [[VSHLL_N]]
@@ -2242,7 +2050,7 @@ int16x8_t test_vshll_n_s8(int8x8_t a) {
   return vshll_n_s8(a, 8);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshll_n_s16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
@@ -2252,7 +2060,7 @@ int32x4_t test_vshll_n_s16(int16x4_t a)
   return vshll_n_s16(a, 16);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshll_n_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
@@ -2262,7 +2070,7 @@ int64x2_t test_vshll_n_s32(int32x2_t a)
   return vshll_n_s32(a, 32);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vshll_n_u8(
 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
 // CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
 // CHECK:   ret <8 x i16> [[VSHLL_N]]
@@ -2270,7 +2078,7 @@ uint16x8_t test_vshll_n_u8(uint8x8_t a)
   return vshll_n_u8(a, 8);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshll_n_u16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
@@ -2280,7 +2088,7 @@ uint32x4_t test_vshll_n_u16(uint16x4_t a
   return vshll_n_u16(a, 16);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshll_n_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
@@ -2290,7 +2098,7 @@ uint64x2_t test_vshll_n_u32(uint32x2_t a
   return vshll_n_u32(a, 32);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vshll_high_n_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
 // CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -2299,7 +2107,7 @@ int16x8_t test_vshll_high_n_s8(int8x16_t
   return vshll_high_n_s8(a, 8);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshll_high_n_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -2310,7 +2118,7 @@ int32x4_t test_vshll_high_n_s16(int16x8_
   return vshll_high_n_s16(a, 16);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshll_high_n_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -2321,7 +2129,7 @@ int64x2_t test_vshll_high_n_s32(int32x4_
   return vshll_high_n_s32(a, 32);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vshll_high_n_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
 // CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -2330,7 +2138,7 @@ uint16x8_t test_vshll_high_n_u8(uint8x16
   return vshll_high_n_u8(a, 8);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshll_high_n_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -2341,7 +2149,7 @@ uint32x4_t test_vshll_high_n_u16(uint16x
   return vshll_high_n_u16(a, 16);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshll_high_n_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -2352,10 +2160,9 @@ uint64x2_t test_vshll_high_n_u32(uint32x
   return vshll_high_n_u32(a, 32);
 }
 
-// CHECK-LABEL: define <4 x half> @test_vcvt_f16_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvt_f16_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]]) #2
+// CHECK:   [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a) #2
 // CHECK:   [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half>
 // CHECK:   ret <4 x half> [[TMP1]]
@@ -2363,10 +2170,9 @@ float16x4_t test_vcvt_f16_f32(float32x4_
   return vcvt_f16_f32(a);
 }
 
-// CHECK-LABEL: define <8 x half> @test_vcvt_high_f16_f32(<4 x half> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vcvt_high_f16_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK:   [[VCVT_F16_F32_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVT_F16_F321_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I_I]]) #2
+// CHECK:   [[VCVT_F16_F321_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %b) #2
 // CHECK:   [[VCVT_F16_F322_I_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I_I]] to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I_I]] to <4 x half>
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x half> %a, <4 x half> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -2375,693 +2181,617 @@ float16x8_t test_vcvt_high_f16_f32(float
   return vcvt_high_f16_f32(a, b);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vcvt_f32_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvt_f32_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VCVT_I:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
+// CHECK:   [[VCVT_I:%.*]] = fptrunc <2 x double> %a to <2 x float>
 // CHECK:   ret <2 x float> [[VCVT_I]]
 float32x2_t test_vcvt_f32_f64(float64x2_t a) {
   return vcvt_f32_f64(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vcvt_high_f32_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VCVT_I_I:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
+// CHECK:   [[VCVT_I_I:%.*]] = fptrunc <2 x double> %b to <2 x float>
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x float> %a, <2 x float> [[VCVT_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK:   ret <4 x float> [[SHUFFLE_I_I]]
 float32x4_t test_vcvt_high_f32_f64(float32x2_t a, float64x2_t b) {
   return vcvt_high_f32_f64(a, b);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtx_f32_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VCVTX_F32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VCVTX_F32_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[VCVTX_F32_V_I]]) #2
+// CHECK:   [[VCVTX_F32_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x float> [[VCVTX_F32_V1_I]]
 float32x2_t test_vcvtx_f32_f64(float64x2_t a) {
   return vcvtx_f32_f64(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vcvtx_high_f32_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK:   [[VCVTX_F32_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VCVTX_F32_V1_I_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[VCVTX_F32_V_I_I]]) #2
+// CHECK:   [[VCVTX_F32_V1_I_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) #2
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x float> %a, <2 x float> [[VCVTX_F32_V1_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK:   ret <4 x float> [[SHUFFLE_I_I]]
 float32x4_t test_vcvtx_high_f32_f64(float32x2_t a, float64x2_t b) {
   return vcvtx_high_f32_f64(a, b);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vcvt_f32_f16(<4 x half> %a) #0 {
+// CHECK-LABEL: @test_vcvt_f32_f16(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
 // CHECK:   [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]]) #2
 // CHECK:   [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x float>
-// CHECK:   ret <4 x float> [[TMP1]]
+// CHECK:   ret <4 x float> [[VCVT_F32_F161_I]]
 float32x4_t test_vcvt_f32_f16(float16x4_t a) {
   return vcvt_f32_f16(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vcvt_high_f32_f16(<8 x half> %a) #0 {
+// CHECK-LABEL: @test_vcvt_high_f32_f16(
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> [[SHUFFLE_I_I]] to <8 x i8>
 // CHECK:   [[VCVT_F32_F16_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[VCVT_F32_F161_I_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I_I]]) #2
 // CHECK:   [[VCVT_F32_F162_I_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I_I]] to <4 x float>
-// CHECK:   ret <4 x float> [[TMP1]]
+// CHECK:   ret <4 x float> [[VCVT_F32_F161_I_I]]
 float32x4_t test_vcvt_high_f32_f16(float16x8_t a) {
   return vcvt_high_f32_f16(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vcvt_f64_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvt_f64_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVT_I:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
+// CHECK:   [[VCVT_I:%.*]] = fpext <2 x float> %a to <2 x double>
 // CHECK:   ret <2 x double> [[VCVT_I]]
 float64x2_t test_vcvt_f64_f32(float32x2_t a) {
   return vcvt_f64_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvt_high_f64_f32(
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVT_I_I:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
+// CHECK:   [[VCVT_I_I:%.*]] = fpext <2 x float> [[SHUFFLE_I_I]] to <2 x double>
 // CHECK:   ret <2 x double> [[VCVT_I_I]]
 float64x2_t test_vcvt_high_f64_f32(float32x4_t a) {
   return vcvt_high_f64_f32(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndn_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRNDN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRNDN1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> [[VRNDN_I]]) #2
+// CHECK:   [[VRNDN1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x float> [[VRNDN1_I]]
 float32x2_t test_vrndn_f32(float32x2_t a) {
   return vrndn_f32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndnq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDN1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> [[VRNDN_I]]) #2
+// CHECK:   [[VRNDN1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x float> [[VRNDN1_I]]
 float32x4_t test_vrndnq_f32(float32x4_t a) {
   return vrndnq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vrndnq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndnq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VRNDN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VRNDN1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> [[VRNDN_I]]) #2
+// CHECK:   [[VRNDN1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x double> [[VRNDN1_I]]
 float64x2_t test_vrndnq_f64(float64x2_t a) {
   return vrndnq_f64(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrnda_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRNDA1_I:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> [[VRNDA_I]]) #2
+// CHECK:   [[VRNDA1_I:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x float> [[VRNDA1_I]]
 float32x2_t test_vrnda_f32(float32x2_t a) {
   return vrnda_f32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndaq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDA1_I:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[VRNDA_I]]) #2
+// CHECK:   [[VRNDA1_I:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x float> [[VRNDA1_I]]
 float32x4_t test_vrndaq_f32(float32x4_t a) {
   return vrndaq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vrndaq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndaq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[VRNDA_I]]) #2
+// CHECK:   [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x double> [[VRNDA1_I]]
 float64x2_t test_vrndaq_f64(float64x2_t a) {
   return vrndaq_f64(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndp_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRNDP1_I:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[VRNDP_I]]) #2
+// CHECK:   [[VRNDP1_I:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x float> [[VRNDP1_I]]
 float32x2_t test_vrndp_f32(float32x2_t a) {
   return vrndp_f32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndpq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDP1_I:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[VRNDP_I]]) #2
+// CHECK:   [[VRNDP1_I:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x float> [[VRNDP1_I]]
 float32x4_t test_vrndpq_f32(float32x4_t a) {
   return vrndpq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vrndpq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndpq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[VRNDP_I]]) #2
+// CHECK:   [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x double> [[VRNDP1_I]]
 float64x2_t test_vrndpq_f64(float64x2_t a) {
   return vrndpq_f64(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndm_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRNDM1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[VRNDM_I]]) #2
+// CHECK:   [[VRNDM1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x float> [[VRNDM1_I]]
 float32x2_t test_vrndm_f32(float32x2_t a) {
   return vrndm_f32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndmq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDM1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[VRNDM_I]]) #2
+// CHECK:   [[VRNDM1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x float> [[VRNDM1_I]]
 float32x4_t test_vrndmq_f32(float32x4_t a) {
   return vrndmq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vrndmq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndmq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[VRNDM_I]]) #2
+// CHECK:   [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x double> [[VRNDM1_I]]
 float64x2_t test_vrndmq_f64(float64x2_t a) {
   return vrndmq_f64(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndx_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRNDX1_I:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> [[VRNDX_I]]) #2
+// CHECK:   [[VRNDX1_I:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x float> [[VRNDX1_I]]
 float32x2_t test_vrndx_f32(float32x2_t a) {
   return vrndx_f32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndxq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDX1_I:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[VRNDX_I]]) #2
+// CHECK:   [[VRNDX1_I:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x float> [[VRNDX1_I]]
 float32x4_t test_vrndxq_f32(float32x4_t a) {
   return vrndxq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vrndxq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndxq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[VRNDX_I]]) #2
+// CHECK:   [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x double> [[VRNDX1_I]]
 float64x2_t test_vrndxq_f64(float64x2_t a) {
   return vrndxq_f64(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrnd_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRNDZ1_I:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> [[VRNDZ_I]]) #2
+// CHECK:   [[VRNDZ1_I:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x float> [[VRNDZ1_I]]
 float32x2_t test_vrnd_f32(float32x2_t a) {
   return vrnd_f32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDZ1_I:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[VRNDZ_I]]) #2
+// CHECK:   [[VRNDZ1_I:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x float> [[VRNDZ1_I]]
 float32x4_t test_vrndq_f32(float32x4_t a) {
   return vrndq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vrndq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[VRNDZ_I]]) #2
+// CHECK:   [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x double> [[VRNDZ1_I]]
 float64x2_t test_vrndq_f64(float64x2_t a) {
   return vrndq_f64(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vrndi_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndi_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRNDI_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRNDI1_I:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> [[VRNDI_I]]) #2
+// CHECK:   [[VRNDI1_I:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x float> [[VRNDI1_I]]
 float32x2_t test_vrndi_f32(float32x2_t a) {
   return vrndi_f32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vrndiq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndiq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDI_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDI1_I:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[VRNDI_I]]) #2
+// CHECK:   [[VRNDI1_I:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x float> [[VRNDI1_I]]
 float32x4_t test_vrndiq_f32(float32x4_t a) {
   return vrndiq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vrndiq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndiq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VRNDI_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VRNDI1_I:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[VRNDI_I]]) #2
+// CHECK:   [[VRNDI1_I:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x double> [[VRNDI1_I]]
 float64x2_t test_vrndiq_f64(float64x2_t a) {
   return vrndiq_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvt_s32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[TMP2:%.*]] = fptosi <2 x float> [[TMP1]] to <2 x i32>
-// CHECK:   ret <2 x i32> [[TMP2]]
+// CHECK:   [[TMP1:%.*]] = fptosi <2 x float> %a to <2 x i32>
+// CHECK:   ret <2 x i32> [[TMP1]]
 int32x2_t test_vcvt_s32_f32(float32x2_t a) {
   return vcvt_s32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_s32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[TMP2:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
-// CHECK:   ret <4 x i32> [[TMP2]]
+// CHECK:   [[TMP1:%.*]] = fptosi <4 x float> %a to <4 x i32>
+// CHECK:   ret <4 x i32> [[TMP1]]
 int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
   return vcvtq_s32_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcvtq_s64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_s64_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[TMP2:%.*]] = fptosi <2 x double> [[TMP1]] to <2 x i64>
-// CHECK:   ret <2 x i64> [[TMP2]]
+// CHECK:   [[TMP1:%.*]] = fptosi <2 x double> %a to <2 x i64>
+// CHECK:   ret <2 x i64> [[TMP1]]
 int64x2_t test_vcvtq_s64_f64(float64x2_t a) {
   return vcvtq_s64_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvt_u32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[TMP2:%.*]] = fptoui <2 x float> [[TMP1]] to <2 x i32>
-// CHECK:   ret <2 x i32> [[TMP2]]
+// CHECK:   [[TMP1:%.*]] = fptoui <2 x float> %a to <2 x i32>
+// CHECK:   ret <2 x i32> [[TMP1]]
 uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
   return vcvt_u32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_u32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[TMP2:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i32>
-// CHECK:   ret <4 x i32> [[TMP2]]
+// CHECK:   [[TMP1:%.*]] = fptoui <4 x float> %a to <4 x i32>
+// CHECK:   ret <4 x i32> [[TMP1]]
 uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
   return vcvtq_u32_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_u64_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[TMP2:%.*]] = fptoui <2 x double> [[TMP1]] to <2 x i64>
-// CHECK:   ret <2 x i64> [[TMP2]]
+// CHECK:   [[TMP1:%.*]] = fptoui <2 x double> %a to <2 x i64>
+// CHECK:   ret <2 x i64> [[TMP1]]
 uint64x2_t test_vcvtq_u64_f64(float64x2_t a) {
   return vcvtq_u64_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtn_s32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> [[VCVTN_I]]) #2
+// CHECK:   [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTN1_I]]
 int32x2_t test_vcvtn_s32_f32(float32x2_t a) {
   return vcvtn_s32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtnq_s32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> [[VCVTN_I]]) #2
+// CHECK:   [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTN1_I]]
 int32x4_t test_vcvtnq_s32_f32(float32x4_t a) {
   return vcvtnq_s32_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtnq_s64_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> [[VCVTN_I]]) #2
+// CHECK:   [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x i64> [[VCVTN1_I]]
 int64x2_t test_vcvtnq_s64_f64(float64x2_t a) {
   return vcvtnq_s64_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtn_u32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> [[VCVTN_I]]) #2
+// CHECK:   [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTN1_I]]
 uint32x2_t test_vcvtn_u32_f32(float32x2_t a) {
   return vcvtn_u32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtnq_u32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> [[VCVTN_I]]) #2
+// CHECK:   [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTN1_I]]
 uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) {
   return vcvtnq_u32_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtnq_u64_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> [[VCVTN_I]]) #2
+// CHECK:   [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x i64> [[VCVTN1_I]]
 uint64x2_t test_vcvtnq_u64_f64(float64x2_t a) {
   return vcvtnq_u64_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtp_s32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> [[VCVTP_I]]) #2
+// CHECK:   [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTP1_I]]
 int32x2_t test_vcvtp_s32_f32(float32x2_t a) {
   return vcvtp_s32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtpq_s32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> [[VCVTP_I]]) #2
+// CHECK:   [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTP1_I]]
 int32x4_t test_vcvtpq_s32_f32(float32x4_t a) {
   return vcvtpq_s32_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtpq_s64_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> [[VCVTP_I]]) #2
+// CHECK:   [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x i64> [[VCVTP1_I]]
 int64x2_t test_vcvtpq_s64_f64(float64x2_t a) {
   return vcvtpq_s64_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtp_u32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> [[VCVTP_I]]) #2
+// CHECK:   [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTP1_I]]
 uint32x2_t test_vcvtp_u32_f32(float32x2_t a) {
   return vcvtp_u32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtpq_u32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> [[VCVTP_I]]) #2
+// CHECK:   [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTP1_I]]
 uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) {
   return vcvtpq_u32_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtpq_u64_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> [[VCVTP_I]]) #2
+// CHECK:   [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x i64> [[VCVTP1_I]]
 uint64x2_t test_vcvtpq_u64_f64(float64x2_t a) {
   return vcvtpq_u64_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtm_s32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> [[VCVTM_I]]) #2
+// CHECK:   [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTM1_I]]
 int32x2_t test_vcvtm_s32_f32(float32x2_t a) {
   return vcvtm_s32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtmq_s32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> [[VCVTM_I]]) #2
+// CHECK:   [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTM1_I]]
 int32x4_t test_vcvtmq_s32_f32(float32x4_t a) {
   return vcvtmq_s32_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtmq_s64_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> [[VCVTM_I]]) #2
+// CHECK:   [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x i64> [[VCVTM1_I]]
 int64x2_t test_vcvtmq_s64_f64(float64x2_t a) {
   return vcvtmq_s64_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtm_u32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> [[VCVTM_I]]) #2
+// CHECK:   [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTM1_I]]
 uint32x2_t test_vcvtm_u32_f32(float32x2_t a) {
   return vcvtm_u32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtmq_u32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> [[VCVTM_I]]) #2
+// CHECK:   [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTM1_I]]
 uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) {
   return vcvtmq_u32_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtmq_u64_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> [[VCVTM_I]]) #2
+// CHECK:   [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x i64> [[VCVTM1_I]]
 uint64x2_t test_vcvtmq_u64_f64(float64x2_t a) {
   return vcvtmq_u64_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvta_s32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> [[VCVTA_I]]) #2
+// CHECK:   [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTA1_I]]
 int32x2_t test_vcvta_s32_f32(float32x2_t a) {
   return vcvta_s32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtaq_s32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> [[VCVTA_I]]) #2
+// CHECK:   [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTA1_I]]
 int32x4_t test_vcvtaq_s32_f32(float32x4_t a) {
   return vcvtaq_s32_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtaq_s64_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> [[VCVTA_I]]) #2
+// CHECK:   [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x i64> [[VCVTA1_I]]
 int64x2_t test_vcvtaq_s64_f64(float64x2_t a) {
   return vcvtaq_s64_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvta_u32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> [[VCVTA_I]]) #2
+// CHECK:   [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTA1_I]]
 uint32x2_t test_vcvta_u32_f32(float32x2_t a) {
   return vcvta_u32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtaq_u32_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> [[VCVTA_I]]) #2
+// CHECK:   [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTA1_I]]
 uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) {
   return vcvtaq_u32_f32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtaq_u64_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> [[VCVTA_I]]) #2
+// CHECK:   [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x i64> [[VCVTA1_I]]
 uint64x2_t test_vcvtaq_u64_f64(float64x2_t a) {
   return vcvtaq_u64_f64(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrsqrte_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> [[VRSQRTE_V_I]]) #2
+// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x float> [[VRSQRTE_V1_I]]
 float32x2_t test_vrsqrte_f32(float32x2_t a) {
   return vrsqrte_f32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrsqrteq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> [[VRSQRTEQ_V_I]]) #2
+// CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x float> [[VRSQRTEQ_V1_I]]
 float32x4_t test_vrsqrteq_f32(float32x4_t a) {
   return vrsqrteq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vrsqrteq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrsqrteq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> [[VRSQRTEQ_V_I]]) #2
+// CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x double> [[VRSQRTEQ_V1_I]]
 float64x2_t test_vrsqrteq_f64(float64x2_t a) {
   return vrsqrteq_f64(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrecpe_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> [[VRECPE_V_I]]) #2
+// CHECK:   [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x float> [[VRECPE_V1_I]]
 float32x2_t test_vrecpe_f32(float32x2_t a) {
   return vrecpe_f32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrecpeq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> [[VRECPEQ_V_I]]) #2
+// CHECK:   [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x float> [[VRECPEQ_V1_I]]
 float32x4_t test_vrecpeq_f32(float32x4_t a) {
   return vrecpeq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vrecpeq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrecpeq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VRECPEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> [[VRECPEQ_V_I]]) #2
+// CHECK:   [[VRECPEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x double> [[VRECPEQ_V1_I]]
 float64x2_t test_vrecpeq_f64(float64x2_t a) {
   return vrecpeq_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrecpe_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> [[VRECPE_V_I]]) #2
+// CHECK:   [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %a) #2
 // CHECK:   ret <2 x i32> [[VRECPE_V1_I]]
 uint32x2_t test_vrecpe_u32(uint32x2_t a) {
   return vrecpe_u32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrecpeq_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> [[VRECPEQ_V_I]]) #2
+// CHECK:   [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %a) #2
 // CHECK:   ret <4 x i32> [[VRECPEQ_V1_I]]
 uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
   return vrecpeq_u32(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vsqrt_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VSQRT_I:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[TMP1]]) #2
+// CHECK:   [[VSQRT_I:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x float> [[VSQRT_I]]
 float32x2_t test_vsqrt_f32(float32x2_t a) {
   return vsqrt_f32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vsqrtq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP1]]) #2
+// CHECK:   [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x float> [[VSQRT_I]]
 float32x4_t test_vsqrtq_f32(float32x4_t a) {
   return vsqrtq_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vsqrtq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK:   [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1]]) #2
+// CHECK:   [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2
 // CHECK:   ret <2 x double> [[VSQRT_I]]
 float64x2_t test_vsqrtq_f64(float64x2_t a) {
   return vsqrtq_f64(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcvt_f32_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[VCVT_I:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+// CHECK:   [[VCVT_I:%.*]] = sitofp <2 x i32> %a to <2 x float>
 // CHECK:   ret <2 x float> [[VCVT_I]]
 float32x2_t test_vcvt_f32_s32(int32x2_t a) {
   return vcvt_f32_s32(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcvt_f32_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK:   [[VCVT_I:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x float>
+// CHECK:   [[VCVT_I:%.*]] = uitofp <2 x i32> %a to <2 x float>
 // CHECK:   ret <2 x float> [[VCVT_I]]
 float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
   return vcvt_f32_u32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_f32_s32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VCVT_I:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
+// CHECK:   [[VCVT_I:%.*]] = sitofp <4 x i32> %a to <4 x float>
 // CHECK:   ret <4 x float> [[VCVT_I]]
 float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
   return vcvtq_f32_s32(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_f32_u32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK:   [[VCVT_I:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
+// CHECK:   [[VCVT_I:%.*]] = uitofp <4 x i32> %a to <4 x float>
 // CHECK:   ret <4 x float> [[VCVT_I]]
 float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
   return vcvtq_f32_u32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vcvtq_f64_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_f64_s64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VCVT_I:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
+// CHECK:   [[VCVT_I:%.*]] = sitofp <2 x i64> %a to <2 x double>
 // CHECK:   ret <2 x double> [[VCVT_I]]
 float64x2_t test_vcvtq_f64_s64(int64x2_t a) {
   return vcvtq_f64_s64(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_f64_u64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VCVT_I:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
+// CHECK:   [[VCVT_I:%.*]] = uitofp <2 x i64> %a to <2 x double>
 // CHECK:   ret <2 x double> [[VCVT_I]]
 float64x2_t test_vcvtq_f64_u64(uint64x2_t a) {
  return vcvtq_f64_u64(a);
 }

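As a side note (not part of the commit itself), here is a minimal C sketch of how a couple of the intrinsics covered by the updated CHECK lines above are exercised; the revised checks match the same calls, only without the redundant bitcast temporaries the old IR printed. The driver below is hypothetical and assumes an AArch64/NEON target.

  #include <arm_neon.h>
  #include <stdio.h>

  /* Illustrative driver (hypothetical, not from the commit); assumes an
     AArch64 target so that vsqrtq_f32 and vcvtq_f64_u64 are available. */
  int main(void) {
    float in[4] = {1.0f, 4.0f, 9.0f, 16.0f};
    float out[4];
    /* Per the updated checks, this becomes a direct call to
       llvm.sqrt.v4f32 on the loaded vector, with no intermediate bitcasts. */
    float32x4_t r = vsqrtq_f32(vld1q_f32(in));
    vst1q_f32(out, r);

    uint64_t u[2] = {1, 2};
    double d[2];
    /* Likewise, vcvtq_f64_u64 now lowers to a plain uitofp of the argument. */
    vst1q_f64(d, vcvtq_f64_u64(vld1q_u64(u)));

    printf("%f %f %f %f | %f %f\n", out[0], out[1], out[2], out[3], d[0], d[1]);
    return 0;
  }
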
Modified: cfe/trunk/test/CodeGen/aarch64-neon-perm.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-perm.c?rev=276728&r1=276727&r2=276728&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-perm.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-perm.c Tue Jul 26 00:52:37 2016
@@ -4,889 +4,889 @@
 // Test new aarch64 intrinsics and types
 #include <arm_neon.h>
 
-// CHECK-LABEL: define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) {
   return vuzp1_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) {
   return vuzp1q_s8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) {
   return vuzp1_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) {
   return vuzp1q_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) {
   return vuzp1_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) {
   return vuzp1q_s32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_s64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) {
   return vuzp1q_s64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) {
   return vuzp1_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) {
   return vuzp1q_u8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) {
   return vuzp1_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) {
   return vuzp1q_u16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) {
   return vuzp1_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) {
   return vuzp1q_u32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_u64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) {
   return vuzp1q_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) {
   return vuzp1_f32(a, b);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) {
   return vuzp1q_f32(a, b);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_f64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) {
   return vuzp1q_f64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) {
   return vuzp1_p8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) {
   return vuzp1q_p8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) {
   return vuzp1_p16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) {
   return vuzp1q_p16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) {
   return vuzp2_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) {
   return vuzp2q_s8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) {
   return vuzp2_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) {
   return vuzp2q_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) {
   return vuzp2_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) {
   return vuzp2q_s32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_s64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) {
   return vuzp2q_s64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) {
   return vuzp2_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) {
   return vuzp2q_u8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) {
   return vuzp2_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) {
   return vuzp2q_u16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) {
   return vuzp2_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) {
   return vuzp2q_u32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_u64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) {
   return vuzp2q_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) {
   return vuzp2_f32(a, b);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) {
   return vuzp2q_f32(a, b);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_f64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) {
   return vuzp2q_f64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) {
   return vuzp2_p8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) {
   return vuzp2q_p8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) {
   return vuzp2_p16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) {
   return vuzp2q_p16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip1_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) {
   return vzip1_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) {
   return vzip1q_s8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip1_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) {
   return vzip1_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) {
   return vzip1q_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip1_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) {
   return vzip1_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) {
   return vzip1q_s32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_s64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) {
   return vzip1q_s64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip1_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) {
   return vzip1_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) {
   return vzip1q_u8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip1_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) {
   return vzip1_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) {
   return vzip1q_u16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip1_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) {
   return vzip1_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) {
   return vzip1q_u32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_u64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) {
   return vzip1q_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vzip1_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) {
   return vzip1_f32(a, b);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) {
   return vzip1q_f32(a, b);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_f64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) {
   return vzip1q_f64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip1_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) {
   return vzip1_p8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) {
   return vzip1q_p8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip1_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) {
   return vzip1_p16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) {
   return vzip1q_p16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip2_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) {
   return vzip2_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) {
   return vzip2q_s8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip2_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) {
   return vzip2_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) {
   return vzip2q_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip2_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) {
   return vzip2_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) {
   return vzip2q_s32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_s64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) {
   return vzip2q_s64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip2_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) {
   return vzip2_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) {
   return vzip2q_u8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip2_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) {
   return vzip2_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) {
   return vzip2q_u16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip2_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) {
   return vzip2_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) {
   return vzip2q_u32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_u64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) {
   return vzip2q_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vzip2_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) {
   return vzip2_f32(a, b);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) {
   return vzip2q_f32(a, b);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_f64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) {
   return vzip2q_f64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip2_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) {
   return vzip2_p8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) {
   return vzip2q_p8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip2_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) {
   return vzip2_p16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) {
   return vzip2q_p16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) {
   return vtrn1_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) {
   return vtrn1q_s8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) {
   return vtrn1_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) {
   return vtrn1q_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) {
   return vtrn1_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) {
   return vtrn1q_s32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_s64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) {
   return vtrn1q_s64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) {
   return vtrn1_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) {
   return vtrn1q_u8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) {
   return vtrn1_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) {
   return vtrn1q_u16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) {
   return vtrn1_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) {
   return vtrn1q_u32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_u64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) {
   return vtrn1q_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) {
   return vtrn1_f32(a, b);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) {
   return vtrn1q_f32(a, b);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_f64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) {
   return vtrn1q_f64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) {
   return vtrn1_p8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) {
   return vtrn1q_p8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) {
   return vtrn1_p16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) {
   return vtrn1q_p16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn2_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) {
   return vtrn2_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn2q_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) {
   return vtrn2q_s8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn2_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) {
   return vtrn2_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn2q_s16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) {
   return vtrn2q_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vtrn2_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) {
   return vtrn2_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vtrn2q_s32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) {
   return vtrn2q_s32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vtrn2q_s64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) {
   return vtrn2q_s64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn2_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) {
   return vtrn2_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn2q_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) {
   return vtrn2q_u8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn2_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) {
   return vtrn2_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn2q_u16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) {
   return vtrn2q_u16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vtrn2_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) {
   return vtrn2_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vtrn2q_u32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) {
   return vtrn2q_u32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vtrn2q_u64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) {
   return vtrn2q_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vtrn2_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) {
   return vtrn2_f32(a, b);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vtrn2q_f32(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) {
   return vtrn2q_f32(a, b);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vtrn2q_f64(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) {
   return vtrn2q_f64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn2_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) {
   return vtrn2_p8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn2q_p8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) {
   return vtrn2q_p8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn2_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) {
   return vtrn2_p16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn2q_p16(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) {
   return vtrn2q_p16(a, b);
 }
 
-// CHECK-LABEL: define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp_s8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
@@ -910,7 +910,7 @@ int8x8x2_t test_vuzp_s8(int8x8_t a, int8
   return vuzp_s8(a, b);
 }
 
-// CHECK-LABEL: define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp_s16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
@@ -918,26 +918,25 @@ int8x8x2_t test_vuzp_s8(int8x8_t a, int8
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.int16x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.int16x4x2_t [[TMP10]]
 int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
   return vuzp_s16(a, b);
 }
-// CHECK-LABEL: define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vuzp_s32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
@@ -945,26 +944,25 @@ int16x4x2_t test_vuzp_s16(int16x4_t a, i
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
-// CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
-// CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
-// CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.int32x2x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
+// CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+// CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
+// CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.int32x2x2_t [[TMP10]]
 int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
   return vuzp_s32(a, b);
 }
-// CHECK-LABEL: define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vuzp_u8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
@@ -987,7 +985,8 @@ int32x2x2_t test_vuzp_s32(int32x2_t a, i
 uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
   return vuzp_u8(a, b);
 }
-// CHECK-LABEL: define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vuzp_u16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
@@ -995,26 +994,25 @@ uint8x8x2_t test_vuzp_u8(uint8x8_t a, ui
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.uint16x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
 uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
   return vuzp_u16(a, b);
 }
-// CHECK-LABEL: define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vuzp_u32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
@@ -1022,26 +1020,25 @@ uint16x4x2_t test_vuzp_u16(uint16x4_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
-// CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
-// CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
-// CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.uint32x2x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
+// CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+// CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
+// CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
 uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
   return vuzp_u32(a, b);
 }
-// CHECK-LABEL: define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) #0 {
+
+// CHECK-LABEL: @test_vuzp_f32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
@@ -1049,26 +1046,25 @@ uint32x2x2_t test_vuzp_u32(uint32x2_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
-// CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
-// CHECK:   store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
-// CHECK:   store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.float32x2x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
+// CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+// CHECK:   store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
+// CHECK:   store [2 x <2 x float>] [[TMP9]], [2 x <2 x float>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.float32x2x2_t [[TMP10]]
 float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
   return vuzp_f32(a, b);
 }
-// CHECK-LABEL: define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vuzp_p8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
@@ -1091,7 +1087,8 @@ float32x2x2_t test_vuzp_f32(float32x2_t
 poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
   return vuzp_p8(a, b);
 }
-// CHECK-LABEL: define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vuzp_p16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
@@ -1099,26 +1096,25 @@ poly8x8x2_t test_vuzp_p8(poly8x8_t a, po
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.poly16x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
 poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
   return vuzp_p16(a, b);
 }
-// CHECK-LABEL: define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vuzpq_s8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
@@ -1141,7 +1137,8 @@ poly16x4x2_t test_vuzp_p16(poly16x4_t a,
 int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
   return vuzpq_s8(a, b);
 }
-// CHECK-LABEL: define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vuzpq_s16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
@@ -1149,26 +1146,25 @@ int8x16x2_t test_vuzpq_s8(int8x16_t a, i
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
-// CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.int16x8x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
+// CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.int16x8x2_t [[TMP10]]
 int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
   return vuzpq_s16(a, b);
 }
-// CHECK-LABEL: define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vuzpq_s32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
@@ -1176,26 +1172,25 @@ int16x8x2_t test_vuzpq_s16(int16x8_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
-// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.int32x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
+// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.int32x4x2_t [[TMP10]]
 int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
   return vuzpq_s32(a, b);
 }
-// CHECK-LABEL: define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vuzpq_u8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
@@ -1218,7 +1213,8 @@ int32x4x2_t test_vuzpq_s32(int32x4_t a,
 uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
   return vuzpq_u8(a, b);
 }
-// CHECK-LABEL: define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vuzpq_u16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
@@ -1226,26 +1222,25 @@ uint8x16x2_t test_vuzpq_u8(uint8x16_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
-// CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.uint16x8x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
+// CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
 uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
   return vuzpq_u16(a, b);
 }
-// CHECK-LABEL: define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vuzpq_u32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
@@ -1253,26 +1248,25 @@ uint16x8x2_t test_vuzpq_u16(uint16x8_t a
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
-// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.uint32x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
+// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
 uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
   return vuzpq_u32(a, b);
 }
-// CHECK-LABEL: define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) #0 {
+
+// CHECK-LABEL: @test_vuzpq_f32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
@@ -1280,26 +1274,25 @@ uint32x4x2_t test_vuzpq_u32(uint32x4_t a
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK:   store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
-// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK:   store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.float32x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
+// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK:   store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x float>] [[TMP9]], [2 x <4 x float>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.float32x4x2_t [[TMP10]]
 float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
   return vuzpq_f32(a, b);
 }
-// CHECK-LABEL: define %struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vuzpq_p8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
@@ -1322,7 +1315,8 @@ float32x4x2_t test_vuzpq_f32(float32x4_t
 poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
   return vuzpq_p8(a, b);
 }
-// CHECK-LABEL: define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vuzpq_p16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
@@ -1330,27 +1324,25 @@ poly8x16x2_t test_vuzpq_p8(poly8x16_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
-// CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.poly16x8x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
+// CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
 poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
   return vuzpq_p16(a, b);
 }
 
-// CHECK-LABEL: define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip_s8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
@@ -1374,7 +1366,7 @@ int8x8x2_t test_vzip_s8(int8x8_t a, int8
   return vzip_s8(a, b);
 }
 
-// CHECK-LABEL: define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip_s16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
@@ -1382,26 +1374,25 @@ int8x8x2_t test_vzip_s8(int8x8_t a, int8
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.int16x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.int16x4x2_t [[TMP10]]
 int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
   return vzip_s16(a, b);
 }
-// CHECK-LABEL: define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_s32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
@@ -1409,26 +1400,25 @@ int16x4x2_t test_vzip_s16(int16x4_t a, i
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
-// CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
-// CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
-// CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.int32x2x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+// CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
+// CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.int32x2x2_t [[TMP10]]
 int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
   return vzip_s32(a, b);
 }
-// CHECK-LABEL: define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_u8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
@@ -1451,7 +1441,8 @@ int32x2x2_t test_vzip_s32(int32x2_t a, i
 uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
   return vzip_u8(a, b);
 }
-// CHECK-LABEL: define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_u16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
@@ -1459,26 +1450,25 @@ uint8x8x2_t test_vzip_u8(uint8x8_t a, ui
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.uint16x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
 uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
   return vzip_u16(a, b);
 }
-// CHECK-LABEL: define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_u32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
@@ -1486,26 +1476,25 @@ uint16x4x2_t test_vzip_u16(uint16x4_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
-// CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
-// CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
-// CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.uint32x2x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+// CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
+// CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
 uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
   return vzip_u32(a, b);
 }
-// CHECK-LABEL: define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_f32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
@@ -1513,26 +1502,25 @@ uint32x2x2_t test_vzip_u32(uint32x2_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
-// CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
-// CHECK:   store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
-// CHECK:   store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.float32x2x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+// CHECK:   store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
+// CHECK:   store [2 x <2 x float>] [[TMP9]], [2 x <2 x float>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.float32x2x2_t [[TMP10]]
 float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
   return vzip_f32(a, b);
 }
-// CHECK-LABEL: define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_p8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
@@ -1555,7 +1543,8 @@ float32x2x2_t test_vzip_f32(float32x2_t
 poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
   return vzip_p8(a, b);
 }
-// CHECK-LABEL: define %struct.poly16x4x2_t @test_vzip_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_p16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
@@ -1563,26 +1552,25 @@ poly8x8x2_t test_vzip_p8(poly8x8_t a, po
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.poly16x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
 poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
   return vzip_p16(a, b);
 }
-// CHECK-LABEL: define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_s8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
@@ -1605,7 +1593,8 @@ poly16x4x2_t test_vzip_p16(poly16x4_t a,
 int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
   return vzipq_s8(a, b);
 }
-// CHECK-LABEL: define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_s16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
@@ -1613,26 +1602,25 @@ int8x16x2_t test_vzipq_s8(int8x16_t a, i
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
-// CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.int16x8x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
+// CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.int16x8x2_t [[TMP10]]
 int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
   return vzipq_s16(a, b);
 }
-// CHECK-LABEL: define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_s32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
@@ -1640,26 +1628,25 @@ int16x8x2_t test_vzipq_s16(int16x8_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
-// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.int32x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.int32x4x2_t [[TMP10]]
 int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
   return vzipq_s32(a, b);
 }
-// CHECK-LABEL: define %struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_u8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
@@ -1682,7 +1669,8 @@ int32x4x2_t test_vzipq_s32(int32x4_t a,
 uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
   return vzipq_u8(a, b);
 }
-// CHECK-LABEL: define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_u16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
@@ -1690,26 +1678,25 @@ uint8x16x2_t test_vzipq_u8(uint8x16_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
-// CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.uint16x8x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
+// CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
 uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
   return vzipq_u16(a, b);
 }
-// CHECK-LABEL: define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_u32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
@@ -1717,26 +1704,25 @@ uint16x8x2_t test_vzipq_u16(uint16x8_t a
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
-// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.uint32x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
 uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
   return vzipq_u32(a, b);
 }
-// CHECK-LABEL: define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_f32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
@@ -1744,26 +1730,25 @@ uint32x4x2_t test_vzipq_u32(uint32x4_t a
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK:   store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
-// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK:   store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.float32x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK:   store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x float>] [[TMP9]], [2 x <4 x float>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.float32x4x2_t [[TMP10]]
 float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
   return vzipq_f32(a, b);
 }
-// CHECK-LABEL: define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_p8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
@@ -1786,7 +1771,8 @@ float32x4x2_t test_vzipq_f32(float32x4_t
 poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
   return vzipq_p8(a, b);
 }
-// CHECK-LABEL: define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_p16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
@@ -1794,27 +1780,25 @@ poly8x16x2_t test_vzipq_p8(poly8x16_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
-// CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.poly16x8x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
+// CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
 poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
   return vzipq_p16(a, b);
 }
 
-// CHECK-LABEL: define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn_s8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
@@ -1838,7 +1822,7 @@ int8x8x2_t test_vtrn_s8(int8x8_t a, int8
   return vtrn_s8(a, b);
 }
 
-// CHECK-LABEL: define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn_s16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
@@ -1846,26 +1830,25 @@ int8x8x2_t test_vtrn_s8(int8x8_t a, int8
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.int16x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.int16x4x2_t [[TMP10]]
 int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
   return vtrn_s16(a, b);
 }
-// CHECK-LABEL: define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_s32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
@@ -1873,26 +1856,25 @@ int16x4x2_t test_vtrn_s16(int16x4_t a, i
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
-// CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
-// CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
-// CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.int32x2x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
+// CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+// CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
+// CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.int32x2x2_t [[TMP10]]
 int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
   return vtrn_s32(a, b);
 }
-// CHECK-LABEL: define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_u8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
@@ -1915,7 +1897,8 @@ int32x2x2_t test_vtrn_s32(int32x2_t a, i
 uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
   return vtrn_u8(a, b);
 }
-// CHECK-LABEL: define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_u16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
@@ -1923,26 +1906,25 @@ uint8x8x2_t test_vtrn_u8(uint8x8_t a, ui
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.uint16x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
 uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
   return vtrn_u16(a, b);
 }
-// CHECK-LABEL: define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_u32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
@@ -1950,26 +1932,25 @@ uint16x4x2_t test_vtrn_u16(uint16x4_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
-// CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
-// CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
-// CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.uint32x2x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
+// CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+// CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
+// CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
 uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
   return vtrn_u32(a, b);
 }
-// CHECK-LABEL: define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_f32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
@@ -1977,26 +1958,25 @@ uint32x2x2_t test_vtrn_u32(uint32x2_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
-// CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
-// CHECK:   store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
-// CHECK:   store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.float32x2x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
+// CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+// CHECK:   store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
+// CHECK:   store [2 x <2 x float>] [[TMP9]], [2 x <2 x float>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.float32x2x2_t [[TMP10]]
 float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
   return vtrn_f32(a, b);
 }
-// CHECK-LABEL: define %struct.poly8x8x2_t @test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_p8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
@@ -2019,7 +1999,8 @@ float32x2x2_t test_vtrn_f32(float32x2_t
 poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
   return vtrn_p8(a, b);
 }
-// CHECK-LABEL: define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_p16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
@@ -2027,26 +2008,25 @@ poly8x8x2_t test_vtrn_p8(poly8x8_t a, po
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK:   [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
-// CHECK:   ret %struct.poly16x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
+// CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
 poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
   return vtrn_p16(a, b);
 }
-// CHECK-LABEL: define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_s8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
@@ -2069,7 +2049,8 @@ poly16x4x2_t test_vtrn_p16(poly16x4_t a,
 int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
   return vtrnq_s8(a, b);
 }
-// CHECK-LABEL: define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_s16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
@@ -2077,26 +2058,25 @@ int8x16x2_t test_vtrnq_s8(int8x16_t a, i
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
-// CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.int16x8x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
+// CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.int16x8x2_t [[TMP10]]
 int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
   return vtrnq_s16(a, b);
 }
-// CHECK-LABEL: define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_s32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
@@ -2104,26 +2084,25 @@ int16x8x2_t test_vtrnq_s16(int16x8_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 // CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
-// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.int32x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
+// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.int32x4x2_t [[TMP10]]
 int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
   return vtrnq_s32(a, b);
 }
-// CHECK-LABEL: define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_u8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
@@ -2146,7 +2125,8 @@ int32x4x2_t test_vtrnq_s32(int32x4_t a,
 uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
   return vtrnq_u8(a, b);
 }
-// CHECK-LABEL: define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_u16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
@@ -2154,26 +2134,25 @@ uint8x16x2_t test_vtrnq_u8(uint8x16_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
-// CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.uint16x8x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
+// CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
 uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
   return vtrnq_u16(a, b);
 }
-// CHECK-LABEL: define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_u32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
@@ -2181,26 +2160,25 @@ uint16x8x2_t test_vtrnq_u16(uint16x8_t a
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 // CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
-// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.uint32x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
+// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
 uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
   return vtrnq_u32(a, b);
 }
-// CHECK-LABEL: define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_f32(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
@@ -2208,26 +2186,25 @@ uint32x4x2_t test_vtrnq_u32(uint32x4_t a
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 // CHECK:   store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
-// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK:   store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
-// CHECK:   store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.float32x4x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
+// CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK:   store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
+// CHECK:   store [2 x <4 x float>] [[TMP9]], [2 x <4 x float>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.float32x4x2_t [[TMP10]]
 float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
   return vtrnq_f32(a, b);
 }
-// CHECK-LABEL: define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_p8(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
@@ -2250,7 +2227,8 @@ float32x4x2_t test_vtrnq_f32(float32x4_t
 poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
   return vtrnq_p8(a, b);
 }
-// CHECK-LABEL: define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_p16(
 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
@@ -2258,22 +2236,20 @@ poly8x16x2_t test_vtrnq_p8(poly8x16_t a,
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
-// CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
-// CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK:   [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
-// CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
-// CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK:   [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
-// CHECK:   ret %struct.poly16x8x2_t [[TMP12]]
+// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
+// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
+// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
+// CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
+// CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
+// CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
 poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
   return vtrnq_p16(a, b);
 }
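
For reference, a minimal usage sketch of the intrinsic these checks exercise (illustration only, not part of the patch; the name transpose_pairs is made up). vtrnq_s32 interleaves the even and odd lanes of its two inputs, which is exactly what the <0, 4, 2, 6> and <1, 5, 3, 7> shuffle masks in the CHECK lines describe:

#include <arm_neon.h>

int32x4x2_t transpose_pairs(int32x4_t a, int32x4_t b) {
  // val[0] = {a0, b0, a2, b2}  (mask <0, 4, 2, 6>)
  // val[1] = {a1, b1, a3, b3}  (mask <1, 5, 3, 7>)
  return vtrnq_s32(a, b);
}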

Modified: cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c?rev=276728&r1=276727&r2=276728&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c Tue Jul 26 00:52:37 2016
@@ -47,9 +47,7 @@ float64_t test_vmuld_laneq_f64(float64_t
 }
 
 // CHECK-LABEL: define <1 x double> @test_vmul_n_f64(<1 x double> %a, double %b) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[TMP1]] to double
+// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %a to double
 // CHECK:   [[TMP3:%.*]] = fmul double [[TMP2]], %b
 // CHECK:   [[TMP4:%.*]] = bitcast double [[TMP3]] to <1 x double>
 // CHECK:   ret <1 x double> [[TMP4]]
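
For reference, vmul_n_f64 multiplies the single lane of a float64x1_t by a scalar, so the expected IR reduces to one bitcast to double, an fmul, and a bitcast back. A minimal usage sketch (illustration only; scale_lane is a made-up name):

#include <arm_neon.h>

float64x1_t scale_lane(float64x1_t v, float64_t s) {
  // Multiplies the one double lane of v by the scalar s.
  return vmul_n_f64(v, s);
}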

Modified: cfe/trunk/test/CodeGen/aarch64-poly64.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-poly64.c?rev=276728&r1=276727&r2=276728&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-poly64.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-poly64.c Tue Jul 26 00:52:37 2016
@@ -23,11 +23,7 @@ uint64x2_t test_vceqq_p64(poly64x2_t a,
 }
 
 // CHECK-LABEL: define <1 x i64> @test_vtst_p64(<1 x i64> %a, <1 x i64> %b) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK:   [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
+// CHECK:   [[TMP4:%.*]] = and <1 x i64> %a, %b
 // CHECK:   [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
 // CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
 // CHECK:   ret <1 x i64> [[VTST_I]]
@@ -36,11 +32,7 @@ uint64x1_t test_vtst_p64(poly64x1_t a, p
 }
 
 // CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK:   [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK:   [[TMP4:%.*]] = and <2 x i64> %a, %b
 // CHECK:   [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
 // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[VTST_I]]
@@ -49,15 +41,9 @@ uint64x2_t test_vtstq_p64(poly64x2_t a,
 }
 
 // CHECK-LABEL: define <1 x i64> @test_vbsl_p64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
-// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
-// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
-// CHECK:   [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
-// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %a, %b
+// CHECK:   [[TMP3:%.*]] = xor <1 x i64> %a, <i64 -1>
+// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %c
 // CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
 // CHECK:   ret <1 x i64> [[VBSL5_I]]
 poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) {
@@ -65,15 +51,9 @@ poly64x1_t test_vbsl_p64(poly64x1_t a, p
 }
 
 // CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
-// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
-// CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
-// CHECK:   [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
-// CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %a, %b
+// CHECK:   [[TMP3:%.*]] = xor <2 x i64> %a, <i64 -1, i64 -1>
+// CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c
 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
 // CHECK:   ret <2 x i64> [[VBSL5_I]]
 poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) {
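
For reference, vtst yields all-ones in each lane where a & b is non-zero, and vbsl is a bitwise select, (a & b) | (~a & c), which is the and/xor/and/or sequence the simplified checks expect. A minimal usage sketch (illustration only; select_bits is a made-up name):

#include <arm_neon.h>

poly64x1_t select_bits(poly64x1_t mask, poly64x1_t b, poly64x1_t c) {
  // Per bit: take the bit from b where mask is 1, from c where mask is 0.
  return vbsl_p64(mask, b, c);
}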

Modified: cfe/trunk/test/CodeGen/arm-neon-directed-rounding.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-neon-directed-rounding.c?rev=276728&r1=276727&r2=276728&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-neon-directed-rounding.c (original)
+++ cfe/trunk/test/CodeGen/arm-neon-directed-rounding.c Tue Jul 26 00:52:37 2016
@@ -3,133 +3,85 @@
 #include <arm_neon.h>
 
 // CHECK-LABEL: define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRNDA_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRNDA_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrinta.v2f32(<2 x float> [[VRNDA_V_I]]) #2
-// CHECK:   [[VRNDA_V2_I:%.*]] = bitcast <2 x float> [[VRNDA_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VRNDA_V2_I]] to <2 x float>
-// CHECK:   ret <2 x float> [[TMP1]]
+// CHECK:   [[VRNDA_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrinta.v2f32(<2 x float> %a) #2
+// CHECK:   ret <2 x float> [[VRNDA_V1_I]]
 float32x2_t test_vrnda_f32(float32x2_t a) {
   return vrnda_f32(a);
 }
 
 // CHECK-LABEL: define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDAQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDAQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrinta.v4f32(<4 x float> [[VRNDAQ_V_I]]) #2
-// CHECK:   [[VRNDAQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDAQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VRNDAQ_V2_I]] to <4 x float>
-// CHECK:   ret <4 x float> [[TMP1]]
+// CHECK:   [[VRNDAQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrinta.v4f32(<4 x float> %a) #2
+// CHECK:   ret <4 x float> [[VRNDAQ_V1_I]]
 float32x4_t test_vrndaq_f32(float32x4_t a) {
   return vrndaq_f32(a);
 }
 
 // CHECK-LABEL: define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRNDM_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> [[VRNDM_V_I]]) #2
-// CHECK:   [[VRNDM_V2_I:%.*]] = bitcast <2 x float> [[VRNDM_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VRNDM_V2_I]] to <2 x float>
-// CHECK:   ret <2 x float> [[TMP1]]
+// CHECK:   [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> %a) #2
+// CHECK:   ret <2 x float> [[VRNDM_V1_I]]
 float32x2_t test_vrndm_f32(float32x2_t a) {
   return vrndm_f32(a);
 }
 
 // CHECK-LABEL: define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> [[VRNDMQ_V_I]]) #2
-// CHECK:   [[VRNDMQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDMQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VRNDMQ_V2_I]] to <4 x float>
-// CHECK:   ret <4 x float> [[TMP1]]
+// CHECK:   [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> %a) #2
+// CHECK:   ret <4 x float> [[VRNDMQ_V1_I]]
 float32x4_t test_vrndmq_f32(float32x4_t a) {
   return vrndmq_f32(a);
 }
 
 // CHECK-LABEL: define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRNDN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> [[VRNDN_V_I]]) #2
-// CHECK:   [[VRNDN_V2_I:%.*]] = bitcast <2 x float> [[VRNDN_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VRNDN_V2_I]] to <2 x float>
-// CHECK:   ret <2 x float> [[TMP1]]
+// CHECK:   [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> %a) #2
+// CHECK:   ret <2 x float> [[VRNDN_V1_I]]
 float32x2_t test_vrndn_f32(float32x2_t a) {
   return vrndn_f32(a);
 }
 
 // CHECK-LABEL: define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDNQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> [[VRNDNQ_V_I]]) #2
-// CHECK:   [[VRNDNQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDNQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VRNDNQ_V2_I]] to <4 x float>
-// CHECK:   ret <4 x float> [[TMP1]]
+// CHECK:   [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> %a) #2
+// CHECK:   ret <4 x float> [[VRNDNQ_V1_I]]
 float32x4_t test_vrndnq_f32(float32x4_t a) {
   return vrndnq_f32(a);
 }
 
 // CHECK-LABEL: define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRNDP_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRNDP_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintp.v2f32(<2 x float> [[VRNDP_V_I]]) #2
-// CHECK:   [[VRNDP_V2_I:%.*]] = bitcast <2 x float> [[VRNDP_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VRNDP_V2_I]] to <2 x float>
-// CHECK:   ret <2 x float> [[TMP1]]
+// CHECK:   [[VRNDP_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintp.v2f32(<2 x float> %a) #2
+// CHECK:   ret <2 x float> [[VRNDP_V1_I]]
 float32x2_t test_vrndp_f32(float32x2_t a) {
   return vrndp_f32(a);
 }
 
 // CHECK-LABEL: define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDPQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDPQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintp.v4f32(<4 x float> [[VRNDPQ_V_I]]) #2
-// CHECK:   [[VRNDPQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDPQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VRNDPQ_V2_I]] to <4 x float>
-// CHECK:   ret <4 x float> [[TMP1]]
+// CHECK:   [[VRNDPQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintp.v4f32(<4 x float> %a) #2
+// CHECK:   ret <4 x float> [[VRNDPQ_V1_I]]
 float32x4_t test_vrndpq_f32(float32x4_t a) {
   return vrndpq_f32(a);
 }
 
 // CHECK-LABEL: define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRNDX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRNDX_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintx.v2f32(<2 x float> [[VRNDX_V_I]]) #2
-// CHECK:   [[VRNDX_V2_I:%.*]] = bitcast <2 x float> [[VRNDX_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VRNDX_V2_I]] to <2 x float>
-// CHECK:   ret <2 x float> [[TMP1]]
+// CHECK:   [[VRNDX_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintx.v2f32(<2 x float> %a) #2
+// CHECK:   ret <2 x float> [[VRNDX_V1_I]]
 float32x2_t test_vrndx_f32(float32x2_t a) {
   return vrndx_f32(a);
 }
 
 // CHECK-LABEL: define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDXQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintx.v4f32(<4 x float> [[VRNDXQ_V_I]]) #2
-// CHECK:   [[VRNDXQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDXQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VRNDXQ_V2_I]] to <4 x float>
-// CHECK:   ret <4 x float> [[TMP1]]
+// CHECK:   [[VRNDXQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintx.v4f32(<4 x float> %a) #2
+// CHECK:   ret <4 x float> [[VRNDXQ_V1_I]]
 float32x4_t test_vrndxq_f32(float32x4_t a) {
   return vrndxq_f32(a);
 }
 
 // CHECK-LABEL: define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VRND_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VRND_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintz.v2f32(<2 x float> [[VRND_V_I]]) #2
-// CHECK:   [[VRND_V2_I:%.*]] = bitcast <2 x float> [[VRND_V1_I]] to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VRND_V2_I]] to <2 x float>
-// CHECK:   ret <2 x float> [[TMP1]]
+// CHECK:   [[VRND_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintz.v2f32(<2 x float> %a) #2
+// CHECK:   ret <2 x float> [[VRND_V1_I]]
 float32x2_t test_vrnd_f32(float32x2_t a) {
   return vrnd_f32(a);
 }
 
 // CHECK-LABEL: define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VRNDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VRNDQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintz.v4f32(<4 x float> [[VRNDQ_V_I]]) #2
-// CHECK:   [[VRNDQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDQ_V1_I]] to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VRNDQ_V2_I]] to <4 x float>
-// CHECK:   ret <4 x float> [[TMP1]]
+// CHECK:   [[VRNDQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintz.v4f32(<4 x float> %a) #2
+// CHECK:   ret <4 x float> [[VRNDQ_V1_I]]
 float32x4_t test_vrndq_f32(float32x4_t a) {
   return vrndq_f32(a);
 }
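
For reference, these intrinsics round each lane to an integral value under an explicit rounding mode; the checks now call the @llvm.arm.neon.vrint* intrinsics on %a directly. A minimal usage sketch (illustration only; round_modes is a made-up name; an ARMv8-capable target is assumed, as for the tests themselves):

#include <arm_neon.h>

float32x2_t round_modes(float32x2_t v) {
  float32x2_t away        = vrnda_f32(v);  // to nearest, ties away from zero
  float32x2_t down        = vrndm_f32(v);  // toward minus infinity
  float32x2_t even        = vrndn_f32(v);  // to nearest, ties to even
  float32x2_t up          = vrndp_f32(v);  // toward plus infinity
  float32x2_t toward_zero = vrnd_f32(v);   // toward zero
  return vadd_f32(vadd_f32(away, down),
                  vadd_f32(even, vadd_f32(up, toward_zero)));
}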

Modified: cfe/trunk/test/CodeGen/arm-neon-fma.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-neon-fma.c?rev=276728&r1=276727&r2=276728&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-neon-fma.c (original)
+++ cfe/trunk/test/CodeGen/arm-neon-fma.c Tue Jul 26 00:52:37 2016
@@ -8,26 +8,14 @@
 #include <arm_neon.h>
 
 // CHECK-LABEL: define <2 x float> @test_fma_order(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %accum to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %lhs to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %rhs to <8 x i8>
-// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #2
+// CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %rhs, <2 x float> %accum) #2
 // CHECK:   ret <2 x float> [[TMP6]]
 float32x2_t test_fma_order(float32x2_t accum, float32x2_t lhs, float32x2_t rhs) {
   return vfma_f32(accum, lhs, rhs);
 }
 
 // CHECK-LABEL: define <4 x float> @test_fmaq_order(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %accum to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %lhs to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %rhs to <16 x i8>
-// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #2
+// CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %accum) #2
 // CHECK:   ret <4 x float> [[TMP6]]
 float32x4_t test_fmaq_order(float32x4_t accum, float32x4_t lhs, float32x4_t rhs) {
   return vfmaq_f32(accum, lhs, rhs);
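
For reference, this test pins down the operand order: vfma_f32(accum, lhs, rhs) computes accum + lhs * rhs per lane, so the IR call is @llvm.fma(lhs, rhs, accum). A minimal usage sketch (illustration only; fused_madd is a made-up name):

#include <arm_neon.h>

float32x2_t fused_madd(float32x2_t acc, float32x2_t x, float32x2_t y) {
  // Single-rounding fused multiply-add: acc + x * y in each lane.
  return vfma_f32(acc, x, y);
}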

Modified: cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c?rev=276728&r1=276727&r2=276728&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c (original)
+++ cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c Tue Jul 26 00:52:37 2016
@@ -3,53 +3,29 @@
 #include <arm_neon.h>
 
 // CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK:   [[VMAXNM_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VMAXNM_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK:   [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> [[VMAXNM_V_I]], <2 x float> [[VMAXNM_V1_I]]) #2
-// CHECK:   [[VMAXNM_V3_I:%.*]] = bitcast <2 x float> [[VMAXNM_V2_I]] to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VMAXNM_V3_I]] to <2 x float>
-// CHECK:   ret <2 x float> [[TMP2]]
+// CHECK:   [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %a, <2 x float> %b) #2
+// CHECK:   ret <2 x float> [[VMAXNM_V2_I]]
 float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
   return vmaxnm_f32(a, b);
 }
 
 // CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK:   [[VMAXNMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VMAXNMQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK:   [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> [[VMAXNMQ_V_I]], <4 x float> [[VMAXNMQ_V1_I]]) #2
-// CHECK:   [[VMAXNMQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXNMQ_V2_I]] to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXNMQ_V3_I]] to <4 x float>
-// CHECK:   ret <4 x float> [[TMP2]]
+// CHECK:   [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) #2
+// CHECK:   ret <4 x float> [[VMAXNMQ_V2_I]]
 float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
   return vmaxnmq_f32(a, b);
 }
 
 // CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK:   [[VMINNM_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VMINNM_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK:   [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> [[VMINNM_V_I]], <2 x float> [[VMINNM_V1_I]]) #2
-// CHECK:   [[VMINNM_V3_I:%.*]] = bitcast <2 x float> [[VMINNM_V2_I]] to <8 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VMINNM_V3_I]] to <2 x float>
-// CHECK:   ret <2 x float> [[TMP2]]
+// CHECK:   [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %a, <2 x float> %b) #2
+// CHECK:   ret <2 x float> [[VMINNM_V2_I]]
 float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
   return vminnm_f32(a, b);
 }
 
 // CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK:   [[VMINNMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VMINNMQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK:   [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> [[VMINNMQ_V_I]], <4 x float> [[VMINNMQ_V1_I]]) #2
-// CHECK:   [[VMINNMQ_V3_I:%.*]] = bitcast <4 x float> [[VMINNMQ_V2_I]] to <16 x i8>
-// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VMINNMQ_V3_I]] to <4 x float>
-// CHECK:   ret <4 x float> [[TMP2]]
+// CHECK:   [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %a, <4 x float> %b) #2
+// CHECK:   ret <4 x float> [[VMINNMQ_V2_I]]
 float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
   return vminnmq_f32(a, b);
 }
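
For reference, vmaxnm/vminnm follow IEEE 754-2008 maxNum/minNum semantics: when exactly one operand is a quiet NaN, the numeric operand is returned. A minimal clamp sketch (illustration only; clamp01 is a made-up name):

#include <arm_neon.h>

float32x2_t clamp01(float32x2_t v) {
  // Clamp each lane to [0, 1]; a NaN lane comes out as 0.0f because
  // vmaxnm(NaN, 0) returns 0 and vminnm(0, 1) returns 0.
  return vminnm_f32(vmaxnm_f32(v, vdup_n_f32(0.0f)), vdup_n_f32(1.0f));
}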

Modified: cfe/trunk/test/CodeGen/arm-neon-vcvtX.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-neon-vcvtX.c?rev=276728&r1=276727&r2=276728&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-neon-vcvtX.c (original)
+++ cfe/trunk/test/CodeGen/arm-neon-vcvtX.c Tue Jul 26 00:52:37 2016
@@ -3,144 +3,112 @@
 #include <arm_neon.h>
 
 // CHECK-LABEL: define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTA_S32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> [[VCVTA_S32_V_I]]) #2
+// CHECK:   [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTA_S32_V1_I]]
 int32x2_t test_vcvta_s32_f32(float32x2_t a) {
   return vcvta_s32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTA_U32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> [[VCVTA_U32_V_I]]) #2
+// CHECK:   [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTA_U32_V1_I]]
 uint32x2_t test_vcvta_u32_f32(float32x2_t a) {
   return vcvta_u32_f32(a);
 }
 
 // CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTAQ_S32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> [[VCVTAQ_S32_V_I]]) #2
+// CHECK:   [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTAQ_S32_V1_I]]
 int32x4_t test_vcvtaq_s32_f32(float32x4_t a) {
   return vcvtaq_s32_f32(a);
 }
 
 // CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTAQ_U32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> [[VCVTAQ_U32_V_I]]) #2
+// CHECK:   [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTAQ_U32_V1_I]]
 uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) {
   return vcvtaq_u32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTN_S32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> [[VCVTN_S32_V_I]]) #2
+// CHECK:   [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTN_S32_V1_I]]
 int32x2_t test_vcvtn_s32_f32(float32x2_t a) {
   return vcvtn_s32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTN_U32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> [[VCVTN_U32_V_I]]) #2
+// CHECK:   [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTN_U32_V1_I]]
 uint32x2_t test_vcvtn_u32_f32(float32x2_t a) {
   return vcvtn_u32_f32(a);
 }
 
 // CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTNQ_S32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> [[VCVTNQ_S32_V_I]]) #2
+// CHECK:   [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTNQ_S32_V1_I]]
 int32x4_t test_vcvtnq_s32_f32(float32x4_t a) {
   return vcvtnq_s32_f32(a);
 }
 
 // CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTNQ_U32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> [[VCVTNQ_U32_V_I]]) #2
+// CHECK:   [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTNQ_U32_V1_I]]
 uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) {
   return vcvtnq_u32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTP_S32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> [[VCVTP_S32_V_I]]) #2
+// CHECK:   [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTP_S32_V1_I]]
 int32x2_t test_vcvtp_s32_f32(float32x2_t a) {
   return vcvtp_s32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTP_U32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> [[VCVTP_U32_V_I]]) #2
+// CHECK:   [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTP_U32_V1_I]]
 uint32x2_t test_vcvtp_u32_f32(float32x2_t a) {
   return vcvtp_u32_f32(a);
 }
 
 // CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTPQ_S32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> [[VCVTPQ_S32_V_I]]) #2
+// CHECK:   [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTPQ_S32_V1_I]]
 int32x4_t test_vcvtpq_s32_f32(float32x4_t a) {
   return vcvtpq_s32_f32(a);
 }
 
 // CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTPQ_U32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> [[VCVTPQ_U32_V_I]]) #2
+// CHECK:   [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTPQ_U32_V1_I]]
 uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) {
   return vcvtpq_u32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTM_S32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> [[VCVTM_S32_V_I]]) #2
+// CHECK:   [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTM_S32_V1_I]]
 int32x2_t test_vcvtm_s32_f32(float32x2_t a) {
   return vcvtm_s32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK:   [[VCVTM_U32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK:   [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> [[VCVTM_U32_V_I]]) #2
+// CHECK:   [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) #2
 // CHECK:   ret <2 x i32> [[VCVTM_U32_V1_I]]
 uint32x2_t test_vcvtm_u32_f32(float32x2_t a) {
   return vcvtm_u32_f32(a);
 }
 
 // CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTMQ_S32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> [[VCVTMQ_S32_V_I]]) #2
+// CHECK:   [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTMQ_S32_V1_I]]
 int32x4_t test_vcvtmq_s32_f32(float32x4_t a) {
   return vcvtmq_s32_f32(a);
 }
 
 // CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK:   [[VCVTMQ_U32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK:   [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> [[VCVTMQ_U32_V_I]]) #2
+// CHECK:   [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) #2
 // CHECK:   ret <4 x i32> [[VCVTMQ_U32_V1_I]]
 uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) {
   return vcvtmq_u32_f32(a);
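
For reference, these intrinsics convert float lanes to integers under an explicit rounding mode: vcvta (to nearest, ties away from zero), vcvtn (to nearest, ties to even), vcvtp (toward plus infinity) and vcvtm (toward minus infinity), each in signed (_s32) and unsigned (_u32) forms. A minimal usage sketch (illustration only; round_and_floor is a made-up name):

#include <arm_neon.h>

int32x2_t round_and_floor(float32x2_t v) {
  int32x2_t nearest = vcvta_s32_f32(v);  // round to nearest, ties away from zero
  int32x2_t floored = vcvtm_s32_f32(v);  // round toward minus infinity
  return vadd_s32(nearest, floored);
}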



