r345168 - [CodeGen] Update min-legal-vector width based on function argument and return types

Wed Oct 24 10:42:17 PDT 2018

Author: ctopper
Date: Wed Oct 24 10:42:17 2018
New Revision: 345168

URL: http://llvm.org/viewvc/llvm-project?rev=345168&view=rev
Log:
[CodeGen] Update min-legal-vector width based on function argument and return types

This is a continuation of my patches to inform the X86 backend about what the largest IR types are in the function so that we can restrict the backend type legalizer to prevent 512-bit vectors on SKX when -mprefer-vector-width=256 is specified if no explicit 512 bit vectors were specified by the user.

This patch updates the vector width based on the argument and return types of the current function and from the types of any functions it calls. This is intended to make sure the backend type legalizer doesn't disturb any types that are required for ABI.

Differential Revision: https://reviews.llvm.org/D52441

Added:
    cfe/trunk/test/CodeGen/x86-vector-width.c   (with props)
Modified:
    cfe/trunk/lib/CodeGen/CGCall.cpp
    cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
    cfe/trunk/test/CodeGen/aarch64-neon-3v.c
    cfe/trunk/test/CodeGen/aarch64-neon-across.c
    cfe/trunk/test/CodeGen/aarch64-neon-fma.c
    cfe/trunk/test/CodeGen/aarch64-neon-ldst-one.c
    cfe/trunk/test/CodeGen/aarch64-neon-scalar-copy.c
    cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
    cfe/trunk/test/CodeGen/aarch64-neon-tbl.c
    cfe/trunk/test/CodeGen/aarch64-neon-vget.c
    cfe/trunk/test/CodeGen/aarch64-poly64.c
    cfe/trunk/test/CodeGen/arm-neon-fma.c
    cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c
    cfe/trunk/test/CodeGen/arm-neon-vcvtX.c
    cfe/trunk/test/CodeGen/arm64_vdupq_n_f64.c
    cfe/trunk/test/CodeGenOpenCL/fpmath.cl

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================

--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Wed Oct 24 10:42:17 2018
@@ -4240,6 +4240,13 @@ RValue CodeGenFunction::EmitCall(const C
   }
 #endif
 
+  // Update the largest vector width if any arguments have vector types.
+  for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
+    if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType()))
+      LargestVectorWidth = std::max(LargestVectorWidth,
+                                    VT->getPrimitiveSizeInBits());
+  }
+
   // Compute the calling convention and attributes.
   unsigned CallingConv;
   llvm::AttributeList Attrs;
@@ -4320,6 +4327,11 @@ RValue CodeGenFunction::EmitCall(const C
   if (!CI->getType()->isVoidTy())
     CI->setName("call");
 
+  // Update largest vector width from the return type.
+  if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType()))
+    LargestVectorWidth = std::max(LargestVectorWidth,
+                                  VT->getPrimitiveSizeInBits());
+
   // Insert instrumentation or attach profile metadata at indirect call sites.
   // For more details, see the comment before the definition of
   // IPVK_IndirectCallTarget in InstrProfData.inc.

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.cpp?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.cpp Wed Oct 24 10:42:17 2018
@@ -430,7 +430,24 @@ void CodeGenFunction::FinishFunction(Sou
     NormalCleanupDest = Address::invalid();
   }
 
-  // Add the required-vector-width attribute.
+  // Scan function arguments for vector width.
+  for (llvm::Argument &A : CurFn->args())
+    if (auto *VT = dyn_cast<llvm::VectorType>(A.getType()))
+      LargestVectorWidth = std::max(LargestVectorWidth,
+                                    VT->getPrimitiveSizeInBits());
+
+  // Update vector width based on return type.
+  if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType()))
+    LargestVectorWidth = std::max(LargestVectorWidth,
+                                  VT->getPrimitiveSizeInBits());
+
+  // Add the required-vector-width attribute. This contains the max width from:
+  // 1. min-vector-width attribute used in the source program.
+  // 2. Any builtins used that have a vector width specified.
+  // 3. Values passed in and out of inline assembly.
+  // 4. Width of vector arguments and return types for this function.
+  // 5. Width of vector aguments and return types for functions called by this
+  //    function.
   if (LargestVectorWidth != 0)
     CurFn->addFnAttr("min-legal-vector-width",
                      llvm::utostr(LargestVectorWidth));

Modified: cfe/trunk/test/CodeGen/aarch64-neon-3v.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-3v.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-3v.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-3v.c Wed Oct 24 10:42:17 2018
@@ -11,7 +11,7 @@ int8x8_t test_vand_s8(int8x8_t a, int8x8
   return vand_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, %b
 // CHECK:   ret <16 x i8> [[AND_I]]
 int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
@@ -25,7 +25,7 @@ int16x4_t test_vand_s16(int16x4_t a, int
   return vand_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, %b
 // CHECK:   ret <8 x i16> [[AND_I]]
 int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
@@ -39,7 +39,7 @@ int32x2_t test_vand_s32(int32x2_t a, int
   return vand_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, %b
 // CHECK:   ret <4 x i32> [[AND_I]]
 int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
@@ -53,7 +53,7 @@ int64x1_t test_vand_s64(int64x1_t a, int
   return vand_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, %b
 // CHECK:   ret <2 x i64> [[AND_I]]
 int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
@@ -67,7 +67,7 @@ uint8x8_t test_vand_u8(uint8x8_t a, uint
   return vand_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, %b
 // CHECK:   ret <16 x i8> [[AND_I]]
 uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
@@ -81,7 +81,7 @@ uint16x4_t test_vand_u16(uint16x4_t a, u
   return vand_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, %b
 // CHECK:   ret <8 x i16> [[AND_I]]
 uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
@@ -95,7 +95,7 @@ uint32x2_t test_vand_u32(uint32x2_t a, u
   return vand_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, %b
 // CHECK:   ret <4 x i32> [[AND_I]]
 uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
@@ -109,7 +109,7 @@ uint64x1_t test_vand_u64(uint64x1_t a, u
   return vand_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, %b
 // CHECK:   ret <2 x i64> [[AND_I]]
 uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
@@ -123,7 +123,7 @@ int8x8_t test_vorr_s8(int8x8_t a, int8x8
   return vorr_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, %b
 // CHECK:   ret <16 x i8> [[OR_I]]
 int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
@@ -137,7 +137,7 @@ int16x4_t test_vorr_s16(int16x4_t a, int
   return vorr_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, %b
 // CHECK:   ret <8 x i16> [[OR_I]]
 int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
@@ -151,7 +151,7 @@ int32x2_t test_vorr_s32(int32x2_t a, int
   return vorr_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, %b
 // CHECK:   ret <4 x i32> [[OR_I]]
 int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
@@ -165,7 +165,7 @@ int64x1_t test_vorr_s64(int64x1_t a, int
   return vorr_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, %b
 // CHECK:   ret <2 x i64> [[OR_I]]
 int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
@@ -179,7 +179,7 @@ uint8x8_t test_vorr_u8(uint8x8_t a, uint
   return vorr_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, %b
 // CHECK:   ret <16 x i8> [[OR_I]]
 uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
@@ -193,7 +193,7 @@ uint16x4_t test_vorr_u16(uint16x4_t a, u
   return vorr_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, %b
 // CHECK:   ret <8 x i16> [[OR_I]]
 uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
@@ -207,7 +207,7 @@ uint32x2_t test_vorr_u32(uint32x2_t a, u
   return vorr_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, %b
 // CHECK:   ret <4 x i32> [[OR_I]]
 uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
@@ -221,7 +221,7 @@ uint64x1_t test_vorr_u64(uint64x1_t a, u
   return vorr_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, %b
 // CHECK:   ret <2 x i64> [[OR_I]]
 uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
@@ -235,7 +235,7 @@ int8x8_t test_veor_s8(int8x8_t a, int8x8
   return veor_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK:   [[XOR_I:%.*]] = xor <16 x i8> %a, %b
 // CHECK:   ret <16 x i8> [[XOR_I]]
 int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
@@ -249,7 +249,7 @@ int16x4_t test_veor_s16(int16x4_t a, int
   return veor_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK:   [[XOR_I:%.*]] = xor <8 x i16> %a, %b
 // CHECK:   ret <8 x i16> [[XOR_I]]
 int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
@@ -263,7 +263,7 @@ int32x2_t test_veor_s32(int32x2_t a, int
   return veor_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK:   [[XOR_I:%.*]] = xor <4 x i32> %a, %b
 // CHECK:   ret <4 x i32> [[XOR_I]]
 int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
@@ -277,7 +277,7 @@ int64x1_t test_veor_s64(int64x1_t a, int
   return veor_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[XOR_I:%.*]] = xor <2 x i64> %a, %b
 // CHECK:   ret <2 x i64> [[XOR_I]]
 int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
@@ -291,7 +291,7 @@ uint8x8_t test_veor_u8(uint8x8_t a, uint
   return veor_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK:   [[XOR_I:%.*]] = xor <16 x i8> %a, %b
 // CHECK:   ret <16 x i8> [[XOR_I]]
 uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
@@ -305,7 +305,7 @@ uint16x4_t test_veor_u16(uint16x4_t a, u
   return veor_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK:   [[XOR_I:%.*]] = xor <8 x i16> %a, %b
 // CHECK:   ret <8 x i16> [[XOR_I]]
 uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
@@ -319,7 +319,7 @@ uint32x2_t test_veor_u32(uint32x2_t a, u
   return veor_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK:   [[XOR_I:%.*]] = xor <4 x i32> %a, %b
 // CHECK:   ret <4 x i32> [[XOR_I]]
 uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
@@ -333,7 +333,7 @@ uint64x1_t test_veor_u64(uint64x1_t a, u
   return veor_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[XOR_I:%.*]] = xor <2 x i64> %a, %b
 // CHECK:   ret <2 x i64> [[XOR_I]]
 uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
@@ -348,7 +348,7 @@ int8x8_t test_vbic_s8(int8x8_t a, int8x8
   return vbic_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 // CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
 // CHECK:   ret <16 x i8> [[AND_I]]
@@ -364,7 +364,7 @@ int16x4_t test_vbic_s16(int16x4_t a, int
   return vbic_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
 // CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
 // CHECK:   ret <8 x i16> [[AND_I]]
@@ -380,7 +380,7 @@ int32x2_t test_vbic_s32(int32x2_t a, int
   return vbic_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
 // CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
 // CHECK:   ret <4 x i32> [[AND_I]]
@@ -396,7 +396,7 @@ int64x1_t test_vbic_s64(int64x1_t a, int
   return vbic_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
 // CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
 // CHECK:   ret <2 x i64> [[AND_I]]
@@ -412,7 +412,7 @@ uint8x8_t test_vbic_u8(uint8x8_t a, uint
   return vbic_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 // CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
 // CHECK:   ret <16 x i8> [[AND_I]]
@@ -428,7 +428,7 @@ uint16x4_t test_vbic_u16(uint16x4_t a, u
   return vbic_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
 // CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
 // CHECK:   ret <8 x i16> [[AND_I]]
@@ -444,7 +444,7 @@ uint32x2_t test_vbic_u32(uint32x2_t a, u
   return vbic_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
 // CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
 // CHECK:   ret <4 x i32> [[AND_I]]
@@ -460,7 +460,7 @@ uint64x1_t test_vbic_u64(uint64x1_t a, u
   return vbic_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
 // CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
 // CHECK:   ret <2 x i64> [[AND_I]]
@@ -476,7 +476,7 @@ int8x8_t test_vorn_s8(int8x8_t a, int8x8
   return vorn_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 // CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
 // CHECK:   ret <16 x i8> [[OR_I]]
@@ -492,7 +492,7 @@ int16x4_t test_vorn_s16(int16x4_t a, int
   return vorn_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
 // CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
 // CHECK:   ret <8 x i16> [[OR_I]]
@@ -508,7 +508,7 @@ int32x2_t test_vorn_s32(int32x2_t a, int
   return vorn_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
 // CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
 // CHECK:   ret <4 x i32> [[OR_I]]
@@ -524,7 +524,7 @@ int64x1_t test_vorn_s64(int64x1_t a, int
   return vorn_s64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
 // CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
 // CHECK:   ret <2 x i64> [[OR_I]]
@@ -540,7 +540,7 @@ uint8x8_t test_vorn_u8(uint8x8_t a, uint
   return vorn_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 // CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
 // CHECK:   ret <16 x i8> [[OR_I]]
@@ -556,7 +556,7 @@ uint16x4_t test_vorn_u16(uint16x4_t a, u
   return vorn_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
 // CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
 // CHECK:   ret <8 x i16> [[OR_I]]
@@ -572,7 +572,7 @@ uint32x2_t test_vorn_u32(uint32x2_t a, u
   return vorn_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
 // CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
 // CHECK:   ret <4 x i32> [[OR_I]]
@@ -588,10 +588,13 @@ uint64x1_t test_vorn_u64(uint64x1_t a, u
   return vorn_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
 // CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
 // CHECK:   ret <2 x i64> [[OR_I]]
 uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
   return vornq_u64(a, b);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

Modified: cfe/trunk/test/CodeGen/aarch64-neon-across.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-across.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-across.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-across.c Wed Oct 24 10:42:17 2018
@@ -6,7 +6,7 @@
 #include <arm_neon.h>
 
 // CHECK-LABEL: define i16 @test_vaddlv_s8(<8 x i8> %a) #0 {
-// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) #2
+// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
 // CHECK:   ret i16 [[TMP0]]
 int16_t test_vaddlv_s8(int8x8_t a) {
@@ -14,14 +14,14 @@ int16_t test_vaddlv_s8(int8x8_t a) {
 }
 
 // CHECK-LABEL: define i32 @test_vaddlv_s16(<4 x i16> %a) #0 {
-// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) #2
+// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) #3
 // CHECK:   ret i32 [[VADDLV_I]]
 int32_t test_vaddlv_s16(int16x4_t a) {
   return vaddlv_s16(a);
 }
 
 // CHECK-LABEL: define i16 @test_vaddlv_u8(<8 x i8> %a) #0 {
-// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) #2
+// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
 // CHECK:   ret i16 [[TMP0]]
 uint16_t test_vaddlv_u8(uint8x8_t a) {
@@ -29,58 +29,58 @@ uint16_t test_vaddlv_u8(uint8x8_t a) {
 }
 
 // CHECK-LABEL: define i32 @test_vaddlv_u16(<4 x i16> %a) #0 {
-// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) #2
+// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) #3
 // CHECK:   ret i32 [[VADDLV_I]]
 uint32_t test_vaddlv_u16(uint16x4_t a) {
   return vaddlv_u16(a);
 }
 
-// CHECK-LABEL: define i16 @test_vaddlvq_s8(<16 x i8> %a) #0 {
-// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i16 @test_vaddlvq_s8(<16 x i8> %a) #1 {
+// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
 // CHECK:   ret i16 [[TMP0]]
 int16_t test_vaddlvq_s8(int8x16_t a) {
   return vaddlvq_s8(a);
 }
 
-// CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #0 {
-// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #1 {
+// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) #3
 // CHECK:   ret i32 [[VADDLV_I]]
 int32_t test_vaddlvq_s16(int16x8_t a) {
   return vaddlvq_s16(a);
 }
 
-// CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #0 {
-// CHECK:   [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #1 {
+// CHECK:   [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) #3
 // CHECK:   ret i64 [[VADDLVQ_S32_I]]
 int64_t test_vaddlvq_s32(int32x4_t a) {
   return vaddlvq_s32(a);
 }
 
-// CHECK-LABEL: define i16 @test_vaddlvq_u8(<16 x i8> %a) #0 {
-// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i16 @test_vaddlvq_u8(<16 x i8> %a) #1 {
+// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
 // CHECK:   ret i16 [[TMP0]]
 uint16_t test_vaddlvq_u8(uint8x16_t a) {
   return vaddlvq_u8(a);
 }
 
-// CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #0 {
-// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #1 {
+// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) #3
 // CHECK:   ret i32 [[VADDLV_I]]
 uint32_t test_vaddlvq_u16(uint16x8_t a) {
   return vaddlvq_u16(a);
 }
 
-// CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #0 {
-// CHECK:   [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #1 {
+// CHECK:   [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) #3
 // CHECK:   ret i64 [[VADDLVQ_U32_I]]
 uint64_t test_vaddlvq_u32(uint32x4_t a) {
   return vaddlvq_u32(a);
 }
 
 // CHECK-LABEL: define i8 @test_vmaxv_s8(<8 x i8> %a) #0 {
-// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) #2
+// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
 // CHECK:   ret i8 [[TMP0]]
 int8_t test_vmaxv_s8(int8x8_t a) {
@@ -88,7 +88,7 @@ int8_t test_vmaxv_s8(int8x8_t a) {
 }
 
 // CHECK-LABEL: define i16 @test_vmaxv_s16(<4 x i16> %a) #0 {
-// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) #2
+// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) #3
 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
 // CHECK:   ret i16 [[TMP2]]
 int16_t test_vmaxv_s16(int16x4_t a) {
@@ -96,7 +96,7 @@ int16_t test_vmaxv_s16(int16x4_t a) {
 }
 
 // CHECK-LABEL: define i8 @test_vmaxv_u8(<8 x i8> %a) #0 {
-// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) #2
+// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
 // CHECK:   ret i8 [[TMP0]]
 uint8_t test_vmaxv_u8(uint8x8_t a) {
@@ -104,61 +104,61 @@ uint8_t test_vmaxv_u8(uint8x8_t a) {
 }
 
 // CHECK-LABEL: define i16 @test_vmaxv_u16(<4 x i16> %a) #0 {
-// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) #2
+// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) #3
 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
 // CHECK:   ret i16 [[TMP2]]
 uint16_t test_vmaxv_u16(uint16x4_t a) {
   return vmaxv_u16(a);
 }
 
-// CHECK-LABEL: define i8 @test_vmaxvq_s8(<16 x i8> %a) #0 {
-// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vmaxvq_s8(<16 x i8> %a) #1 {
+// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
 // CHECK:   ret i8 [[TMP0]]
 int8_t test_vmaxvq_s8(int8x16_t a) {
   return vmaxvq_s8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #0 {
-// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #1 {
+// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) #3
 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
 // CHECK:   ret i16 [[TMP2]]
 int16_t test_vmaxvq_s16(int16x8_t a) {
   return vmaxvq_s16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 x i32> %a) #0 {
-// CHECK:   [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 x i32> %a) #1 {
+// CHECK:   [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) #3
 // CHECK:   ret i32 [[VMAXVQ_S32_I]]
 int32_t test_vmaxvq_s32(int32x4_t a) {
   return vmaxvq_s32(a);
 }
 
-// CHECK-LABEL: define i8 @test_vmaxvq_u8(<16 x i8> %a) #0 {
-// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vmaxvq_u8(<16 x i8> %a) #1 {
+// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
 // CHECK:   ret i8 [[TMP0]]
 uint8_t test_vmaxvq_u8(uint8x16_t a) {
   return vmaxvq_u8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #0 {
-// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #1 {
+// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) #3
 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
 // CHECK:   ret i16 [[TMP2]]
 uint16_t test_vmaxvq_u16(uint16x8_t a) {
   return vmaxvq_u16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #0 {
-// CHECK:   [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #1 {
+// CHECK:   [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) #3
 // CHECK:   ret i32 [[VMAXVQ_U32_I]]
 uint32_t test_vmaxvq_u32(uint32x4_t a) {
   return vmaxvq_u32(a);
 }
 
 // CHECK-LABEL: define i8 @test_vminv_s8(<8 x i8> %a) #0 {
-// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) #2
+// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
 // CHECK:   ret i8 [[TMP0]]
 int8_t test_vminv_s8(int8x8_t a) {
@@ -166,7 +166,7 @@ int8_t test_vminv_s8(int8x8_t a) {
 }
 
 // CHECK-LABEL: define i16 @test_vminv_s16(<4 x i16> %a) #0 {
-// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) #2
+// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) #3
 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
 // CHECK:   ret i16 [[TMP2]]
 int16_t test_vminv_s16(int16x4_t a) {
@@ -174,7 +174,7 @@ int16_t test_vminv_s16(int16x4_t a) {
 }
 
 // CHECK-LABEL: define i8 @test_vminv_u8(<8 x i8> %a) #0 {
-// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) #2
+// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
 // CHECK:   ret i8 [[TMP0]]
 uint8_t test_vminv_u8(uint8x8_t a) {
@@ -182,61 +182,61 @@ uint8_t test_vminv_u8(uint8x8_t a) {
 }
 
 // CHECK-LABEL: define i16 @test_vminv_u16(<4 x i16> %a) #0 {
-// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) #2
+// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) #3
 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
 // CHECK:   ret i16 [[TMP2]]
 uint16_t test_vminv_u16(uint16x4_t a) {
   return vminv_u16(a);
 }
 
-// CHECK-LABEL: define i8 @test_vminvq_s8(<16 x i8> %a) #0 {
-// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vminvq_s8(<16 x i8> %a) #1 {
+// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
 // CHECK:   ret i8 [[TMP0]]
 int8_t test_vminvq_s8(int8x16_t a) {
   return vminvq_s8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #0 {
-// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #1 {
+// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) #3
 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
 // CHECK:   ret i16 [[TMP2]]
 int16_t test_vminvq_s16(int16x8_t a) {
   return vminvq_s16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #0 {
-// CHECK:   [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #1 {
+// CHECK:   [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) #3
 // CHECK:   ret i32 [[VMINVQ_S32_I]]
 int32_t test_vminvq_s32(int32x4_t a) {
   return vminvq_s32(a);
 }
 
-// CHECK-LABEL: define i8 @test_vminvq_u8(<16 x i8> %a) #0 {
-// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vminvq_u8(<16 x i8> %a) #1 {
+// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
 // CHECK:   ret i8 [[TMP0]]
 uint8_t test_vminvq_u8(uint8x16_t a) {
   return vminvq_u8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #0 {
-// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #1 {
+// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) #3
 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
 // CHECK:   ret i16 [[TMP2]]
 uint16_t test_vminvq_u16(uint16x8_t a) {
   return vminvq_u16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #0 {
-// CHECK:   [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #1 {
+// CHECK:   [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) #3
 // CHECK:   ret i32 [[VMINVQ_U32_I]]
 uint32_t test_vminvq_u32(uint32x4_t a) {
   return vminvq_u32(a);
 }
 
 // CHECK-LABEL: define i8 @test_vaddv_s8(<8 x i8> %a) #0 {
-// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) #2
+// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
 // CHECK:   ret i8 [[TMP0]]
 int8_t test_vaddv_s8(int8x8_t a) {
@@ -244,7 +244,7 @@ int8_t test_vaddv_s8(int8x8_t a) {
 }
 
 // CHECK-LABEL: define i16 @test_vaddv_s16(<4 x i16> %a) #0 {
-// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) #2
+// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) #3
 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
 // CHECK:   ret i16 [[TMP2]]
 int16_t test_vaddv_s16(int16x4_t a) {
@@ -252,7 +252,7 @@ int16_t test_vaddv_s16(int16x4_t a) {
 }
 
 // CHECK-LABEL: define i8 @test_vaddv_u8(<8 x i8> %a) #0 {
-// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) #2
+// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
 // CHECK:   ret i8 [[TMP0]]
 uint8_t test_vaddv_u8(uint8x8_t a) {
@@ -260,83 +260,86 @@ uint8_t test_vaddv_u8(uint8x8_t a) {
 }
 
 // CHECK-LABEL: define i16 @test_vaddv_u16(<4 x i16> %a) #0 {
-// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) #2
+// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) #3
 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
 // CHECK:   ret i16 [[TMP2]]
 uint16_t test_vaddv_u16(uint16x4_t a) {
   return vaddv_u16(a);
 }
 
-// CHECK-LABEL: define i8 @test_vaddvq_s8(<16 x i8> %a) #0 {
-// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vaddvq_s8(<16 x i8> %a) #1 {
+// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
 // CHECK:   ret i8 [[TMP0]]
 int8_t test_vaddvq_s8(int8x16_t a) {
   return vaddvq_s8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #0 {
-// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #1 {
+// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) #3
 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
 // CHECK:   ret i16 [[TMP2]]
 int16_t test_vaddvq_s16(int16x8_t a) {
   return vaddvq_s16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #0 {
-// CHECK:   [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #1 {
+// CHECK:   [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) #3
 // CHECK:   ret i32 [[VADDVQ_S32_I]]
 int32_t test_vaddvq_s32(int32x4_t a) {
   return vaddvq_s32(a);
 }
 
-// CHECK-LABEL: define i8 @test_vaddvq_u8(<16 x i8> %a) #0 {
-// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) #2
+// CHECK-LABEL: define i8 @test_vaddvq_u8(<16 x i8> %a) #1 {
+// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) #3
 // CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
 // CHECK:   ret i8 [[TMP0]]
 uint8_t test_vaddvq_u8(uint8x16_t a) {
   return vaddvq_u8(a);
 }
 
-// CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #0 {
-// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) #2
+// CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #1 {
+// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) #3
 // CHECK:   [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
 // CHECK:   ret i16 [[TMP2]]
 uint16_t test_vaddvq_u16(uint16x8_t a) {
   return vaddvq_u16(a);
 }
 
-// CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #0 {
-// CHECK:   [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) #2
+// CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #1 {
+// CHECK:   [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) #3
 // CHECK:   ret i32 [[VADDVQ_U32_I]]
 uint32_t test_vaddvq_u32(uint32x4_t a) {
   return vaddvq_u32(a);
 }
 
-// CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #0 {
-// CHECK:   [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #1 {
+// CHECK:   [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) #3
 // CHECK:   ret float [[VMAXVQ_F32_I]]
 float32_t test_vmaxvq_f32(float32x4_t a) {
   return vmaxvq_f32(a);
 }
 
-// CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #0 {
-// CHECK:   [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #1 {
+// CHECK:   [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) #3
 // CHECK:   ret float [[VMINVQ_F32_I]]
 float32_t test_vminvq_f32(float32x4_t a) {
   return vminvq_f32(a);
 }
 
-// CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #0 {
-// CHECK:   [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #1 {
+// CHECK:   [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) #3
 // CHECK:   ret float [[VMAXNMVQ_F32_I]]
 float32_t test_vmaxnmvq_f32(float32x4_t a) {
   return vmaxnmvq_f32(a);
 }
 
-// CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #0 {
-// CHECK:   [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #1 {
+// CHECK:   [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) #3
 // CHECK:   ret float [[VMINNMVQ_F32_I]]
 float32_t test_vminnmvq_f32(float32x4_t a) {
   return vminnmvq_f32(a);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

Modified: cfe/trunk/test/CodeGen/aarch64-neon-fma.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-fma.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-fma.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-fma.c Wed Oct 24 10:42:17 2018
@@ -14,7 +14,7 @@ float32x2_t test_vmla_n_f32(float32x2_t
   return vmla_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #1 {
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
@@ -26,7 +26,7 @@ float32x4_t test_vmlaq_n_f32(float32x4_t
   return vmlaq_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
+// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
 // CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
@@ -36,7 +36,7 @@ float64x2_t test_vmlaq_n_f64(float64x2_t
   return vmlaq_n_f64(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #1 {
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
@@ -58,7 +58,7 @@ float32x2_t test_vmls_n_f32(float32x2_t
   return vmls_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
+// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
 // CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
@@ -77,7 +77,7 @@ float32x2_t test_vmla_lane_f32_0(float32
   return vmla_lane_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -86,7 +86,7 @@ float32x4_t test_vmlaq_lane_f32_0(float3
   return vmlaq_lane_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
 // CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
 // CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
@@ -95,7 +95,7 @@ float32x2_t test_vmla_laneq_f32_0(float3
   return vmla_laneq_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -113,7 +113,7 @@ float32x2_t test_vmls_lane_f32_0(float32
   return vmls_lane_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -122,7 +122,7 @@ float32x4_t test_vmlsq_lane_f32_0(float3
   return vmlsq_lane_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
 // CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
 // CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
@@ -131,7 +131,7 @@ float32x2_t test_vmls_laneq_f32_0(float3
   return vmls_laneq_f32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -149,7 +149,7 @@ float32x2_t test_vmla_lane_f32(float32x2
   return vmla_lane_f32(a, b, v, 1);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -158,7 +158,7 @@ float32x4_t test_vmlaq_lane_f32(float32x
   return vmlaq_lane_f32(a, b, v, 1);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
 // CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
 // CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
@@ -167,7 +167,7 @@ float32x2_t test_vmla_laneq_f32(float32x
   return vmla_laneq_f32(a, b, v, 3);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -185,7 +185,7 @@ float32x2_t test_vmls_lane_f32(float32x2
   return vmls_lane_f32(a, b, v, 1);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -193,7 +193,7 @@ float32x2_t test_vmls_lane_f32(float32x2
 float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
   return vmlsq_lane_f32(a, b, v, 1);
 }
-// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
 // CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
 // CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
@@ -202,7 +202,7 @@ float32x2_t test_vmls_laneq_f32(float32x
   return vmls_laneq_f32(a, b, v, 3);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
 // CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -211,7 +211,7 @@ float32x4_t test_vmlsq_laneq_f32(float32
   return vmlsq_laneq_f32(a, b, v, 3);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
+// CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
 // CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> [[VECINIT1_I]], <2 x double> %a)
@@ -220,12 +220,15 @@ float64x2_t test_vfmaq_n_f64(float64x2_t
   return vfmaq_n_f64(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
+// CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
 // CHECK:   [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
-// CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) #2
+// CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) #3
 // CHECK:   ret <2 x double> [[TMP6]]
 float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
   return vfmsq_n_f64(a, b, c);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

Modified: cfe/trunk/test/CodeGen/aarch64-neon-ldst-one.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-ldst-one.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-ldst-one.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-ldst-one.c Wed Oct 24 10:42:17 2018
@@ -152,7 +152,7 @@ poly64x2_t test_vld1q_dup_p64(poly64_t
   return vld1q_dup_p64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(i8* %a) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(i8* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = load i8, i8* %a
 // CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
 // CHECK:   [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
@@ -161,7 +161,7 @@ uint8x8_t test_vld1_dup_u8(uint8_t  *a)
   return vld1_dup_u8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(i16* %a) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(i16* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
 // CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]]
@@ -172,7 +172,7 @@ uint16x4_t test_vld1_dup_u16(uint16_t  *
   return vld1_dup_u16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(i32* %a) #0 {
+// CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(i32* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
 // CHECK:   [[TMP2:%.*]] = load i32, i32* [[TMP1]]
@@ -183,7 +183,7 @@ uint32x2_t test_vld1_dup_u32(uint32_t  *
   return vld1_dup_u32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(i64* %a) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(i64* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
 // CHECK:   [[TMP2:%.*]] = load i64, i64* [[TMP1]]
@@ -194,7 +194,7 @@ uint64x1_t test_vld1_dup_u64(uint64_t  *
   return vld1_dup_u64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(i8* %a) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(i8* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = load i8, i8* %a
 // CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
 // CHECK:   [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
@@ -203,7 +203,7 @@ int8x8_t test_vld1_dup_s8(int8_t  *a) {
   return vld1_dup_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_dup_s16(i16* %a) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_s16(i16* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
 // CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]]
@@ -214,7 +214,7 @@ int16x4_t test_vld1_dup_s16(int16_t  *a)
   return vld1_dup_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(i32* %a) #0 {
+// CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(i32* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
 // CHECK:   [[TMP2:%.*]] = load i32, i32* [[TMP1]]
@@ -225,7 +225,7 @@ int32x2_t test_vld1_dup_s32(int32_t  *a)
   return vld1_dup_s32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(i64* %a) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(i64* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
 // CHECK:   [[TMP2:%.*]] = load i64, i64* [[TMP1]]
@@ -236,7 +236,7 @@ int64x1_t test_vld1_dup_s64(int64_t  *a)
   return vld1_dup_s64(a);
 }
 
-// CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #0 {
+// CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
 // CHECK:   [[TMP2:%.*]] = load half, half* [[TMP1]]
@@ -247,7 +247,7 @@ float16x4_t test_vld1_dup_f16(float16_t
   return vld1_dup_f16(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(float* %a) #0 {
+// CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(float* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
 // CHECK:   [[TMP2:%.*]] = load float, float* [[TMP1]]
@@ -258,7 +258,7 @@ float32x2_t test_vld1_dup_f32(float32_t
   return vld1_dup_f32(a);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vld1_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define <1 x double> @test_vld1_dup_f64(double* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to double*
 // CHECK:   [[TMP2:%.*]] = load double, double* [[TMP1]]
@@ -269,7 +269,7 @@ float64x1_t test_vld1_dup_f64(float64_t
   return vld1_dup_f64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(i8* %a) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(i8* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = load i8, i8* %a
 // CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
 // CHECK:   [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
@@ -278,7 +278,7 @@ poly8x8_t test_vld1_dup_p8(poly8_t  *a)
   return vld1_dup_p8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(i16* %a) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(i16* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
 // CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]]
@@ -289,7 +289,7 @@ poly16x4_t test_vld1_dup_p16(poly16_t  *
   return vld1_dup_p16(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_p64(i64* %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
 // CHECK:   [[TMP2:%.*]] = load i64, i64* [[TMP1]]
@@ -300,7 +300,7 @@ poly64x1_t test_vld1_dup_p64(poly64_t  *
   return vld1_dup_p64(a);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_dup_u64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_dup_u64(i64* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
@@ -318,7 +318,7 @@ uint64x2x2_t test_vld2q_dup_u64(uint64_t
   return vld2q_dup_u64(a);
 }
 
-// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
@@ -336,7 +336,7 @@ int64x2x2_t test_vld2q_dup_s64(int64_t
   return vld2q_dup_s64(a);
 }
 
-// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
@@ -354,7 +354,7 @@ float64x2x2_t test_vld2q_dup_f64(float64
   return vld2q_dup_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_dup_p64(i64* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
@@ -372,7 +372,7 @@ poly64x2x2_t test_vld2q_dup_p64(poly64_t
   return vld2q_dup_p64(a);
 }
 
-// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
@@ -390,7 +390,7 @@ float64x1x2_t test_vld2_dup_f64(float64_
   return vld2_dup_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_dup_p64(i64* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
@@ -408,7 +408,7 @@ poly64x1x2_t test_vld2_dup_p64(poly64_t
   return vld2_dup_p64(a);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_dup_u64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_dup_u64(i64* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
@@ -427,7 +427,7 @@ uint64x2x3_t test_vld3q_dup_u64(uint64_t
   // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
@@ -446,7 +446,7 @@ int64x2x3_t test_vld3q_dup_s64(int64_t
   // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
@@ -465,7 +465,7 @@ float64x2x3_t test_vld3q_dup_f64(float64
   // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_dup_p64(i64* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
@@ -484,7 +484,7 @@ poly64x2x3_t test_vld3q_dup_p64(poly64_t
   // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
@@ -503,7 +503,7 @@ float64x1x3_t test_vld3_dup_f64(float64_
   // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_dup_p64(i64* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
@@ -522,7 +522,7 @@ poly64x1x3_t test_vld3_dup_p64(poly64_t
   // [{{x[0-9]+|sp}}]
 }
 
-// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_dup_u64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_dup_u64(i64* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
@@ -540,7 +540,7 @@ uint64x2x4_t test_vld4q_dup_u64(uint64_t
   return vld4q_dup_u64(a);
 }
 
-// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
@@ -558,7 +558,7 @@ int64x2x4_t test_vld4q_dup_s64(int64_t
   return vld4q_dup_s64(a);
 }
 
-// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
@@ -576,7 +576,7 @@ float64x2x4_t test_vld4q_dup_f64(float64
   return vld4q_dup_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_dup_p64(i64* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
@@ -594,7 +594,7 @@ poly64x2x4_t test_vld4q_dup_p64(poly64_t
   return vld4q_dup_p64(a);
 }
 
-// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) #0 {
+// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
@@ -612,7 +612,7 @@ float64x1x4_t test_vld4_dup_f64(float64_
   return vld4_dup_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_dup_p64(i64* %a) #0 {
+// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_dup_p64(i64* %a) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
@@ -786,7 +786,7 @@ poly64x2_t test_vld1q_lane_p64(poly64_t
   return vld1q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = load i8, i8* %a
 // CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
 // CHECK:   ret <8 x i8> [[VLD1_LANE]]
@@ -794,7 +794,7 @@ uint8x8_t test_vld1_lane_u8(uint8_t  *a,
   return vld1_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -806,7 +806,7 @@ uint16x4_t test_vld1_lane_u16(uint16_t
   return vld1_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -818,7 +818,7 @@ uint32x2_t test_vld1_lane_u32(uint32_t
   return vld1_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -830,7 +830,7 @@ uint64x1_t test_vld1_lane_u64(uint64_t
   return vld1_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = load i8, i8* %a
 // CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
 // CHECK:   ret <8 x i8> [[VLD1_LANE]]
@@ -838,7 +838,7 @@ int8x8_t test_vld1_lane_s8(int8_t  *a, i
   return vld1_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -850,7 +850,7 @@ int16x4_t test_vld1_lane_s16(int16_t  *a
   return vld1_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -862,7 +862,7 @@ int32x2_t test_vld1_lane_s32(int32_t  *a
   return vld1_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -874,7 +874,7 @@ int64x1_t test_vld1_lane_s64(int64_t  *a
   return vld1_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #0 {
+// CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
@@ -886,7 +886,7 @@ float16x4_t test_vld1_lane_f16(float16_t
   return vld1_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) #0 {
+// CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
@@ -898,7 +898,7 @@ float32x2_t test_vld1_lane_f32(float32_t
   return vld1_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) #0 {
+// CHECK-LABEL: define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
@@ -910,7 +910,7 @@ float64x1_t test_vld1_lane_f64(float64_t
   return vld1_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(i8* %a, <8 x i8> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = load i8, i8* %a
 // CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
 // CHECK:   ret <8 x i8> [[VLD1_LANE]]
@@ -918,7 +918,7 @@ poly8x8_t test_vld1_lane_p8(poly8_t  *a,
   return vld1_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(i16* %a, <4 x i16> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -930,7 +930,7 @@ poly16x4_t test_vld1_lane_p16(poly16_t
   return vld1_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_lane_p64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_p64(i64* %a, <1 x i64> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -942,7 +942,7 @@ poly64x1_t test_vld1_lane_p64(poly64_t
   return vld1_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[SRC:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
@@ -971,7 +971,7 @@ int8x16x2_t test_vld2q_lane_s8(int8_t co
   return vld2q_lane_s8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[SRC:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
@@ -1000,7 +1000,7 @@ uint8x16x2_t test_vld2q_lane_u8(uint8_t
   return vld2q_lane_u8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[SRC:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
@@ -1029,7 +1029,7 @@ poly8x16x2_t test_vld2q_lane_p8(poly8_t
   return vld2q_lane_p8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK:   [[SRC:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
@@ -1061,7 +1061,7 @@ int8x16x3_t test_vld3q_lane_s8(int8_t co
   return vld3q_lane_s8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK:   [[SRC:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
@@ -1093,7 +1093,7 @@ uint8x16x3_t test_vld3q_lane_u8(uint8_t
   return vld3q_lane_u8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
@@ -1127,7 +1127,7 @@ uint16x8x2_t test_vld2q_lane_u16(uint16_
   return vld2q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
@@ -1161,7 +1161,7 @@ uint32x4x2_t test_vld2q_lane_u32(uint32_
   return vld2q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
@@ -1195,7 +1195,7 @@ uint64x2x2_t test_vld2q_lane_u64(uint64_
   return vld2q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
@@ -1229,7 +1229,7 @@ int16x8x2_t test_vld2q_lane_s16(int16_t
   return vld2q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
@@ -1263,7 +1263,7 @@ int32x4x2_t test_vld2q_lane_s32(int32_t
   return vld2q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
@@ -1297,7 +1297,7 @@ int64x2x2_t test_vld2q_lane_s64(int64_t
   return vld2q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
@@ -1331,7 +1331,7 @@ float16x8x2_t test_vld2q_lane_f16(float1
   return vld2q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
@@ -1365,7 +1365,7 @@ float32x4x2_t test_vld2q_lane_f32(float3
   return vld2q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
@@ -1399,7 +1399,7 @@ float64x2x2_t test_vld2q_lane_f64(float6
   return vld2q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
@@ -1433,7 +1433,7 @@ poly16x8x2_t test_vld2q_lane_p16(poly16_
   return vld2q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
@@ -1467,7 +1467,7 @@ poly64x2x2_t test_vld2q_lane_p64(poly64_
   return vld2q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
@@ -1496,7 +1496,7 @@ uint8x8x2_t test_vld2_lane_u8(uint8_t  *
   return vld2_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
@@ -1530,7 +1530,7 @@ uint16x4x2_t test_vld2_lane_u16(uint16_t
   return vld2_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
@@ -1564,7 +1564,7 @@ uint32x2x2_t test_vld2_lane_u32(uint32_t
   return vld2_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
@@ -1598,7 +1598,7 @@ uint64x1x2_t test_vld2_lane_u64(uint64_t
   return vld2_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
@@ -1627,7 +1627,7 @@ int8x8x2_t test_vld2_lane_s8(int8_t  *a,
   return vld2_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
@@ -1661,7 +1661,7 @@ int16x4x2_t test_vld2_lane_s16(int16_t
   return vld2_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
@@ -1695,7 +1695,7 @@ int32x2x2_t test_vld2_lane_s32(int32_t
   return vld2_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
@@ -1729,7 +1729,7 @@ int64x1x2_t test_vld2_lane_s64(int64_t
   return vld2_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
@@ -1763,7 +1763,7 @@ float16x4x2_t test_vld2_lane_f16(float16
   return vld2_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
@@ -1797,7 +1797,7 @@ float32x2x2_t test_vld2_lane_f32(float32
   return vld2_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
@@ -1831,7 +1831,7 @@ float64x1x2_t test_vld2_lane_f64(float64
   return vld2_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
@@ -1860,7 +1860,7 @@ poly8x8x2_t test_vld2_lane_p8(poly8_t  *
   return vld2_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
@@ -1894,7 +1894,7 @@ poly16x4x2_t test_vld2_lane_p16(poly16_t
   return vld2_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
@@ -1928,7 +1928,7 @@ poly64x1x2_t test_vld2_lane_p64(poly64_t
   return vld2_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
@@ -1967,7 +1967,7 @@ uint16x8x3_t test_vld3q_lane_u16(uint16_
   return vld3q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
@@ -2006,7 +2006,7 @@ uint32x4x3_t test_vld3q_lane_u32(uint32_
   return vld3q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
@@ -2045,7 +2045,7 @@ uint64x2x3_t test_vld3q_lane_u64(uint64_
   return vld3q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
@@ -2084,7 +2084,7 @@ int16x8x3_t test_vld3q_lane_s16(int16_t
   return vld3q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
@@ -2123,7 +2123,7 @@ int32x4x3_t test_vld3q_lane_s32(int32_t
   return vld3q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
@@ -2162,7 +2162,7 @@ int64x2x3_t test_vld3q_lane_s64(int64_t
   return vld3q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
@@ -2201,7 +2201,7 @@ float16x8x3_t test_vld3q_lane_f16(float1
   return vld3q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
@@ -2240,7 +2240,7 @@ float32x4x3_t test_vld3q_lane_f32(float3
   return vld3q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
@@ -2279,7 +2279,7 @@ float64x2x3_t test_vld3q_lane_f64(float6
   return vld3q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
@@ -2311,7 +2311,7 @@ poly8x16x3_t test_vld3q_lane_p8(poly8_t
   return vld3q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
@@ -2350,7 +2350,7 @@ poly16x8x3_t test_vld3q_lane_p16(poly16_
   return vld3q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
@@ -2389,7 +2389,7 @@ poly64x2x3_t test_vld3q_lane_p64(poly64_
   return vld3q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
@@ -2421,7 +2421,7 @@ uint8x8x3_t test_vld3_lane_u8(uint8_t  *
   return vld3_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
@@ -2460,7 +2460,7 @@ uint16x4x3_t test_vld3_lane_u16(uint16_t
   return vld3_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
@@ -2499,7 +2499,7 @@ uint32x2x3_t test_vld3_lane_u32(uint32_t
   return vld3_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
@@ -2538,7 +2538,7 @@ uint64x1x3_t test_vld3_lane_u64(uint64_t
   return vld3_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
@@ -2570,7 +2570,7 @@ int8x8x3_t test_vld3_lane_s8(int8_t  *a,
   return vld3_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
@@ -2609,7 +2609,7 @@ int16x4x3_t test_vld3_lane_s16(int16_t
   return vld3_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
@@ -2648,7 +2648,7 @@ int32x2x3_t test_vld3_lane_s32(int32_t
   return vld3_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
@@ -2687,7 +2687,7 @@ int64x1x3_t test_vld3_lane_s64(int64_t
   return vld3_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
@@ -2726,7 +2726,7 @@ float16x4x3_t test_vld3_lane_f16(float16
   return vld3_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
@@ -2765,7 +2765,7 @@ float32x2x3_t test_vld3_lane_f32(float32
   return vld3_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
@@ -2804,7 +2804,7 @@ float64x1x3_t test_vld3_lane_f64(float64
   return vld3_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
@@ -2836,7 +2836,7 @@ poly8x8x3_t test_vld3_lane_p8(poly8_t  *
   return vld3_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
@@ -2875,7 +2875,7 @@ poly16x4x3_t test_vld3_lane_p16(poly16_t
   return vld3_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
@@ -2914,7 +2914,7 @@ poly64x1x3_t test_vld3_lane_p64(poly64_t
   return vld3_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
@@ -2949,7 +2949,7 @@ uint8x16x4_t test_vld4q_lane_u8(uint8_t
   return vld4q_lane_u8(a, b, 15);
 }
 
-// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
@@ -2993,7 +2993,7 @@ uint16x8x4_t test_vld4q_lane_u16(uint16_
   return vld4q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
@@ -3037,7 +3037,7 @@ uint32x4x4_t test_vld4q_lane_u32(uint32_
   return vld4q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
@@ -3081,7 +3081,7 @@ uint64x2x4_t test_vld4q_lane_u64(uint64_
   return vld4q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
@@ -3116,7 +3116,7 @@ int8x16x4_t test_vld4q_lane_s8(int8_t  *
   return vld4q_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
@@ -3160,7 +3160,7 @@ int16x8x4_t test_vld4q_lane_s16(int16_t
   return vld4q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
@@ -3204,7 +3204,7 @@ int32x4x4_t test_vld4q_lane_s32(int32_t
   return vld4q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
@@ -3248,7 +3248,7 @@ int64x2x4_t test_vld4q_lane_s64(int64_t
   return vld4q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
@@ -3292,7 +3292,7 @@ float16x8x4_t test_vld4q_lane_f16(float1
   return vld4q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
@@ -3336,7 +3336,7 @@ float32x4x4_t test_vld4q_lane_f32(float3
   return vld4q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
@@ -3380,7 +3380,7 @@ float64x2x4_t test_vld4q_lane_f64(float6
   return vld4q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
@@ -3415,7 +3415,7 @@ poly8x16x4_t test_vld4q_lane_p8(poly8_t
   return vld4q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
@@ -3459,7 +3459,7 @@ poly16x8x4_t test_vld4q_lane_p16(poly16_
   return vld4q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
@@ -3503,7 +3503,7 @@ poly64x2x4_t test_vld4q_lane_p64(poly64_
   return vld4q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
@@ -3538,7 +3538,7 @@ uint8x8x4_t test_vld4_lane_u8(uint8_t  *
   return vld4_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
@@ -3582,7 +3582,7 @@ uint16x4x4_t test_vld4_lane_u16(uint16_t
   return vld4_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
@@ -3626,7 +3626,7 @@ uint32x2x4_t test_vld4_lane_u32(uint32_t
   return vld4_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
@@ -3670,7 +3670,7 @@ uint64x1x4_t test_vld4_lane_u64(uint64_t
   return vld4_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
@@ -3705,7 +3705,7 @@ int8x8x4_t test_vld4_lane_s8(int8_t  *a,
   return vld4_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
@@ -3749,7 +3749,7 @@ int16x4x4_t test_vld4_lane_s16(int16_t
   return vld4_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
@@ -3793,7 +3793,7 @@ int32x2x4_t test_vld4_lane_s32(int32_t
   return vld4_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
@@ -3837,7 +3837,7 @@ int64x1x4_t test_vld4_lane_s64(int64_t
   return vld4_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
@@ -3881,7 +3881,7 @@ float16x4x4_t test_vld4_lane_f16(float16
   return vld4_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
@@ -3925,7 +3925,7 @@ float32x2x4_t test_vld4_lane_f32(float32
   return vld4_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
@@ -3969,7 +3969,7 @@ float64x1x4_t test_vld4_lane_f64(float64
   return vld4_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
@@ -4004,7 +4004,7 @@ poly8x8x4_t test_vld4_lane_p8(poly8_t  *
   return vld4_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
@@ -4048,7 +4048,7 @@ poly16x4x4_t test_vld4_lane_p16(poly16_t
   return vld4_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
@@ -4248,7 +4248,7 @@ void test_vst1q_lane_p64(poly64_t  *a, p
   vst1q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_u8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_u8(i8* %a, <8 x i8> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
 // CHECK:   store i8 [[TMP0]], i8* %a
 // CHECK:   ret void
@@ -4256,7 +4256,7 @@ void test_vst1_lane_u8(uint8_t  *a, uint
   vst1_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_u16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_u16(i16* %a, <4 x i16> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -4268,7 +4268,7 @@ void test_vst1_lane_u16(uint16_t  *a, ui
   vst1_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_u32(i32* %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_u32(i32* %a, <2 x i32> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -4280,7 +4280,7 @@ void test_vst1_lane_u32(uint32_t  *a, ui
   vst1_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_u64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_u64(i64* %a, <1 x i64> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -4292,7 +4292,7 @@ void test_vst1_lane_u64(uint64_t  *a, ui
   vst1_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
 // CHECK:   store i8 [[TMP0]], i8* %a
 // CHECK:   ret void
@@ -4300,7 +4300,7 @@ void test_vst1_lane_s8(int8_t  *a, int8x
   vst1_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -4312,7 +4312,7 @@ void test_vst1_lane_s16(int16_t  *a, int
   vst1_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -4324,7 +4324,7 @@ void test_vst1_lane_s32(int32_t  *a, int
   vst1_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -4336,7 +4336,7 @@ void test_vst1_lane_s64(int64_t  *a, int
   vst1_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
@@ -4348,7 +4348,7 @@ void test_vst1_lane_f16(float16_t  *a, f
   vst1_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_f32(float* %a, <2 x float> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_f32(float* %a, <2 x float> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
@@ -4360,7 +4360,7 @@ void test_vst1_lane_f32(float32_t  *a, f
   vst1_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_f64(double* %a, <1 x double> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_f64(double* %a, <1 x double> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
@@ -4372,7 +4372,7 @@ void test_vst1_lane_f64(float64_t  *a, f
   vst1_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_p8(i8* %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_p8(i8* %a, <8 x i8> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
 // CHECK:   store i8 [[TMP0]], i8* %a
 // CHECK:   ret void
@@ -4380,7 +4380,7 @@ void test_vst1_lane_p8(poly8_t  *a, poly
   vst1_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_p16(i16* %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_p16(i16* %a, <4 x i16> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -4392,7 +4392,7 @@ void test_vst1_lane_p16(poly16_t  *a, po
   vst1_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst1_lane_p64(i64* %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define void @test_vst1_lane_p64(i64* %a, <1 x i64> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
@@ -4404,7 +4404,7 @@ void test_vst1_lane_p64(poly64_t  *a, po
   vst1_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
@@ -4424,7 +4424,7 @@ void test_vst2q_lane_u8(uint8_t  *a, uin
   vst2q_lane_u8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
@@ -4449,7 +4449,7 @@ void test_vst2q_lane_u16(uint16_t  *a, u
   vst2q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
@@ -4474,7 +4474,7 @@ void test_vst2q_lane_u32(uint32_t  *a, u
   vst2q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
@@ -4499,7 +4499,7 @@ void test_vst2q_lane_u64(uint64_t  *a, u
   vst2q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
@@ -4519,7 +4519,7 @@ void test_vst2q_lane_s8(int8_t  *a, int8
   vst2q_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
@@ -4544,7 +4544,7 @@ void test_vst2q_lane_s16(int16_t  *a, in
   vst2q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
@@ -4569,7 +4569,7 @@ void test_vst2q_lane_s32(int32_t  *a, in
   vst2q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
@@ -4594,7 +4594,7 @@ void test_vst2q_lane_s64(int64_t  *a, in
   vst2q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
@@ -4619,7 +4619,7 @@ void test_vst2q_lane_f16(float16_t  *a,
   vst2q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
@@ -4644,7 +4644,7 @@ void test_vst2q_lane_f32(float32_t  *a,
   vst2q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
@@ -4669,7 +4669,7 @@ void test_vst2q_lane_f64(float64_t  *a,
   vst2q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
@@ -4689,7 +4689,7 @@ void test_vst2q_lane_p8(poly8_t  *a, pol
   vst2q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
@@ -4714,7 +4714,7 @@ void test_vst2q_lane_p16(poly16_t  *a, p
   vst2q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0
@@ -4739,7 +4739,7 @@ void test_vst2q_lane_p64(poly64_t  *a, p
   vst2q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
@@ -4759,7 +4759,7 @@ void test_vst2_lane_u8(uint8_t  *a, uint
   vst2_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
@@ -4784,7 +4784,7 @@ void test_vst2_lane_u16(uint16_t  *a, ui
   vst2_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
@@ -4809,7 +4809,7 @@ void test_vst2_lane_u32(uint32_t  *a, ui
   vst2_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
@@ -4834,7 +4834,7 @@ void test_vst2_lane_u64(uint64_t  *a, ui
   vst2_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
@@ -4854,7 +4854,7 @@ void test_vst2_lane_s8(int8_t  *a, int8x
   vst2_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
@@ -4879,7 +4879,7 @@ void test_vst2_lane_s16(int16_t  *a, int
   vst2_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
@@ -4904,7 +4904,7 @@ void test_vst2_lane_s32(int32_t  *a, int
   vst2_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
@@ -4929,7 +4929,7 @@ void test_vst2_lane_s64(int64_t  *a, int
   vst2_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
@@ -4954,7 +4954,7 @@ void test_vst2_lane_f16(float16_t  *a, f
   vst2_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
@@ -4979,7 +4979,7 @@ void test_vst2_lane_f32(float32_t  *a, f
   vst2_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
@@ -5004,7 +5004,7 @@ void test_vst2_lane_f64(float64_t  *a, f
   vst2_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
@@ -5024,7 +5024,7 @@ void test_vst2_lane_p8(poly8_t  *a, poly
   vst2_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
@@ -5049,7 +5049,7 @@ void test_vst2_lane_p16(poly16_t  *a, po
   vst2_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0
@@ -5074,7 +5074,7 @@ void test_vst2_lane_p64(poly64_t  *a, po
   vst2_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
@@ -5097,7 +5097,7 @@ void test_vst3q_lane_u8(uint8_t  *a, uin
   vst3q_lane_u8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
@@ -5127,7 +5127,7 @@ void test_vst3q_lane_u16(uint16_t  *a, u
   vst3q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
@@ -5157,7 +5157,7 @@ void test_vst3q_lane_u32(uint32_t  *a, u
   vst3q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
@@ -5187,7 +5187,7 @@ void test_vst3q_lane_u64(uint64_t  *a, u
   vst3q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
@@ -5210,7 +5210,7 @@ void test_vst3q_lane_s8(int8_t  *a, int8
   vst3q_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
@@ -5240,7 +5240,7 @@ void test_vst3q_lane_s16(int16_t  *a, in
   vst3q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
@@ -5270,7 +5270,7 @@ void test_vst3q_lane_s32(int32_t  *a, in
   vst3q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
@@ -5300,7 +5300,7 @@ void test_vst3q_lane_s64(int64_t  *a, in
   vst3q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
@@ -5330,7 +5330,7 @@ void test_vst3q_lane_f16(float16_t  *a,
   vst3q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
@@ -5360,7 +5360,7 @@ void test_vst3q_lane_f32(float32_t  *a,
   vst3q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
@@ -5390,7 +5390,7 @@ void test_vst3q_lane_f64(float64_t  *a,
   vst3q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
@@ -5413,7 +5413,7 @@ void test_vst3q_lane_p8(poly8_t  *a, pol
   vst3q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
@@ -5443,7 +5443,7 @@ void test_vst3q_lane_p16(poly16_t  *a, p
   vst3q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
@@ -5473,7 +5473,7 @@ void test_vst3q_lane_p64(poly64_t  *a, p
   vst3q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
@@ -5496,7 +5496,7 @@ void test_vst3_lane_u8(uint8_t  *a, uint
   vst3_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
@@ -5526,7 +5526,7 @@ void test_vst3_lane_u16(uint16_t  *a, ui
   vst3_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
@@ -5556,7 +5556,7 @@ void test_vst3_lane_u32(uint32_t  *a, ui
   vst3_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
@@ -5586,7 +5586,7 @@ void test_vst3_lane_u64(uint64_t  *a, ui
   vst3_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
@@ -5609,7 +5609,7 @@ void test_vst3_lane_s8(int8_t  *a, int8x
   vst3_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
@@ -5639,7 +5639,7 @@ void test_vst3_lane_s16(int16_t  *a, int
   vst3_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
@@ -5669,7 +5669,7 @@ void test_vst3_lane_s32(int32_t  *a, int
   vst3_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
@@ -5699,7 +5699,7 @@ void test_vst3_lane_s64(int64_t  *a, int
   vst3_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
@@ -5729,7 +5729,7 @@ void test_vst3_lane_f16(float16_t  *a, f
   vst3_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
@@ -5759,7 +5759,7 @@ void test_vst3_lane_f32(float32_t  *a, f
   vst3_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
@@ -5789,7 +5789,7 @@ void test_vst3_lane_f64(float64_t  *a, f
   vst3_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
@@ -5812,7 +5812,7 @@ void test_vst3_lane_p8(poly8_t  *a, poly
   vst3_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
@@ -5842,7 +5842,7 @@ void test_vst3_lane_p16(poly16_t  *a, po
   vst3_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
@@ -5872,7 +5872,7 @@ void test_vst3_lane_p64(poly64_t  *a, po
   vst3_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
@@ -5898,7 +5898,7 @@ void test_vst4q_lane_u8(uint8_t  *a, uin
   vst4q_lane_u8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
@@ -5933,7 +5933,7 @@ void test_vst4q_lane_u16(uint16_t  *a, u
   vst4q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
@@ -5968,7 +5968,7 @@ void test_vst4q_lane_u32(uint32_t  *a, u
   vst4q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
@@ -6003,7 +6003,7 @@ void test_vst4q_lane_u64(uint64_t  *a, u
   vst4q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
@@ -6029,7 +6029,7 @@ void test_vst4q_lane_s8(int8_t  *a, int8
   vst4q_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
@@ -6064,7 +6064,7 @@ void test_vst4q_lane_s16(int16_t  *a, in
   vst4q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
@@ -6099,7 +6099,7 @@ void test_vst4q_lane_s32(int32_t  *a, in
   vst4q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
@@ -6134,7 +6134,7 @@ void test_vst4q_lane_s64(int64_t  *a, in
   vst4q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
@@ -6169,7 +6169,7 @@ void test_vst4q_lane_f16(float16_t  *a,
   vst4q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
@@ -6204,7 +6204,7 @@ void test_vst4q_lane_f32(float32_t  *a,
   vst4q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
@@ -6239,7 +6239,7 @@ void test_vst4q_lane_f64(float64_t  *a,
   vst4q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
@@ -6265,7 +6265,7 @@ void test_vst4q_lane_p8(poly8_t  *a, pol
   vst4q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
@@ -6300,7 +6300,7 @@ void test_vst4q_lane_p16(poly16_t  *a, p
   vst4q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
@@ -6335,7 +6335,7 @@ void test_vst4q_lane_p64(poly64_t  *a, p
   vst4q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
@@ -6361,7 +6361,7 @@ void test_vst4_lane_u8(uint8_t  *a, uint
   vst4_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
@@ -6396,7 +6396,7 @@ void test_vst4_lane_u16(uint16_t  *a, ui
   vst4_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
@@ -6431,7 +6431,7 @@ void test_vst4_lane_u32(uint32_t  *a, ui
   vst4_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
@@ -6466,7 +6466,7 @@ void test_vst4_lane_u64(uint64_t  *a, ui
   vst4_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
@@ -6492,7 +6492,7 @@ void test_vst4_lane_s8(int8_t  *a, int8x
   vst4_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
@@ -6527,7 +6527,7 @@ void test_vst4_lane_s16(int16_t  *a, int
   vst4_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
@@ -6562,7 +6562,7 @@ void test_vst4_lane_s32(int32_t  *a, int
   vst4_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
@@ -6597,7 +6597,7 @@ void test_vst4_lane_s64(int64_t  *a, int
   vst4_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
@@ -6632,7 +6632,7 @@ void test_vst4_lane_f16(float16_t  *a, f
   vst4_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
@@ -6667,7 +6667,7 @@ void test_vst4_lane_f32(float32_t  *a, f
   vst4_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
@@ -6702,7 +6702,7 @@ void test_vst4_lane_f64(float64_t  *a, f
   vst4_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
@@ -6728,7 +6728,7 @@ void test_vst4_lane_p8(poly8_t  *a, poly
   vst4_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
@@ -6763,7 +6763,7 @@ void test_vst4_lane_p16(poly16_t  *a, po
   vst4_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define void @test_vst4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #2 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0
@@ -6797,3 +6797,7 @@ void test_vst4_lane_p16(poly16_t  *a, po
 void test_vst4_lane_p64(poly64_t  *a, poly64x1x4_t b) {
   vst4_lane_p64(a, b, 0);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="128"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="64"
+// CHECK-NOT: attributes #2 ={{.*}}"min-legal-vector-width"

Modified: cfe/trunk/test/CodeGen/aarch64-neon-scalar-copy.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-scalar-copy.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-scalar-copy.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-scalar-copy.c Wed Oct 24 10:42:17 2018
@@ -23,7 +23,7 @@ float64_t test_vdupd_lane_f64(float64x1_
 }
 
 
-// CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -33,7 +33,7 @@ float32_t test_vdups_laneq_f32(float32x4
 }
 
 
-// CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -118,7 +118,7 @@ uint64_t test_vdupd_lane_u64(uint64x1_t
   return vdupd_lane_u64(a, 0);
 }
 
-// CHECK-LABEL: define i8 @test_vdupb_laneq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vdupb_laneq_s8(<16 x i8> %a) #1 {
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
 // CHECK:   ret i8 [[VGETQ_LANE]]
 int8_t test_vdupb_laneq_s8(int8x16_t a) {
@@ -126,7 +126,7 @@ int8_t test_vdupb_laneq_s8(int8x16_t a)
 }
 
 
-// CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -136,7 +136,7 @@ int16_t test_vduph_laneq_s16(int16x8_t a
 }
 
 
-// CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -146,7 +146,7 @@ int32_t test_vdups_laneq_s32(int32x4_t a
 }
 
 
-// CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -156,7 +156,7 @@ int64_t test_vdupd_laneq_s64(int64x2_t a
 }
 
 
-// CHECK-LABEL: define i8 @test_vdupb_laneq_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vdupb_laneq_u8(<16 x i8> %a) #1 {
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
 // CHECK:   ret i8 [[VGETQ_LANE]]
 uint8_t test_vdupb_laneq_u8(uint8x16_t a) {
@@ -164,7 +164,7 @@ uint8_t test_vdupb_laneq_u8(uint8x16_t a
 }
 
 
-// CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -174,7 +174,7 @@ uint16_t test_vduph_laneq_u16(uint16x8_t
 }
 
 
-// CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -184,7 +184,7 @@ uint32_t test_vdups_laneq_u32(uint32x4_t
 }
 
 
-// CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -209,14 +209,14 @@ poly16_t test_vduph_lane_p16(poly16x4_t
   return vduph_lane_p16(a, 3);
 }
 
-// CHECK-LABEL: define i8 @test_vdupb_laneq_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vdupb_laneq_p8(<16 x i8> %a) #1 {
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
 // CHECK:   ret i8 [[VGETQ_LANE]]
 poly8_t test_vdupb_laneq_p8(poly8x16_t a) {
   return vdupb_laneq_p8(a, 15);
 }
 
-// CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -225,3 +225,5 @@ poly16_t test_vduph_laneq_p16(poly16x8_t
   return vduph_laneq_p16(a, 7);
 }
 
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

Modified: cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c Wed Oct 24 10:42:17 2018
@@ -26,7 +26,7 @@ float64_t test_vmuld_lane_f64(float64_t
   return vmuld_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define float @test_vmuls_laneq_f32(float %a, <4 x float> %b) #0 {
+// CHECK-LABEL: define float @test_vmuls_laneq_f32(float %a, <4 x float> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -36,7 +36,7 @@ float32_t test_vmuls_laneq_f32(float32_t
   return vmuls_laneq_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define double @test_vmuld_laneq_f64(double %a, <2 x double> %b) #0 {
+// CHECK-LABEL: define double @test_vmuld_laneq_f64(double %a, <2 x double> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -65,7 +65,7 @@ float32_t test_vmulxs_lane_f32(float32_t
   return vmulxs_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #0 {
+// CHECK-LABEL: define float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -85,7 +85,7 @@ float64_t test_vmulxd_lane_f64(float64_t
   return vmulxd_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define double @test_vmulxd_laneq_f64(double %a, <2 x double> %b) #0 {
+// CHECK-LABEL: define double @test_vmulxd_laneq_f64(double %a, <2 x double> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -112,7 +112,7 @@ float64x1_t test_vmulx_lane_f64(float64x
 }
 
 
-// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
 // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
@@ -128,7 +128,7 @@ float64x1_t test_vmulx_laneq_f64_0(float
   return vmulx_laneq_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_1(<1 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_1(<1 x double> %a, <2 x double> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
 // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
@@ -165,7 +165,7 @@ float64_t test_vfmad_lane_f64(float64_t
   return vfmad_lane_f64(a, b, c, 0);
 }
 
-// CHECK-LABEL: define double @test_vfmad_laneq_f64(double %a, double %b, <2 x double> %c) #0 {
+// CHECK-LABEL: define double @test_vfmad_laneq_f64(double %a, double %b, <2 x double> %c) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %c to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
 // CHECK:   [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -215,7 +215,7 @@ float64x1_t test_vfms_lane_f64(float64x1
   return vfms_lane_f64(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vfma_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #0 {
+// CHECK-LABEL: define <1 x double> @test_vfma_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
@@ -230,7 +230,7 @@ float64x1_t test_vfma_laneq_f64(float64x
   return vfma_laneq_f64(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vfms_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #0 {
+// CHECK-LABEL: define <1 x double> @test_vfms_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #1 {
 // CHECK:   [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
@@ -269,7 +269,7 @@ int64_t test_vqdmulls_lane_s32(int32_t a
   return vqdmulls_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -282,7 +282,7 @@ int32_t test_vqdmullh_laneq_s16(int16_t
   return vqdmullh_laneq_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -316,7 +316,7 @@ int32_t test_vqdmulhs_lane_s32(int32_t a
 }
 
 
-// CHECK-LABEL: define i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -330,7 +330,7 @@ int16_t test_vqdmulhh_laneq_s16(int16_t
 }
 
 
-// CHECK-LABEL: define i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -364,7 +364,7 @@ int32_t test_vqrdmulhs_lane_s32(int32_t
 }
 
 
-// CHECK-LABEL: define i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -378,7 +378,7 @@ int16_t test_vqrdmulhh_laneq_s16(int16_t
 }
 
 
-// CHECK-LABEL: define i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -413,7 +413,7 @@ int64_t test_vqdmlals_lane_s32(int64_t a
   return vqdmlals_lane_s32(a, b, c, 1);
 }
 
-// CHECK-LABEL: define i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #0 {
+// CHECK-LABEL: define i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -427,7 +427,7 @@ int32_t test_vqdmlalh_laneq_s16(int32_t
   return vqdmlalh_laneq_s16(a, b, c, 7);
 }
 
-// CHECK-LABEL: define i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #0 {
+// CHECK-LABEL: define i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -463,7 +463,7 @@ int64_t test_vqdmlsls_lane_s32(int64_t a
   return vqdmlsls_lane_s32(a, b, c, 1);
 }
 
-// CHECK-LABEL: define i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #0 {
+// CHECK-LABEL: define i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -477,7 +477,7 @@ int32_t test_vqdmlslh_laneq_s16(int32_t
   return vqdmlslh_laneq_s16(a, b, c, 7);
 }
 
-// CHECK-LABEL: define i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #0 {
+// CHECK-LABEL: define i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -513,7 +513,7 @@ float64x1_t test_vmulx_lane_f64_0() {
       return result;
 }
 
-// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #0 {
+// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
 // CHECK:   [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
@@ -540,3 +540,6 @@ float64x1_t test_vmulx_laneq_f64_2() {
       result = vmulx_laneq_f64(arg1, arg3, 1);
       return result;
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

Modified: cfe/trunk/test/CodeGen/aarch64-neon-tbl.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-tbl.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-tbl.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-tbl.c Wed Oct 24 10:42:17 2018
@@ -7,14 +7,14 @@
 
 // CHECK-LABEL: define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
 // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
+// CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL11_I]]
 int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
   return vtbl1_s8(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) #0 {
-// CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
+// CHECK-LABEL: define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) #1 {
+// CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL1_I]]
 int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) {
   return vqtbl1_s8(a, b);
@@ -36,7 +36,7 @@ int8x8_t test_vqtbl1_s8(int8x16_t a, int
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
 // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
+// CHECK:   [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL13_I]]
 int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
   return vtbl2_s8(a, b);
@@ -57,7 +57,7 @@ int8x8_t test_vtbl2_s8(int8x8x2_t a, int
 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2
+// CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL2_I]]
 int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) {
   return vqtbl2_s8(a, b);
@@ -83,7 +83,7 @@ int8x8_t test_vqtbl2_s8(int8x16x2_t a, i
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
 // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2
+// CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL26_I]]
 int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
   return vtbl3_s8(a, b);
@@ -107,7 +107,7 @@ int8x8_t test_vtbl3_s8(int8x8x3_t a, int
 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2
+// CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL3_I]]
 int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) {
   return vqtbl3_s8(a, b);
@@ -136,7 +136,7 @@ int8x8_t test_vqtbl3_s8(int8x16x3_t a, i
 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
 // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2
+// CHECK:   [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL28_I]]
 int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
   return vtbl4_s8(a, b);
@@ -163,20 +163,20 @@ int8x8_t test_vtbl4_s8(int8x8x4_t a, int
 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2
+// CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL4_I]]
 int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) {
   return vqtbl4_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
-// CHECK:   [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2
+// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) #1 {
+// CHECK:   [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3
 // CHECK:   ret <16 x i8> [[VTBL1_I]]
 int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
   return vqtbl1q_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[A:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0
@@ -191,13 +191,13 @@ int8x16_t test_vqtbl1q_s8(int8x16_t a, i
 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK:   [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2
+// CHECK:   [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3
 // CHECK:   ret <16 x i8> [[VTBL2_I]]
 int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
   return vqtbl2q_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK:   [[A:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[A]], i32 0, i32 0
@@ -215,13 +215,13 @@ int8x16_t test_vqtbl2q_s8(int8x16x2_t a,
 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK:   [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2
+// CHECK:   [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3
 // CHECK:   ret <16 x i8> [[VTBL3_I]]
 int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
   return vqtbl3q_s8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK:   [[A:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0
@@ -242,7 +242,7 @@ int8x16_t test_vqtbl3q_s8(int8x16x3_t a,
 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK:   [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2
+// CHECK:   [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3
 // CHECK:   ret <16 x i8> [[VTBL4_I]]
 int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
   return vqtbl4q_s8(a, b);
@@ -250,7 +250,7 @@ int8x16_t test_vqtbl4q_s8(int8x16x4_t a,
 
 // CHECK-LABEL: define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
 // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2
+// CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3
 // CHECK:   [[TMP0:%.*]] = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
 // CHECK:   [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
@@ -278,7 +278,7 @@ int8x8_t test_vtbx1_s8(int8x8_t a, int8x
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
 // CHECK:   [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2
+// CHECK:   [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX13_I]]
 int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
   return vtbx2_s8(a, b, c);
@@ -304,7 +304,7 @@ int8x8_t test_vtbx2_s8(int8x8_t a, int8x
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
 // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2
+// CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3
 // CHECK:   [[TMP4:%.*]] = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
 // CHECK:   [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
 // CHECK:   [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
@@ -339,14 +339,14 @@ int8x8_t test_vtbx3_s8(int8x8_t a, int8x
 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
 // CHECK:   [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2
+// CHECK:   [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX28_I]]
 int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
   return vtbx4_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 {
-// CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2
+// CHECK-LABEL: define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 {
+// CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX1_I]]
 int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) {
   return vqtbx1_s8(a, b, c);
@@ -367,7 +367,7 @@ int8x8_t test_vqtbx1_s8(int8x8_t a, int8
 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2
+// CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX2_I]]
 int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) {
   return vqtbx2_s8(a, b, c);
@@ -391,7 +391,7 @@ int8x8_t test_vqtbx2_s8(int8x8_t a, int8
 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2
+// CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX3_I]]
 int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) {
   return vqtbx3_s8(a, b, c);
@@ -418,20 +418,20 @@ int8x8_t test_vqtbx3_s8(int8x8_t a, int8
 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2
+// CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX4_I]]
 int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, int8x8_t c) {
   return vqtbx4_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
-// CHECK:   [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2
+// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 {
+// CHECK:   [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3
 // CHECK:   ret <16 x i8> [[VTBX1_I]]
 int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
   return vqtbx1q_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
@@ -446,13 +446,13 @@ int8x16_t test_vqtbx1q_s8(int8x16_t a, i
 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK:   [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2
+// CHECK:   [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3
 // CHECK:   ret <16 x i8> [[VTBX2_I]]
 int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
   return vqtbx2q_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
@@ -470,13 +470,13 @@ int8x16_t test_vqtbx2q_s8(int8x16_t a, i
 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK:   [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2
+// CHECK:   [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
 // CHECK:   ret <16 x i8> [[VTBX3_I]]
 int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
   return vqtbx3q_s8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
@@ -497,7 +497,7 @@ int8x16_t test_vqtbx3q_s8(int8x16_t a, i
 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK:   [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
+// CHECK:   [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
 // CHECK:   ret <16 x i8> [[VTBX4_I]]
 int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
   return vqtbx4q_s8(a, b, c);
@@ -505,14 +505,14 @@ int8x16_t test_vqtbx4q_s8(int8x16_t a, i
 
 // CHECK-LABEL: define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
 // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
+// CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL11_I]]
 uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
   return vtbl1_u8(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) #0 {
-// CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
+// CHECK-LABEL: define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) #1 {
+// CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL1_I]]
 uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
   return vqtbl1_u8(a, b);
@@ -534,7 +534,7 @@ uint8x8_t test_vqtbl1_u8(uint8x16_t a, u
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
 // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
+// CHECK:   [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL13_I]]
 uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
   return vtbl2_u8(a, b);
@@ -555,7 +555,7 @@ uint8x8_t test_vtbl2_u8(uint8x8x2_t a, u
 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2
+// CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL2_I]]
 uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
   return vqtbl2_u8(a, b);
@@ -581,7 +581,7 @@ uint8x8_t test_vqtbl2_u8(uint8x16x2_t a,
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
 // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2
+// CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL26_I]]
 uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
   return vtbl3_u8(a, b);
@@ -605,7 +605,7 @@ uint8x8_t test_vtbl3_u8(uint8x8x3_t a, u
 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2
+// CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL3_I]]
 uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
   return vqtbl3_u8(a, b);
@@ -634,7 +634,7 @@ uint8x8_t test_vqtbl3_u8(uint8x16x3_t a,
 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
 // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2
+// CHECK:   [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL28_I]]
 uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
   return vtbl4_u8(a, b);
@@ -661,20 +661,20 @@ uint8x8_t test_vtbl4_u8(uint8x8x4_t a, u
 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2
+// CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL4_I]]
 uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {
   return vqtbl4_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
-// CHECK:   [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2
+// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) #1 {
+// CHECK:   [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3
 // CHECK:   ret <16 x i8> [[VTBL1_I]]
 uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
   return vqtbl1q_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[A:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0
@@ -689,13 +689,13 @@ uint8x16_t test_vqtbl1q_u8(uint8x16_t a,
 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK:   [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2
+// CHECK:   [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3
 // CHECK:   ret <16 x i8> [[VTBL2_I]]
 uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
   return vqtbl2q_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK:   [[A:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0
@@ -713,13 +713,13 @@ uint8x16_t test_vqtbl2q_u8(uint8x16x2_t
 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK:   [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2
+// CHECK:   [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3
 // CHECK:   ret <16 x i8> [[VTBL3_I]]
 uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
   return vqtbl3q_u8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK:   [[A:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0
@@ -740,7 +740,7 @@ uint8x16_t test_vqtbl3q_u8(uint8x16x3_t
 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK:   [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2
+// CHECK:   [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3
 // CHECK:   ret <16 x i8> [[VTBL4_I]]
 uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
   return vqtbl4q_u8(a, b);
@@ -748,7 +748,7 @@ uint8x16_t test_vqtbl4q_u8(uint8x16x4_t
 
 // CHECK-LABEL: define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
 // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2
+// CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3
 // CHECK:   [[TMP0:%.*]] = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
 // CHECK:   [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
@@ -776,7 +776,7 @@ uint8x8_t test_vtbx1_u8(uint8x8_t a, uin
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
 // CHECK:   [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2
+// CHECK:   [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX13_I]]
 uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
   return vtbx2_u8(a, b, c);
@@ -802,7 +802,7 @@ uint8x8_t test_vtbx2_u8(uint8x8_t a, uin
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
 // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2
+// CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3
 // CHECK:   [[TMP4:%.*]] = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
 // CHECK:   [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
 // CHECK:   [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
@@ -837,14 +837,14 @@ uint8x8_t test_vtbx3_u8(uint8x8_t a, uin
 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
 // CHECK:   [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2
+// CHECK:   [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX28_I]]
 uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
   return vtbx4_u8(a, b, c);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 {
-// CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2
+// CHECK-LABEL: define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 {
+// CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX1_I]]
 uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
   return vqtbx1_u8(a, b, c);
@@ -865,7 +865,7 @@ uint8x8_t test_vqtbx1_u8(uint8x8_t a, ui
 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2
+// CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX2_I]]
 uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
   return vqtbx2_u8(a, b, c);
@@ -889,7 +889,7 @@ uint8x8_t test_vqtbx2_u8(uint8x8_t a, ui
 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2
+// CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX3_I]]
 uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
   return vqtbx3_u8(a, b, c);
@@ -916,20 +916,20 @@ uint8x8_t test_vqtbx3_u8(uint8x8_t a, ui
 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2
+// CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX4_I]]
 uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) {
   return vqtbx4_u8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
-// CHECK:   [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2
+// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 {
+// CHECK:   [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3
 // CHECK:   ret <16 x i8> [[VTBX1_I]]
 uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
   return vqtbx1q_u8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
@@ -944,13 +944,13 @@ uint8x16_t test_vqtbx1q_u8(uint8x16_t a,
 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK:   [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2
+// CHECK:   [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3
 // CHECK:   ret <16 x i8> [[VTBX2_I]]
 uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
   return vqtbx2q_u8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
@@ -968,13 +968,13 @@ uint8x16_t test_vqtbx2q_u8(uint8x16_t a,
 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK:   [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2
+// CHECK:   [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
 // CHECK:   ret <16 x i8> [[VTBX3_I]]
 uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
   return vqtbx3q_u8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
@@ -995,7 +995,7 @@ uint8x16_t test_vqtbx3q_u8(uint8x16_t a,
 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK:   [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
+// CHECK:   [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
 // CHECK:   ret <16 x i8> [[VTBX4_I]]
 uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
   return vqtbx4q_u8(a, b, c);
@@ -1003,14 +1003,14 @@ uint8x16_t test_vqtbx4q_u8(uint8x16_t a,
 
 // CHECK-LABEL: define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
 // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
+// CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL11_I]]
 poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
   return vtbl1_p8(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) #0 {
-// CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
+// CHECK-LABEL: define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) #1 {
+// CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL1_I]]
 poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
   return vqtbl1_p8(a, b);
@@ -1032,7 +1032,7 @@ poly8x8_t test_vqtbl1_p8(poly8x16_t a, u
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
 // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
+// CHECK:   [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL13_I]]
 poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
   return vtbl2_p8(a, b);
@@ -1053,7 +1053,7 @@ poly8x8_t test_vtbl2_p8(poly8x8x2_t a, u
 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2
+// CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL2_I]]
 poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
   return vqtbl2_p8(a, b);
@@ -1079,7 +1079,7 @@ poly8x8_t test_vqtbl2_p8(poly8x16x2_t a,
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
 // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2
+// CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL26_I]]
 poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
   return vtbl3_p8(a, b);
@@ -1103,7 +1103,7 @@ poly8x8_t test_vtbl3_p8(poly8x8x3_t a, u
 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2
+// CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL3_I]]
 poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
   return vqtbl3_p8(a, b);
@@ -1132,7 +1132,7 @@ poly8x8_t test_vqtbl3_p8(poly8x16x3_t a,
 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
 // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2
+// CHECK:   [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL28_I]]
 poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
   return vtbl4_p8(a, b);
@@ -1159,20 +1159,20 @@ poly8x8_t test_vtbl4_p8(poly8x8x4_t a, u
 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2
+// CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3
 // CHECK:   ret <8 x i8> [[VTBL4_I]]
 poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
   return vqtbl4_p8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
-// CHECK:   [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2
+// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) #1 {
+// CHECK:   [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3
 // CHECK:   ret <16 x i8> [[VTBL1_I]]
 poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
   return vqtbl1q_p8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[A:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0
@@ -1187,13 +1187,13 @@ poly8x16_t test_vqtbl1q_p8(poly8x16_t a,
 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK:   [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2
+// CHECK:   [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3
 // CHECK:   ret <16 x i8> [[VTBL2_I]]
 poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
   return vqtbl2q_p8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK:   [[A:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
@@ -1211,13 +1211,13 @@ poly8x16_t test_vqtbl2q_p8(poly8x16x2_t
 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK:   [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2
+// CHECK:   [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3
 // CHECK:   ret <16 x i8> [[VTBL3_I]]
 poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
   return vqtbl3q_p8(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
 // CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK:   [[A:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
@@ -1238,7 +1238,7 @@ poly8x16_t test_vqtbl3q_p8(poly8x16x3_t
 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK:   [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2
+// CHECK:   [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3
 // CHECK:   ret <16 x i8> [[VTBL4_I]]
 poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
   return vqtbl4q_p8(a, b);
@@ -1246,7 +1246,7 @@ poly8x16_t test_vqtbl4q_p8(poly8x16x4_t
 
 // CHECK-LABEL: define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
 // CHECK:   [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2
+// CHECK:   [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3
 // CHECK:   [[TMP0:%.*]] = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
 // CHECK:   [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
@@ -1274,7 +1274,7 @@ poly8x8_t test_vtbx1_p8(poly8x8_t a, pol
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
 // CHECK:   [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2
+// CHECK:   [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX13_I]]
 poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
   return vtbx2_p8(a, b, c);
@@ -1300,7 +1300,7 @@ poly8x8_t test_vtbx2_p8(poly8x8_t a, pol
 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
 // CHECK:   [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2
+// CHECK:   [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3
 // CHECK:   [[TMP4:%.*]] = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
 // CHECK:   [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
 // CHECK:   [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
@@ -1335,14 +1335,14 @@ poly8x8_t test_vtbx3_p8(poly8x8_t a, pol
 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
 // CHECK:   [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK:   [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2
+// CHECK:   [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX28_I]]
 poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
   return vtbx4_p8(a, b, c);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 {
-// CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2
+// CHECK-LABEL: define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 {
+// CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX1_I]]
 poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
   return vqtbx1_p8(a, b, c);
@@ -1363,7 +1363,7 @@ poly8x8_t test_vqtbx1_p8(poly8x8_t a, ui
 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2
+// CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX2_I]]
 poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
   return vqtbx2_p8(a, b, c);
@@ -1387,7 +1387,7 @@ poly8x8_t test_vqtbx2_p8(poly8x8_t a, po
 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2
+// CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX3_I]]
 poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
   return vqtbx3_p8(a, b, c);
@@ -1414,20 +1414,20 @@ poly8x8_t test_vqtbx3_p8(poly8x8_t a, po
 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2
+// CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3
 // CHECK:   ret <8 x i8> [[VTBX4_I]]
 poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) {
   return vqtbx4_p8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
-// CHECK:   [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2
+// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 {
+// CHECK:   [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3
 // CHECK:   ret <16 x i8> [[VTBX1_I]]
 poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
   return vqtbx1q_p8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
@@ -1442,13 +1442,13 @@ poly8x16_t test_vqtbx1q_p8(poly8x16_t a,
 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
 // CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK:   [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2
+// CHECK:   [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3
 // CHECK:   ret <16 x i8> [[VTBX2_I]]
 poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
   return vqtbx2q_p8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
@@ -1466,13 +1466,13 @@ poly8x16_t test_vqtbx2q_p8(poly8x16_t a,
 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK:   [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2
+// CHECK:   [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
 // CHECK:   ret <16 x i8> [[VTBX3_I]]
 poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
   return vqtbx3q_p8(a, b, c);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
 // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
@@ -1493,8 +1493,11 @@ poly8x16_t test_vqtbx3q_p8(poly8x16_t a,
 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
-// CHECK:   [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
+// CHECK:   [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
 // CHECK:   ret <16 x i8> [[VTBX4_I]]
 poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
   return vqtbx4q_p8(a, b, c);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

Modified: cfe/trunk/test/CodeGen/aarch64-neon-vget.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-vget.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-vget.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-vget.c Wed Oct 24 10:42:17 2018
@@ -97,14 +97,14 @@ float32_t test_vget_lane_f16(float16x4_t
   return vget_lane_f16(a, 1);
 }
 
-// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #1 {
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
 // CHECK:   ret i8 [[VGETQ_LANE]]
 uint8_t test_vgetq_lane_u8(uint8x16_t a) {
   return vgetq_lane_u8(a, 15);
 }
 
-// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -113,7 +113,7 @@ uint16_t test_vgetq_lane_u16(uint16x8_t
   return vgetq_lane_u16(a, 7);
 }
 
-// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -122,14 +122,14 @@ uint32_t test_vgetq_lane_u32(uint32x4_t
   return vgetq_lane_u32(a, 3);
 }
 
-// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #1 {
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
 // CHECK:   ret i8 [[VGETQ_LANE]]
 int8_t test_vgetq_lane_s8(int8x16_t a) {
   return vgetq_lane_s8(a, 15);
 }
 
-// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -138,7 +138,7 @@ int16_t test_vgetq_lane_s16(int16x8_t a)
   return vgetq_lane_s16(a, 7);
 }
 
-// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -147,14 +147,14 @@ int32_t test_vgetq_lane_s32(int32x4_t a)
   return vgetq_lane_s32(a, 3);
 }
 
-// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #1 {
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
 // CHECK:   ret i8 [[VGETQ_LANE]]
 poly8_t test_vgetq_lane_p8(poly8x16_t a) {
   return vgetq_lane_p8(a, 15);
 }
 
-// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 {
+// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -163,7 +163,7 @@ poly16_t test_vgetq_lane_p16(poly16x8_t
   return vgetq_lane_p16(a, 7);
 }
 
-// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -172,7 +172,7 @@ float32_t test_vgetq_lane_f32(float32x4_
   return vgetq_lane_f32(a, 3);
 }
 
-// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 {
+// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #1 {
 // CHECK:   [[__REINT_244:%.*]] = alloca <8 x half>, align 16
 // CHECK:   [[__REINT1_244:%.*]] = alloca i16, align 2
 // CHECK:   store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
@@ -208,7 +208,7 @@ uint64_t test_vget_lane_u64(uint64x1_t a
   return vget_lane_u64(a, 0);
 }
 
-// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -217,7 +217,7 @@ int64_t test_vgetq_lane_s64(int64x2_t a)
   return vgetq_lane_s64(a, 1);
 }
 
-// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -324,14 +324,14 @@ float16x4_t test_vset_lane_f16(float16_t
   return vset_lane_f16(*a, b, 3);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #1 {
 // CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
 // CHECK:   ret <16 x i8> [[VSET_LANE]]
 uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
   return vsetq_lane_u8(a, b, 15);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
@@ -340,7 +340,7 @@ uint16x8_t test_vsetq_lane_u16(uint16_t
   return vsetq_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
@@ -349,14 +349,14 @@ uint32x4_t test_vsetq_lane_u32(uint32_t
   return vsetq_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #1 {
 // CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
 // CHECK:   ret <16 x i8> [[VSET_LANE]]
 int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
   return vsetq_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
@@ -365,7 +365,7 @@ int16x8_t test_vsetq_lane_s16(int16_t a,
   return vsetq_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
@@ -374,14 +374,14 @@ int32x4_t test_vsetq_lane_s32(int32_t a,
   return vsetq_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #1 {
 // CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
 // CHECK:   ret <16 x i8> [[VSET_LANE]]
 poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
   return vsetq_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
@@ -390,7 +390,7 @@ poly16x8_t test_vsetq_lane_p16(poly16_t
   return vsetq_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 {
+// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
 // CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
@@ -399,7 +399,7 @@ float32x4_t test_vsetq_lane_f32(float32_
   return vsetq_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #0 {
+// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #1 {
 // CHECK:   [[__REINT_248:%.*]] = alloca half, align 2
 // CHECK:   [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
 // CHECK:   [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
@@ -439,7 +439,7 @@ uint64x1_t test_vset_lane_u64(uint64_t a
   return vset_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
@@ -448,7 +448,7 @@ int64x2_t test_vsetq_lane_s64(int64_t a,
   return vsetq_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
@@ -456,3 +456,6 @@ int64x2_t test_vsetq_lane_s64(int64_t a,
 uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
   return vsetq_lane_u64(a, b, 1);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

Modified: cfe/trunk/test/CodeGen/aarch64-poly64.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-poly64.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-poly64.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-poly64.c Wed Oct 24 10:42:17 2018
@@ -14,7 +14,7 @@ uint64x1_t test_vceq_p64(poly64x1_t a, p
   return vceq_p64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %a, %b
 // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
 // CHECK:   ret <2 x i64> [[SEXT_I]]
@@ -31,7 +31,7 @@ uint64x1_t test_vtst_p64(poly64x1_t a, p
   return vtst_p64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[TMP4:%.*]] = and <2 x i64> %a, %b
 // CHECK:   [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
 // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
@@ -50,7 +50,7 @@ poly64x1_t test_vbsl_p64(poly64x1_t a, p
   return vbsl_p64(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #1 {
 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %a, %b
 // CHECK:   [[TMP3:%.*]] = xor <2 x i64> %a, <i64 -1, i64 -1>
 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c
@@ -69,7 +69,7 @@ poly64_t test_vget_lane_p64(poly64x1_t v
   return vget_lane_p64(v, 0);
 }
 
-// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #0 {
+// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -87,7 +87,7 @@ poly64x1_t test_vset_lane_p64(poly64_t a
   return vset_lane_p64(a, v, 0);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
@@ -109,7 +109,7 @@ poly64x1_t test_vcopy_lane_p64(poly64x1_
 
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
 // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
@@ -121,7 +121,7 @@ poly64x2_t test_vcopyq_lane_p64(poly64x2
   return vcopyq_lane_p64(a, 1, b, 0);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -146,7 +146,7 @@ poly64x1_t test_vcreate_p64(uint64_t a)
 poly64x1_t test_vdup_n_p64(poly64_t a) {
   return vdup_n_p64(a);
 }
-// CHECK-LABEL: define <2 x i64> @test_vdupq_n_p64(i64 %a) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vdupq_n_p64(i64 %a) #1 {
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
 // CHECK:   ret <2 x i64> [[VECINIT1_I]]
@@ -161,7 +161,7 @@ poly64x1_t test_vmov_n_p64(poly64_t a) {
   return vmov_n_p64(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vmovq_n_p64(i64 %a) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vmovq_n_p64(i64 %a) #1 {
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
 // CHECK:   ret <2 x i64> [[VECINIT1_I]]
@@ -176,21 +176,21 @@ poly64x1_t test_vdup_lane_p64(poly64x1_t
   return vdup_lane_p64(vec, 0);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <2 x i32> zeroinitializer
 // CHECK:   ret <2 x i64> [[SHUFFLE]]
 poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
   return vdupq_lane_p64(vec, 0);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #1 {
 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i64> %vec, <2 x i64> %vec, <2 x i32> <i32 1, i32 1>
 // CHECK:   ret <2 x i64> [[SHUFFLE]]
 poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
   return vdupq_laneq_p64(vec, 1);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #1 {
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %low, <1 x i64> %high, <2 x i32> <i32 0, i32 1>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
@@ -206,7 +206,7 @@ poly64x1_t test_vld1_p64(poly64_t const
   return vld1_p64(ptr);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vld1q_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vld1q_p64(i64* %ptr) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i64* %ptr to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
 // CHECK:   [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
@@ -226,7 +226,7 @@ void test_vst1_p64(poly64_t * ptr, poly6
   return vst1_p64(ptr, val);
 }
 
-// CHECK-LABEL: define void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #0 {
+// CHECK-LABEL: define void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast i64* %ptr to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %val to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
@@ -237,7 +237,7 @@ void test_vst1q_p64(poly64_t * ptr, poly
   return vst1q_p64(ptr, val);
 }
 
-// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
@@ -255,7 +255,7 @@ poly64x1x2_t test_vld2_p64(poly64_t cons
   return vld2_p64(ptr);
 }
 
-// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
@@ -273,7 +273,7 @@ poly64x2x2_t test_vld2q_p64(poly64_t con
   return vld2q_p64(ptr);
 }
 
-// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
@@ -291,7 +291,7 @@ poly64x1x3_t test_vld3_p64(poly64_t cons
   return vld3_p64(ptr);
 }
 
-// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
@@ -309,7 +309,7 @@ poly64x2x3_t test_vld3q_p64(poly64_t con
   return vld3q_p64(ptr);
 }
 
-// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
@@ -327,7 +327,7 @@ poly64x1x4_t test_vld4_p64(poly64_t cons
   return vld4_p64(ptr);
 }
 
-// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #0 {
+// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #2 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
@@ -345,7 +345,7 @@ poly64x2x4_t test_vld4q_p64(poly64_t con
   return vld4q_p64(ptr);
 }
 
-// CHECK-LABEL: define void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #2 {
 // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[VAL]], i32 0, i32 0
@@ -370,7 +370,7 @@ void test_vst2_p64(poly64_t * ptr, poly6
   return vst2_p64(ptr, val);
 }
 
-// CHECK-LABEL: define void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #2 {
 // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[VAL]], i32 0, i32 0
@@ -395,7 +395,7 @@ void test_vst2q_p64(poly64_t * ptr, poly
   return vst2q_p64(ptr, val);
 }
 
-// CHECK-LABEL: define void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #2 {
 // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[VAL]], i32 0, i32 0
@@ -425,7 +425,7 @@ void test_vst3_p64(poly64_t * ptr, poly6
   return vst3_p64(ptr, val);
 }
 
-// CHECK-LABEL: define void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #2 {
 // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[VAL]], i32 0, i32 0
@@ -455,7 +455,7 @@ void test_vst3q_p64(poly64_t * ptr, poly
   return vst3q_p64(ptr, val);
 }
 
-// CHECK-LABEL: define void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #2 {
 // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[VAL]], i32 0, i32 0
@@ -490,7 +490,7 @@ void test_vst4_p64(poly64_t * ptr, poly6
   return vst4_p64(ptr, val);
 }
 
-// CHECK-LABEL: define void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #2 {
 // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[VAL]], i32 0, i32 0
@@ -537,7 +537,7 @@ poly64x1_t test_vext_p64(poly64x1_t a, p
 
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
@@ -548,42 +548,42 @@ poly64x2_t test_vextq_p64(poly64x2_t a,
   return vextq_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) {
   return vzip1q_p64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) {
   return vzip2q_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) {
   return vuzp1q_p64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) {
   return vuzp2q_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) {
   return vtrn1q_p64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
@@ -601,7 +601,7 @@ poly64x1_t test_vsri_n_p64(poly64x1_t a,
   return vsri_n_p64(a, b, 33);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: define <2 x i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #1 {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
@@ -612,3 +612,6 @@ poly64x2_t test_vsriq_n_p64(poly64x2_t a
   return vsriq_n_p64(a, b, 64);
 }
 
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
+// CHECK-NOT: attributes #2 ={{.*}}"min-legal-vector-width"

Modified: cfe/trunk/test/CodeGen/arm-neon-fma.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-neon-fma.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-neon-fma.c (original)
+++ cfe/trunk/test/CodeGen/arm-neon-fma.c Wed Oct 24 10:42:17 2018
@@ -8,14 +8,14 @@
 #include <arm_neon.h>
 
 // CHECK-LABEL: define <2 x float> @test_fma_order(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) #0 {
-// CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %rhs, <2 x float> %accum) #2
+// CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %rhs, <2 x float> %accum) #3
 // CHECK:   ret <2 x float> [[TMP6]]
 float32x2_t test_fma_order(float32x2_t accum, float32x2_t lhs, float32x2_t rhs) {
   return vfma_f32(accum, lhs, rhs);
 }
 
-// CHECK-LABEL: define <4 x float> @test_fmaq_order(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) #0 {
-// CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %accum) #2
+// CHECK-LABEL: define <4 x float> @test_fmaq_order(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) #1 {
+// CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %accum) #3
 // CHECK:   ret <4 x float> [[TMP6]]
 float32x4_t test_fmaq_order(float32x4_t accum, float32x4_t lhs, float32x4_t rhs) {
   return vfmaq_f32(accum, lhs, rhs);
@@ -32,7 +32,7 @@ float32x2_t test_vfma_n_f32(float32x2_t
   return vfma_n_f32(a, b, n);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
+// CHECK-LABEL: define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #1 {
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1
 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2
@@ -44,3 +44,6 @@ float32x2_t test_vfma_n_f32(float32x2_t
 float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
   return vfmaq_n_f32(a, b, n);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

Modified: cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c (original)
+++ cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c Wed Oct 24 10:42:17 2018
@@ -3,29 +3,32 @@
 #include <arm_neon.h>
 
 // CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
-// CHECK:   [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %a, <2 x float> %b) #2
+// CHECK:   [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %a, <2 x float> %b) #3
 // CHECK:   ret <2 x float> [[VMAXNM_V2_I]]
 float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
   return vmaxnm_f32(a, b);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
-// CHECK:   [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) #2
+// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #1 {
+// CHECK:   [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) #3
 // CHECK:   ret <4 x float> [[VMAXNMQ_V2_I]]
 float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
   return vmaxnmq_f32(a, b);
 }
 
 // CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
-// CHECK:   [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %a, <2 x float> %b) #2
+// CHECK:   [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %a, <2 x float> %b) #3
 // CHECK:   ret <2 x float> [[VMINNM_V2_I]]
 float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
   return vminnm_f32(a, b);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
-// CHECK:   [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %a, <4 x float> %b) #2
+// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #1 {
+// CHECK:   [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %a, <4 x float> %b) #3
 // CHECK:   ret <4 x float> [[VMINNMQ_V2_I]]
 float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
   return vminnmq_f32(a, b);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

Modified: cfe/trunk/test/CodeGen/arm-neon-vcvtX.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-neon-vcvtX.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-neon-vcvtX.c (original)
+++ cfe/trunk/test/CodeGen/arm-neon-vcvtX.c Wed Oct 24 10:42:17 2018
@@ -3,113 +3,116 @@
 #include <arm_neon.h>
 
 // CHECK-LABEL: define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 {
-// CHECK:   [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) #2
+// CHECK:   [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) #3
 // CHECK:   ret <2 x i32> [[VCVTA_S32_V1_I]]
 int32x2_t test_vcvta_s32_f32(float32x2_t a) {
   return vcvta_s32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 {
-// CHECK:   [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) #2
+// CHECK:   [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) #3
 // CHECK:   ret <2 x i32> [[VCVTA_U32_V1_I]]
 uint32x2_t test_vcvta_u32_f32(float32x2_t a) {
   return vcvta_u32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 {
-// CHECK:   [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #1 {
+// CHECK:   [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) #3
 // CHECK:   ret <4 x i32> [[VCVTAQ_S32_V1_I]]
 int32x4_t test_vcvtaq_s32_f32(float32x4_t a) {
   return vcvtaq_s32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 {
-// CHECK:   [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #1 {
+// CHECK:   [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) #3
 // CHECK:   ret <4 x i32> [[VCVTAQ_U32_V1_I]]
 uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) {
   return vcvtaq_u32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 {
-// CHECK:   [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) #2
+// CHECK:   [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) #3
 // CHECK:   ret <2 x i32> [[VCVTN_S32_V1_I]]
 int32x2_t test_vcvtn_s32_f32(float32x2_t a) {
   return vcvtn_s32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 {
-// CHECK:   [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) #2
+// CHECK:   [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) #3
 // CHECK:   ret <2 x i32> [[VCVTN_U32_V1_I]]
 uint32x2_t test_vcvtn_u32_f32(float32x2_t a) {
   return vcvtn_u32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 {
-// CHECK:   [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #1 {
+// CHECK:   [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) #3
 // CHECK:   ret <4 x i32> [[VCVTNQ_S32_V1_I]]
 int32x4_t test_vcvtnq_s32_f32(float32x4_t a) {
   return vcvtnq_s32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 {
-// CHECK:   [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #1 {
+// CHECK:   [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) #3
 // CHECK:   ret <4 x i32> [[VCVTNQ_U32_V1_I]]
 uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) {
   return vcvtnq_u32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 {
-// CHECK:   [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) #2
+// CHECK:   [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) #3
 // CHECK:   ret <2 x i32> [[VCVTP_S32_V1_I]]
 int32x2_t test_vcvtp_s32_f32(float32x2_t a) {
   return vcvtp_s32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 {
-// CHECK:   [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) #2
+// CHECK:   [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) #3
 // CHECK:   ret <2 x i32> [[VCVTP_U32_V1_I]]
 uint32x2_t test_vcvtp_u32_f32(float32x2_t a) {
   return vcvtp_u32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 {
-// CHECK:   [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #1 {
+// CHECK:   [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) #3
 // CHECK:   ret <4 x i32> [[VCVTPQ_S32_V1_I]]
 int32x4_t test_vcvtpq_s32_f32(float32x4_t a) {
   return vcvtpq_s32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 {
-// CHECK:   [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #1 {
+// CHECK:   [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) #3
 // CHECK:   ret <4 x i32> [[VCVTPQ_U32_V1_I]]
 uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) {
   return vcvtpq_u32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 {
-// CHECK:   [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) #2
+// CHECK:   [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) #3
 // CHECK:   ret <2 x i32> [[VCVTM_S32_V1_I]]
 int32x2_t test_vcvtm_s32_f32(float32x2_t a) {
   return vcvtm_s32_f32(a);
 }
 
 // CHECK-LABEL: define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 {
-// CHECK:   [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) #2
+// CHECK:   [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) #3
 // CHECK:   ret <2 x i32> [[VCVTM_U32_V1_I]]
 uint32x2_t test_vcvtm_u32_f32(float32x2_t a) {
   return vcvtm_u32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 {
-// CHECK:   [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #1 {
+// CHECK:   [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) #3
 // CHECK:   ret <4 x i32> [[VCVTMQ_S32_V1_I]]
 int32x4_t test_vcvtmq_s32_f32(float32x4_t a) {
   return vcvtmq_s32_f32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 {
-// CHECK:   [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) #2
+// CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #1 {
+// CHECK:   [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) #3
 // CHECK:   ret <4 x i32> [[VCVTMQ_U32_V1_I]]
 uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) {
   return vcvtmq_u32_f32(a);
 }
+
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

Modified: cfe/trunk/test/CodeGen/arm64_vdupq_n_f64.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm64_vdupq_n_f64.c?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm64_vdupq_n_f64.c (original)
+++ cfe/trunk/test/CodeGen/arm64_vdupq_n_f64.c Wed Oct 24 10:42:17 2018
@@ -44,7 +44,7 @@ float64x2_t test_vmovq_n_f64(float64_t w
   return vmovq_n_f64(w);
 }
 
-// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a1) #0 {
+// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a1) #1 {
 // CHECK:   [[TMP0:%.*]] = load half, half* %a1, align 2
 // CHECK:   [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
 // CHECK:   [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
@@ -76,3 +76,5 @@ float16x8_t test_vmovq_n_f16(float16_t *
   return vmovq_n_f16(*a1);
 }
 
+// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="128"
+// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="64"

Added: cfe/trunk/test/CodeGen/x86-vector-width.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86-vector-width.c?rev=345168&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/x86-vector-width.c (added)
+++ cfe/trunk/test/CodeGen/x86-vector-width.c Wed Oct 24 10:42:17 2018
@@ -0,0 +1,61 @@
+// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -emit-llvm %s -o - | FileCheck %s
+
+typedef signed long long V2LLi __attribute__((vector_size(16)));
+typedef signed long long V4LLi __attribute__((vector_size(32)));
+
+V2LLi ret_128();
+V4LLi ret_256();
+void arg_128(V2LLi);
+void arg_256(V4LLi);
+
+// Make sure return type forces a min-legal-width
+V2LLi foo(void) {
+  return (V2LLi){ 0, 0 };
+}
+
+V4LLi goo(void) {
+  return (V4LLi){ 0, 0 };
+}
+
+// Make sure return type of called function forces a min-legal-width
+void hoo(void) {
+  V2LLi tmp_V2LLi;
+  tmp_V2LLi = ret_128();
+}
+
+void joo(void) {
+  V4LLi tmp_V4LLi;
+  tmp_V4LLi = ret_256();
+}
+
+// Make sure arg type of called function forces a min-legal-width
+void koo(void) {
+  V2LLi tmp_V2LLi;
+  arg_128(tmp_V2LLi);
+}
+
+void loo(void) {
+  V4LLi tmp_V4LLi;
+  arg_256(tmp_V4LLi);
+}
+
+// Make sure arg type of our function forces a min-legal-width
+void moo(V2LLi x) {
+
+}
+
+void noo(V4LLi x) {
+
+}
+
+// CHECK: {{.*}}@foo{{.*}} #0
+// CHECK: {{.*}}@goo{{.*}} #1
+// CHECK: {{.*}}@hoo{{.*}} #0
+// CHECK: {{.*}}@joo{{.*}} #1
+// CHECK: {{.*}}@koo{{.*}} #0
+// CHECK: {{.*}}@loo{{.*}} #1
+// CHECK: {{.*}}@moo{{.*}} #0
+// CHECK: {{.*}}@noo{{.*}} #1
+
+// CHECK: #0 = {{.*}}"min-legal-vector-width"="128"
+// CHECK: #1 = {{.*}}"min-legal-vector-width"="256"

Propchange: cfe/trunk/test/CodeGen/x86-vector-width.c
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/trunk/test/CodeGen/x86-vector-width.c
------------------------------------------------------------------------------
    svn:keywords = "Author Date Id Rev URL"

Propchange: cfe/trunk/test/CodeGen/x86-vector-width.c
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: cfe/trunk/test/CodeGenOpenCL/fpmath.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/fpmath.cl?rev=345168&r1=345167&r2=345168&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/fpmath.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/fpmath.cl Wed Oct 24 10:42:17 2018
@@ -16,7 +16,7 @@ float spscalardiv(float a, float b) {
 
 float4 spvectordiv(float4 a, float4 b) {
   // CHECK: @spvectordiv
-  // CHECK: #[[ATTR]]
+  // CHECK: #[[ATTR2:[0-9]+]]
   // CHECK: fdiv{{.*}},
   // NODIVOPT: !fpmath ![[MD]]
   // DIVOPT-NOT: !fpmath ![[MD]]
@@ -45,7 +45,11 @@ double dpscalardiv(double a, double b) {
 #endif
 
 // CHECK: attributes #[[ATTR]] = {
-// NODIVOPT: "correctly-rounded-divide-sqrt-fp-math"="false"
-// DIVOPT: "correctly-rounded-divide-sqrt-fp-math"="true"
-// CHECK: }
+// NODIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="false"
+// DIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="true"
+// CHECK-SAME: }
+// CHECK: attributes #[[ATTR2]] = {
+// NODIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="false"
+// DIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="true"
+// CHECK-SAME: }
 // NODIVOPT: ![[MD]] = !{float 2.500000e+00}