r194991 - Implement the newly added AArch64 ACLE functions for ld1/st1 with 2/3/4 vectors.

Hao Liu Hao.Liu at arm.com
Sun Nov 17 22:33:43 PST 2013


Author: haoliu
Date: Mon Nov 18 00:33:43 2013
New Revision: 194991

URL: http://llvm.org/viewvc/llvm-project?rev=194991&view=rev
Log:
Implement the newly added AArch64 ACLE functions for ld1/st1 with 2/3/4 vectors.
The functions are like: vst1_s8_x2 ...

Added:
    cfe/trunk/test/CodeGenCXX/aarch64-neon.cpp
Modified:
    cfe/trunk/include/clang/Basic/arm_neon.td
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
    cfe/trunk/utils/TableGen/NeonEmitter.cpp

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=194991&r1=194990&r2=194991&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Mon Nov 18 00:33:43 2013
@@ -543,6 +543,20 @@ def ST3 : WInst<"vst3", "vp3",
 def ST4 : WInst<"vst4", "vp4",
                 "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
 
+def LD1_X2 : WInst<"vld1_x2", "2c",
+                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def LD3_x3 : WInst<"vld1_x3", "3c",
+                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def LD4_x4 : WInst<"vld1_x4", "4c",
+                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+
+def ST1_X2 : WInst<"vst1_x2", "vp2",
+                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def ST1_X3 : WInst<"vst1_x3", "vp3",
+                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def ST1_X4 : WInst<"vst1_x4", "vp4",
+                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Addition
 // With additional Qd type.

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=194991&r1=194990&r2=194991&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Nov 18 00:33:43 2013
@@ -2750,7 +2750,42 @@ Value *CodeGenFunction::EmitAArch64Built
   }
 
   SmallVector<Value *, 4> Ops;
+  llvm::Value *Align = 0; // Alignment for load/store
   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
+    if (i == 0) {
+      switch (BuiltinID) {
+      case AArch64::BI__builtin_neon_vst1_x2_v:
+      case AArch64::BI__builtin_neon_vst1q_x2_v:
+      case AArch64::BI__builtin_neon_vst1_x3_v:
+      case AArch64::BI__builtin_neon_vst1q_x3_v:
+      case AArch64::BI__builtin_neon_vst1_x4_v:
+      case AArch64::BI__builtin_neon_vst1q_x4_v:
+        // Get the alignment for the argument in addition to the value;
+        // we'll use it later.
+        std::pair<llvm::Value *, unsigned> Src =
+            EmitPointerWithAlignment(E->getArg(0));
+        Ops.push_back(Src.first);
+        Align = Builder.getInt32(Src.second);
+        continue;
+      }
+    }
+    if (i == 1) {
+      switch (BuiltinID) {
+      case AArch64::BI__builtin_neon_vld1_x2_v:
+      case AArch64::BI__builtin_neon_vld1q_x2_v:
+      case AArch64::BI__builtin_neon_vld1_x3_v:
+      case AArch64::BI__builtin_neon_vld1q_x3_v:
+      case AArch64::BI__builtin_neon_vld1_x4_v:
+      case AArch64::BI__builtin_neon_vld1q_x4_v:
+        // Get the alignment for the argument in addition to the value;
+        // we'll use it later.
+        std::pair<llvm::Value *, unsigned> Src =
+            EmitPointerWithAlignment(E->getArg(1));
+        Ops.push_back(Src.first);
+        Align = Builder.getInt32(Src.second);
+        continue;
+      }
+    }
     Ops.push_back(EmitScalarExpr(E->getArg(i)));
   }
 
@@ -3084,6 +3119,57 @@ Value *CodeGenFunction::EmitAArch64Built
     return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_v, E);
   case AArch64::BI__builtin_neon_vst4q_v:
     return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E);
+  case AArch64::BI__builtin_neon_vld1_x2_v:
+  case AArch64::BI__builtin_neon_vld1q_x2_v:
+  case AArch64::BI__builtin_neon_vld1_x3_v:
+  case AArch64::BI__builtin_neon_vld1q_x3_v:
+  case AArch64::BI__builtin_neon_vld1_x4_v:
+  case AArch64::BI__builtin_neon_vld1q_x4_v: {
+    unsigned Int;
+    switch (BuiltinID) {
+    case AArch64::BI__builtin_neon_vld1_x2_v:
+    case AArch64::BI__builtin_neon_vld1q_x2_v:
+      Int = Intrinsic::aarch64_neon_vld1x2;
+      break;
+    case AArch64::BI__builtin_neon_vld1_x3_v:
+    case AArch64::BI__builtin_neon_vld1q_x3_v:
+      Int = Intrinsic::aarch64_neon_vld1x3;
+      break;
+    case AArch64::BI__builtin_neon_vld1_x4_v:
+    case AArch64::BI__builtin_neon_vld1q_x4_v:
+      Int = Intrinsic::aarch64_neon_vld1x4;
+      break;
+    }
+    Function *F = CGM.getIntrinsic(Int, Ty);
+    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN");
+    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+    return Builder.CreateStore(Ops[1], Ops[0]);
+  }
+  case AArch64::BI__builtin_neon_vst1_x2_v:
+  case AArch64::BI__builtin_neon_vst1q_x2_v:
+  case AArch64::BI__builtin_neon_vst1_x3_v:
+  case AArch64::BI__builtin_neon_vst1q_x3_v:
+  case AArch64::BI__builtin_neon_vst1_x4_v:
+  case AArch64::BI__builtin_neon_vst1q_x4_v: {
+    Ops.push_back(Align);
+    unsigned Int;
+    switch (BuiltinID) {
+    case AArch64::BI__builtin_neon_vst1_x2_v:
+    case AArch64::BI__builtin_neon_vst1q_x2_v:
+      Int = Intrinsic::aarch64_neon_vst1x2;
+      break;
+    case AArch64::BI__builtin_neon_vst1_x3_v:
+    case AArch64::BI__builtin_neon_vst1q_x3_v:
+      Int = Intrinsic::aarch64_neon_vst1x3;
+      break;
+    case AArch64::BI__builtin_neon_vst1_x4_v:
+    case AArch64::BI__builtin_neon_vst1q_x4_v:
+      Int = Intrinsic::aarch64_neon_vst1x4;
+      break;
+    }
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
+  }
 
   // Crypto
   case AArch64::BI__builtin_neon_vaeseq_v:

Modified: cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c?rev=194991&r1=194990&r2=194991&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c Mon Nov 18 00:33:43 2013
@@ -7010,6 +7010,1126 @@ void test_vst4_p16(poly16_t *a, poly16x4
   // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
 }
 
+uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) {
+  // CHECK-LABEL: test_vld1q_u8_x2
+  return vld1q_u8_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) {
+  // CHECK-LABEL: test_vld1q_u16_x2
+  return vld1q_u16_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) {
+  // CHECK-LABEL: test_vld1q_u32_x2
+  return vld1q_u32_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) {
+  // CHECK-LABEL: test_vld1q_u64_x2
+  return vld1q_u64_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+int8x16x2_t test_vld1q_s8_x2(int8_t const *a) {
+  // CHECK-LABEL: test_vld1q_s8_x2
+  return vld1q_s8_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+int16x8x2_t test_vld1q_s16_x2(int16_t const *a) {
+  // CHECK-LABEL: test_vld1q_s16_x2
+  return vld1q_s16_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+int32x4x2_t test_vld1q_s32_x2(int32_t const *a) {
+  // CHECK-LABEL: test_vld1q_s32_x2
+  return vld1q_s32_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+int64x2x2_t test_vld1q_s64_x2(int64_t const *a) {
+  // CHECK-LABEL: test_vld1q_s64_x2
+  return vld1q_s64_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+float16x8x2_t test_vld1q_f16_x2(float16_t const *a) {
+  // CHECK-LABEL: test_vld1q_f16_x2
+  return vld1q_f16_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+float32x4x2_t test_vld1q_f32_x2(float32_t const *a) {
+  // CHECK-LABEL: test_vld1q_f32_x2
+  return vld1q_f32_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
+  // CHECK-LABEL: test_vld1q_f64_x2
+  return vld1q_f64_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) {
+  // CHECK-LABEL: test_vld1q_p8_x2
+  return vld1q_p8_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) {
+  // CHECK-LABEL: test_vld1q_p16_x2
+  return vld1q_p16_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
+  // CHECK-LABEL: test_vld1q_p64_x2
+  return vld1q_p64_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) {
+  // CHECK-LABEL: test_vld1_u8_x2
+  return vld1_u8_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) {
+  // CHECK-LABEL: test_vld1_u16_x2
+  return vld1_u16_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) {
+  // CHECK-LABEL: test_vld1_u32_x2
+  return vld1_u32_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) {
+  // CHECK-LABEL: test_vld1_u64_x2
+  return vld1_u64_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+int8x8x2_t test_vld1_s8_x2(int8_t const *a) {
+  // CHECK-LABEL: test_vld1_s8_x2
+  return vld1_s8_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+int16x4x2_t test_vld1_s16_x2(int16_t const *a) {
+  // CHECK-LABEL: test_vld1_s16_x2
+  return vld1_s16_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+int32x2x2_t test_vld1_s32_x2(int32_t const *a) {
+  // CHECK-LABEL: test_vld1_s32_x2
+  return vld1_s32_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
+  // CHECK-LABEL: test_vld1_s64_x2
+  return vld1_s64_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+float16x4x2_t test_vld1_f16_x2(float16_t const *a) {
+  // CHECK-LABEL: test_vld1_f16_x2
+  return vld1_f16_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+float32x2x2_t test_vld1_f32_x2(float32_t const *a) {
+  // CHECK-LABEL: test_vld1_f32_x2
+  return vld1_f32_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
+  // CHECK-LABEL: test_vld1_f64_x2
+  return vld1_f64_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) {
+  // CHECK-LABEL: test_vld1_p8_x2
+  return vld1_p8_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) {
+  // CHECK-LABEL: test_vld1_p16_x2
+  return vld1_p16_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
+  // CHECK-LABEL: test_vld1_p64_x2
+  return vld1_p64_x2(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) {
+  // CHECK-LABEL: test_vld1q_u8_x3
+  return vld1q_u8_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) {
+  // CHECK-LABEL: test_vld1q_u16_x3
+  return vld1q_u16_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) {
+  // CHECK-LABEL: test_vld1q_u32_x3
+  return vld1q_u32_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) {
+  // CHECK-LABEL: test_vld1q_u64_x3
+  return vld1q_u64_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
+  // [{{x[0-9]+|sp}}]
+}
+
+int8x16x3_t test_vld1q_s8_x3(int8_t const *a) {
+  // CHECK-LABEL: test_vld1q_s8_x3
+  return vld1q_s8_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b},
+  // [{{x[0-9]+|sp}}]
+}
+
+int16x8x3_t test_vld1q_s16_x3(int16_t const *a) {
+  // CHECK-LABEL: test_vld1q_s16_x3
+  return vld1q_s16_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
+  // [{{x[0-9]+|sp}}]
+}
+
+int32x4x3_t test_vld1q_s32_x3(int32_t const *a) {
+  // CHECK-LABEL: test_vld1q_s32_x3
+  return vld1q_s32_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
+  // [{{x[0-9]+|sp}}]
+}
+
+int64x2x3_t test_vld1q_s64_x3(int64_t const *a) {
+  // CHECK-LABEL: test_vld1q_s64_x3
+  return vld1q_s64_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
+  // [{{x[0-9]+|sp}}]
+}
+
+float16x8x3_t test_vld1q_f16_x3(float16_t const *a) {
+  // CHECK-LABEL: test_vld1q_f16_x3
+  return vld1q_f16_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
+  // [{{x[0-9]+|sp}}]
+}
+
+float32x4x3_t test_vld1q_f32_x3(float32_t const *a) {
+  // CHECK-LABEL: test_vld1q_f32_x3
+  return vld1q_f32_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
+  // [{{x[0-9]+|sp}}]
+}
+
+float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
+  // CHECK-LABEL: test_vld1q_f64_x3
+  return vld1q_f64_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
+  // [{{x[0-9]+|sp}}]
+}
+
+poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) {
+  // CHECK-LABEL: test_vld1q_p8_x3
+  return vld1q_p8_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b},
+  // [{{x[0-9]+|sp}}]
+}
+
+poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) {
+  // CHECK-LABEL: test_vld1q_p16_x3
+  return vld1q_p16_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
+  // [{{x[0-9]+|sp}}]
+}
+
+poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
+  // CHECK-LABEL: test_vld1q_p64_x3
+  return vld1q_p64_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) {
+  // CHECK-LABEL: test_vld1_u8_x3
+  return vld1_u8_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) {
+  // CHECK-LABEL: test_vld1_u16_x3
+  return vld1_u16_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) {
+  // CHECK-LABEL: test_vld1_u32_x3
+  return vld1_u32_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) {
+  // CHECK-LABEL: test_vld1_u64_x3
+  return vld1_u64_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
+  // [{{x[0-9]+|sp}}]
+}
+
+int8x8x3_t test_vld1_s8_x3(int8_t const *a) {
+  // CHECK-LABEL: test_vld1_s8_x3
+  return vld1_s8_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b},
+  // [{{x[0-9]+|sp}}]
+}
+
+int16x4x3_t test_vld1_s16_x3(int16_t const *a) {
+  // CHECK-LABEL: test_vld1_s16_x3
+  return vld1_s16_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
+  // [{{x[0-9]+|sp}}]
+}
+
+int32x2x3_t test_vld1_s32_x3(int32_t const *a) {
+  // CHECK-LABEL: test_vld1_s32_x3
+  return vld1_s32_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
+  // [{{x[0-9]+|sp}}]
+}
+
+int64x1x3_t test_vld1_s64_x3(int64_t const *a) {
+  // CHECK-LABEL: test_vld1_s64_x3
+  return vld1_s64_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
+  // [{{x[0-9]+|sp}}]
+}
+
+float16x4x3_t test_vld1_f16_x3(float16_t const *a) {
+  // CHECK-LABEL: test_vld1_f16_x3
+  return vld1_f16_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
+  // [{{x[0-9]+|sp}}]
+}
+
+float32x2x3_t test_vld1_f32_x3(float32_t const *a) {
+  // CHECK-LABEL: test_vld1_f32_x3
+  return vld1_f32_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
+  // [{{x[0-9]+|sp}}]
+}
+
+float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
+  // CHECK-LABEL: test_vld1_f64_x3
+  return vld1_f64_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
+  // [{{x[0-9]+|sp}}]
+}
+
+poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) {
+  // CHECK-LABEL: test_vld1_p8_x3
+  return vld1_p8_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b},
+  // [{{x[0-9]+|sp}}]
+}
+
+poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) {
+  // CHECK-LABEL: test_vld1_p16_x3
+  return vld1_p16_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
+  // [{{x[0-9]+|sp}}]
+}
+
+poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
+  // CHECK-LABEL: test_vld1_p64_x3
+  return vld1_p64_x3(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) {
+  // CHECK-LABEL: test_vld1q_u8_x4
+  return vld1q_u8_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
+  // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) {
+  // CHECK-LABEL: test_vld1q_u16_x4
+  return vld1q_u16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) {
+  // CHECK-LABEL: test_vld1q_u32_x4
+  return vld1q_u32_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) {
+  // CHECK-LABEL: test_vld1q_u64_x4
+  return vld1q_u64_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+int8x16x4_t test_vld1q_s8_x4(int8_t const *a) {
+  // CHECK-LABEL: test_vld1q_s8_x4
+  return vld1q_s8_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
+  // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+int16x8x4_t test_vld1q_s16_x4(int16_t const *a) {
+  // CHECK-LABEL: test_vld1q_s16_x4
+  return vld1q_s16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+int32x4x4_t test_vld1q_s32_x4(int32_t const *a) {
+  // CHECK-LABEL: test_vld1q_s32_x4
+  return vld1q_s32_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+int64x2x4_t test_vld1q_s64_x4(int64_t const *a) {
+  // CHECK-LABEL: test_vld1q_s64_x4
+  return vld1q_s64_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+float16x8x4_t test_vld1q_f16_x4(float16_t const *a) {
+  // CHECK-LABEL: test_vld1q_f16_x4
+  return vld1q_f16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+float32x4x4_t test_vld1q_f32_x4(float32_t const *a) {
+  // CHECK-LABEL: test_vld1q_f32_x4
+  return vld1q_f32_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
+  // CHECK-LABEL: test_vld1q_f64_x4
+  return vld1q_f64_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) {
+  // CHECK-LABEL: test_vld1q_p8_x4
+  return vld1q_p8_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
+  // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) {
+  // CHECK-LABEL: test_vld1q_p16_x4
+  return vld1q_p16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
+  // CHECK-LABEL: test_vld1q_p64_x4
+  return vld1q_p64_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) {
+  // CHECK-LABEL: test_vld1_u8_x4
+  return vld1_u8_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
+  // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) {
+  // CHECK-LABEL: test_vld1_u16_x4
+  return vld1_u16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) {
+  // CHECK-LABEL: test_vld1_u32_x4
+  return vld1_u32_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
+  // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) {
+  // CHECK-LABEL: test_vld1_u64_x4
+  return vld1_u64_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
+  // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+int8x8x4_t test_vld1_s8_x4(int8_t const *a) {
+  // CHECK-LABEL: test_vld1_s8_x4
+  return vld1_s8_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
+  // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+int16x4x4_t test_vld1_s16_x4(int16_t const *a) {
+  // CHECK-LABEL: test_vld1_s16_x4
+  return vld1_s16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+int32x2x4_t test_vld1_s32_x4(int32_t const *a) {
+  // CHECK-LABEL: test_vld1_s32_x4
+  return vld1_s32_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
+  // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+int64x1x4_t test_vld1_s64_x4(int64_t const *a) {
+  // CHECK-LABEL: test_vld1_s64_x4
+  return vld1_s64_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
+  // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+float16x4x4_t test_vld1_f16_x4(float16_t const *a) {
+  // CHECK-LABEL: test_vld1_f16_x4
+  return vld1_f16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+float32x2x4_t test_vld1_f32_x4(float32_t const *a) {
+  // CHECK-LABEL: test_vld1_f32_x4
+  return vld1_f32_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
+  // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
+  // CHECK-LABEL: test_vld1_f64_x4
+  return vld1_f64_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
+  // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) {
+  // CHECK-LABEL: test_vld1_p8_x4
+  return vld1_p8_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
+  // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) {
+  // CHECK-LABEL: test_vld1_p16_x4
+  return vld1_p16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
+  // CHECK-LABEL: test_vld1_p64_x4
+  return vld1_p64_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
+  // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) {
+  // CHECK: test_vst1q_u8_x2
+  vst1q_u8_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) {
+  // CHECK: test_vst1q_u16_x2
+  vst1q_u16_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_u32_x2(uint32_t *a, uint32x4x2_t b) {
+  // CHECK: test_vst1q_u32_x2
+  vst1q_u32_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_u64_x2(uint64_t *a, uint64x2x2_t b) {
+  // CHECK: test_vst1q_u64_x2
+  vst1q_u64_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) {
+  // CHECK: test_vst1q_s8_x2
+  vst1q_s8_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_s16_x2(int16_t *a, int16x8x2_t b) {
+  // CHECK: test_vst1q_s16_x2
+  vst1q_s16_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) {
+  // CHECK: test_vst1q_s32_x2
+  vst1q_s32_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) {
+  // CHECK: test_vst1q_s64_x2
+  vst1q_s64_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) {
+  // CHECK: test_vst1q_f16_x2
+  vst1q_f16_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_f32_x2(float32_t *a, float32x4x2_t b) {
+  // CHECK: test_vst1q_f32_x2
+  vst1q_f32_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
+  // CHECK: test_vst1q_f64_x2
+  vst1q_f64_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) {
+  // CHECK: test_vst1q_p8_x2
+  vst1q_p8_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_p16_x2(poly16_t *a, poly16x8x2_t b) {
+  // CHECK: test_vst1q_p16_x2
+  vst1q_p16_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
+  // CHECK: test_vst1q_p64_x2
+  vst1q_p64_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) {
+  // CHECK: test_vst1_u8_x2
+  vst1_u8_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) {
+  // CHECK: test_vst1_u16_x2
+  vst1_u16_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_u32_x2(uint32_t *a, uint32x2x2_t b) {
+  // CHECK: test_vst1_u32_x2
+  vst1_u32_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_u64_x2(uint64_t *a, uint64x1x2_t b) {
+  // CHECK: test_vst1_u64_x2
+  vst1_u64_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) {
+  // CHECK: test_vst1_s8_x2
+  vst1_s8_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_s16_x2(int16_t *a, int16x4x2_t b) {
+  // CHECK: test_vst1_s16_x2
+  vst1_s16_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) {
+  // CHECK: test_vst1_s32_x2
+  vst1_s32_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) {
+  // CHECK: test_vst1_s64_x2
+  vst1_s64_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) {
+  // CHECK: test_vst1_f16_x2
+  vst1_f16_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_f32_x2(float32_t *a, float32x2x2_t b) {
+  // CHECK: test_vst1_f32_x2
+  vst1_f32_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
+  // CHECK: test_vst1_f64_x2
+  vst1_f64_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) {
+  // CHECK: test_vst1_p8_x2
+  vst1_p8_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_p16_x2(poly16_t *a, poly16x4x2_t b) {
+  // CHECK: test_vst1_p16_x2
+  vst1_p16_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
+  // CHECK: test_vst1_p64_x2
+  vst1_p64_x2(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) {
+  // CHECK: test_vst1q_u8_x3
+  vst1q_u8_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) {
+  // CHECK: test_vst1q_u16_x3
+  vst1q_u16_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) {
+  // CHECK: test_vst1q_u32_x3
+  vst1q_u32_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) {
+  // CHECK: test_vst1q_u64_x3
+  vst1q_u64_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) {
+  // CHECK: test_vst1q_s8_x3
+  vst1q_s8_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) {
+  // CHECK: test_vst1q_s16_x3
+  vst1q_s16_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) {
+  // CHECK: test_vst1q_s32_x3
+  vst1q_s32_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) {
+  // CHECK: test_vst1q_s64_x3
+  vst1q_s64_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) {
+  // CHECK: test_vst1q_f16_x3
+  vst1q_f16_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) {
+  // CHECK: test_vst1q_f32_x3
+  vst1q_f32_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
+  // CHECK: test_vst1q_f64_x3
+  vst1q_f64_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_p8_x3(poly8_t *a, poly8x16x3_t b) {
+  // CHECK: test_vst1q_p8_x3
+  vst1q_p8_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) {
+  // CHECK: test_vst1q_p16_x3
+  vst1q_p16_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
+  // CHECK: test_vst1q_p64_x3
+  vst1q_p64_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) {
+  // CHECK: test_vst1_u8_x3
+  vst1_u8_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) {
+  // CHECK: test_vst1_u16_x3
+  vst1_u16_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) {
+  // CHECK: test_vst1_u32_x3
+  vst1_u32_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) {
+  // CHECK: test_vst1_u64_x3
+  vst1_u64_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) {
+  // CHECK: test_vst1_s8_x3
+  vst1_s8_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) {
+  // CHECK: test_vst1_s16_x3
+  vst1_s16_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) {
+  // CHECK: test_vst1_s32_x3
+  vst1_s32_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) {
+  // CHECK: test_vst1_s64_x3
+  vst1_s64_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) {
+  // CHECK: test_vst1_f16_x3
+  vst1_f16_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) {
+  // CHECK: test_vst1_f32_x3
+  vst1_f32_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
+  // CHECK: test_vst1_f64_x3
+  vst1_f64_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) {
+  // CHECK: test_vst1_p8_x3
+  vst1_p8_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) {
+  // CHECK: test_vst1_p16_x3
+  vst1_p16_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
+  // CHECK: test_vst1_p64_x3
+  vst1_p64_x3(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
+  // [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) {
+  // CHECK: test_vst1q_u8_x4
+  vst1q_u8_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
+  // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) {
+  // CHECK: test_vst1q_u16_x4
+  vst1q_u16_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) {
+  // CHECK: test_vst1q_u32_x4
+  vst1q_u32_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) {
+  // CHECK: test_vst1q_u64_x4
+  vst1q_u64_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) {
+  // CHECK: test_vst1q_s8_x4
+  vst1q_s8_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
+  // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) {
+  // CHECK: test_vst1q_s16_x4
+  vst1q_s16_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) {
+  // CHECK: test_vst1q_s32_x4
+  vst1q_s32_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) {
+  // CHECK: test_vst1q_s64_x4
+  vst1q_s64_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) {
+  // CHECK: test_vst1q_f16_x4
+  vst1q_f16_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) {
+  // CHECK: test_vst1q_f32_x4
+  vst1q_f32_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
+  // CHECK: test_vst1q_f64_x4
+  vst1q_f64_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) {
+  // CHECK: test_vst1q_p8_x4
+  vst1q_p8_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
+  // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) {
+  // CHECK: test_vst1q_p16_x4
+  vst1q_p16_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
+  // CHECK: test_vst1q_p64_x4
+  vst1q_p64_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) {
+  // CHECK: test_vst1_u8_x4
+  vst1_u8_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
+  // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) {
+  // CHECK: test_vst1_u16_x4
+  vst1_u16_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) {
+  // CHECK: test_vst1_u32_x4
+  vst1_u32_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
+  // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) {
+  // CHECK: test_vst1_u64_x4
+  vst1_u64_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
+  // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) {
+  // CHECK: test_vst1_s8_x4
+  vst1_s8_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
+  // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) {
+  // CHECK: test_vst1_s16_x4
+  vst1_s16_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) {
+  // CHECK: test_vst1_s32_x4
+  vst1_s32_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
+  // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) {
+  // CHECK: test_vst1_s64_x4
+  vst1_s64_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
+  // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) {
+  // CHECK: test_vst1_f16_x4
+  vst1_f16_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) {
+  // CHECK: test_vst1_f32_x4
+  vst1_f32_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
+  // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
+  // CHECK: test_vst1_f64_x4
+  vst1_f64_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
+  // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) {
+  // CHECK: test_vst1_p8_x4
+  vst1_p8_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
+  // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) {
+  // CHECK: test_vst1_p16_x4
+  vst1_p16_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
+  // CHECK: test_vst1_p64_x4
+  vst1_p64_x4(a, b);
+  // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
+  // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
 int64_t test_vceqd_s64(int64_t a, int64_t b) {
 // CHECK: test_vceqd_s64
 // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}

Added: cfe/trunk/test/CodeGenCXX/aarch64-neon.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/aarch64-neon.cpp?rev=194991&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenCXX/aarch64-neon.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/aarch64-neon.cpp Mon Nov 18 00:33:43 2013
@@ -0,0 +1,13 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test whether arm_neon.h can be used in .cpp file.
+
+#include "arm_neon.h"
+
+poly64x1_t test_vld1_p64(poly64_t const * ptr) {
+  // CHECK: test_vld1_p64
+  return vld1_p64(ptr);
+  // CHECK:  ld1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}

Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=194991&r1=194990&r2=194991&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Mon Nov 18 00:33:43 2013
@@ -881,6 +881,16 @@ static char Insert_BHSD_Suffix(StringRef
   return 0;
 }
 
+static bool endsWith_xN(std::string const &name) {
+  if (name.length() > 3) {
+    if (name.compare(name.length() - 3, 3, "_x2") == 0 ||
+        name.compare(name.length() - 3, 3, "_x3") == 0 ||
+        name.compare(name.length() - 3, 3, "_x4") == 0)
+      return true;
+  }
+  return false;
+}
+
 /// MangleName - Append a type or width suffix to a base neon function name,
 /// and insert a 'q' in the appropriate location if type string starts with 'Q'.
 /// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
@@ -898,7 +908,11 @@ static std::string MangleName(const std:
   std::string s = name;
 
   if (typeCode.size() > 0) {
-    s += "_" + typeCode;
+    // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
+    if (endsWith_xN(s))
+      s.insert(s.length() - 3, "_" + typeCode);
+    else
+      s += "_" + typeCode;
   }
 
   if (ck == ClassB)





More information about the cfe-commits mailing list