r195079 - Implement AArch64 neon instructions class SIMD lsone and SIMD lone-post.

Hao Liu Hao.Liu at arm.com
Mon Nov 18 18:17:31 PST 2013


Author: haoliu
Date: Mon Nov 18 20:17:31 2013
New Revision: 195079

URL: http://llvm.org/viewvc/llvm-project?rev=195079&view=rev
Log:
Implement AArch64 neon instructions class SIMD lsone and SIMD lone-post.

Added:
    cfe/trunk/test/CodeGen/aarch64-neon-ldst-one.c
Modified:
    cfe/trunk/include/clang/Basic/arm_neon.td
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=195079&r1=195078&r2=195079&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Mon Nov 18 20:17:31 2013
@@ -526,7 +526,7 @@ let isA64 = 1 in {
 
 ////////////////////////////////////////////////////////////////////////////////
 // Load/Store
-// With additional QUl, Ql, Qd, Pl, QPl type.
+// With additional QUl, Ql, d, Qd, Pl, QPl type.
 def LD1 : WInst<"vld1", "dc",
                 "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
 def LD2 : WInst<"vld2", "2c",
@@ -558,6 +558,33 @@ def ST1_X3 : WInst<"vst1_x3", "vp3",
 def ST1_X4 : WInst<"vst1_x4", "vp4",
                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
 
+// With additional QUl, Ql, d, Qd, Pl, QPl type.
+def LD1_LANE : WInst<"vld1_lane", "dcdi",
+                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def LD2_LANE : WInst<"vld2_lane", "2c2i",
+                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def LD3_LANE : WInst<"vld3_lane", "3c3i",
+                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def LD4_LANE : WInst<"vld4_lane", "4c4i",
+                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def ST1_LANE : WInst<"vst1_lane", "vpdi",
+                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def ST2_LANE : WInst<"vst2_lane", "vp2i",
+                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def ST3_LANE : WInst<"vst3_lane", "vp3i",
+                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def ST4_LANE : WInst<"vst4_lane", "vp4i",
+                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+
+def LD1_DUP  : WInst<"vld1_dup", "dc",
+                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def LD2_DUP  : WInst<"vld2_dup", "2c",
+                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def LD3_DUP  : WInst<"vld3_dup", "3c",
+                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def LD4_DUP  : WInst<"vld4_dup", "4c",
+                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Addition
 // With additional Qd type.

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=195079&r1=195078&r2=195079&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Nov 18 20:17:31 2013
@@ -2760,6 +2760,11 @@ Value *CodeGenFunction::EmitAArch64Built
       case AArch64::BI__builtin_neon_vst1q_x3_v:
       case AArch64::BI__builtin_neon_vst1_x4_v:
       case AArch64::BI__builtin_neon_vst1q_x4_v:
+      // Handle ld1/st1 lane in this function a little differently from ARM.
+      case AArch64::BI__builtin_neon_vld1_lane_v:
+      case AArch64::BI__builtin_neon_vld1q_lane_v:
+      case AArch64::BI__builtin_neon_vst1_lane_v:
+      case AArch64::BI__builtin_neon_vst1q_lane_v:
         // Get the alignment for the argument in addition to the value;
         // we'll use it later.
         std::pair<llvm::Value *, unsigned> Src =
@@ -2777,6 +2782,15 @@ Value *CodeGenFunction::EmitAArch64Built
       case AArch64::BI__builtin_neon_vld1q_x3_v:
       case AArch64::BI__builtin_neon_vld1_x4_v:
       case AArch64::BI__builtin_neon_vld1q_x4_v:
+      // Handle ld2/ld3/ld4 dup and ld2 lane a little differently from ARM.
+      case AArch64::BI__builtin_neon_vld2_dup_v:
+      case AArch64::BI__builtin_neon_vld2q_dup_v:
+      case AArch64::BI__builtin_neon_vld3_dup_v:
+      case AArch64::BI__builtin_neon_vld3q_dup_v:
+      case AArch64::BI__builtin_neon_vld4_dup_v:
+      case AArch64::BI__builtin_neon_vld4q_dup_v:
+      case AArch64::BI__builtin_neon_vld2_lane_v:
+      case AArch64::BI__builtin_neon_vld2q_lane_v:
         // Get the alignment for the argument in addition to the value;
         // we'll use it later.
         std::pair<llvm::Value *, unsigned> Src =
@@ -3170,6 +3184,119 @@ Value *CodeGenFunction::EmitAArch64Built
     }
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
   }
+  case AArch64::BI__builtin_neon_vld1_lane_v:
+  case AArch64::BI__builtin_neon_vld1q_lane_v: {
+    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
+    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
+    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
+    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
+  }
+  case AArch64::BI__builtin_neon_vld2_lane_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2_lane_v, E);
+  case AArch64::BI__builtin_neon_vld2q_lane_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E);
+  case AArch64::BI__builtin_neon_vld3_lane_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_lane_v, E);
+  case AArch64::BI__builtin_neon_vld3q_lane_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_lane_v, E);
+  case AArch64::BI__builtin_neon_vld4_lane_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_lane_v, E);
+  case AArch64::BI__builtin_neon_vld4q_lane_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_lane_v, E);
+  case AArch64::BI__builtin_neon_vst1_lane_v:
+  case AArch64::BI__builtin_neon_vst1q_lane_v: {
+    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
+    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+    StoreInst *St =
+        Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
+    St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
+    return St;
+  }
+  case AArch64::BI__builtin_neon_vst2_lane_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_lane_v, E);
+  case AArch64::BI__builtin_neon_vst2q_lane_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_lane_v, E);
+  case AArch64::BI__builtin_neon_vst3_lane_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_lane_v, E);
+  case AArch64::BI__builtin_neon_vst3q_lane_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_lane_v, E);
+  case AArch64::BI__builtin_neon_vst4_lane_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_lane_v, E);
+  case AArch64::BI__builtin_neon_vst4q_lane_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_lane_v, E);
+  case AArch64::BI__builtin_neon_vld1_dup_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_dup_v, E);
+  case AArch64::BI__builtin_neon_vld1q_dup_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_dup_v, E);
+  case AArch64::BI__builtin_neon_vld2_dup_v:
+  case AArch64::BI__builtin_neon_vld2q_dup_v:
+  case AArch64::BI__builtin_neon_vld3_dup_v:
+  case AArch64::BI__builtin_neon_vld3q_dup_v:
+  case AArch64::BI__builtin_neon_vld4_dup_v:
+  case AArch64::BI__builtin_neon_vld4q_dup_v: {
+    // Handle 64-bit x 1 elements as a special-case.  There is no "dup" needed.
+    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 &&
+        VTy->getNumElements() == 1) {
+      switch (BuiltinID) {
+      case AArch64::BI__builtin_neon_vld2_dup_v:
+        Int = Intrinsic::arm_neon_vld2;
+        break;
+      case AArch64::BI__builtin_neon_vld3_dup_v:
+        Int = Intrinsic::arm_neon_vld3;
+        break;
+      case AArch64::BI__builtin_neon_vld4_dup_v:
+        Int = Intrinsic::arm_neon_vld4;
+        break;
+      default:
+        llvm_unreachable("unknown vld_dup intrinsic?");
+      }
+      Function *F = CGM.getIntrinsic(Int, Ty);
+      Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
+      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+      return Builder.CreateStore(Ops[1], Ops[0]);
+    }
+    switch (BuiltinID) {
+    case AArch64::BI__builtin_neon_vld2_dup_v:
+    case AArch64::BI__builtin_neon_vld2q_dup_v:
+      Int = Intrinsic::arm_neon_vld2lane;
+      break;
+    case AArch64::BI__builtin_neon_vld3_dup_v:
+    case AArch64::BI__builtin_neon_vld3q_dup_v:
+      Int = Intrinsic::arm_neon_vld3lane;
+      break;
+    case AArch64::BI__builtin_neon_vld4_dup_v:
+    case AArch64::BI__builtin_neon_vld4q_dup_v:
+      Int = Intrinsic::arm_neon_vld4lane;
+      break;
+    }
+    Function *F = CGM.getIntrinsic(Int, Ty);
+    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
+
+    SmallVector<Value *, 6> Args;
+    Args.push_back(Ops[1]);
+    Args.append(STy->getNumElements(), UndefValue::get(Ty));
+
+    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
+    Args.push_back(CI);
+    Args.push_back(Align);
+
+    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
+    // splat lane 0 to all elts in each vector of the result.
+    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+      Value *Val = Builder.CreateExtractValue(Ops[1], i);
+      Value *Elt = Builder.CreateBitCast(Val, Ty);
+      Elt = EmitNeonSplat(Elt, CI);
+      Elt = Builder.CreateBitCast(Elt, Val->getType());
+      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
+    }
+    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+    return Builder.CreateStore(Ops[1], Ops[0]);
+  }
 
   // Crypto
   case AArch64::BI__builtin_neon_vaeseq_v:

Added: cfe/trunk/test/CodeGen/aarch64-neon-ldst-one.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-ldst-one.c?rev=195079&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-ldst-one.c (added)
+++ cfe/trunk/test/CodeGen/aarch64-neon-ldst-one.c Mon Nov 18 20:17:31 2013
@@ -0,0 +1,2047 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+#include <arm_neon.h>
+
+uint8x16_t test_vld1q_dup_u8(uint8_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_u8
+  return vld1q_dup_u8(a);
+  // CHECK: ld1r {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+uint16x8_t test_vld1q_dup_u16(uint16_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_u16
+  return vld1q_dup_u16(a);
+  // CHECK: ld1r {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+uint32x4_t test_vld1q_dup_u32(uint32_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_u32
+  return vld1q_dup_u32(a);
+  // CHECK: ld1r {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x2_t test_vld1q_dup_u64(uint64_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_u64
+  return vld1q_dup_u64(a);
+  // CHECK: ld1r {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+int8x16_t test_vld1q_dup_s8(int8_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_s8
+  return vld1q_dup_s8(a);
+  // CHECK: ld1r {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+int16x8_t test_vld1q_dup_s16(int16_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_s16
+  return vld1q_dup_s16(a);
+  // CHECK: ld1r {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+int32x4_t test_vld1q_dup_s32(int32_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_s32
+  return vld1q_dup_s32(a);
+  // CHECK: ld1r {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+int64x2_t test_vld1q_dup_s64(int64_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_s64
+  return vld1q_dup_s64(a);
+  // CHECK: ld1r {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+float16x8_t test_vld1q_dup_f16(float16_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_f16
+  return vld1q_dup_f16(a);
+  // CHECK: ld1r {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+float32x4_t test_vld1q_dup_f32(float32_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_f32
+  return vld1q_dup_f32(a);
+  // CHECK: ld1r {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+float64x2_t test_vld1q_dup_f64(float64_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_f64
+  return vld1q_dup_f64(a);
+  // CHECK: ld1r {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+poly8x16_t test_vld1q_dup_p8(poly8_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_p8
+  return vld1q_dup_p8(a);
+  // CHECK: ld1r {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+poly16x8_t test_vld1q_dup_p16(poly16_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_p16
+  return vld1q_dup_p16(a);
+  // CHECK: ld1r {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+poly64x2_t test_vld1q_dup_p64(poly64_t const *a) {
+  // CHECK-LABEL: test_vld1q_dup_p64
+  return vld1q_dup_p64(a);
+  // CHECK: ld1r {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+uint8x8_t test_vld1_dup_u8(uint8_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_u8
+  return vld1_dup_u8(a);
+  // CHECK: ld1r {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+uint16x4_t test_vld1_dup_u16(uint16_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_u16
+  return vld1_dup_u16(a);
+  // CHECK: ld1r {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+uint32x2_t test_vld1_dup_u32(uint32_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_u32
+  return vld1_dup_u32(a);
+  // CHECK: ld1r {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x1_t test_vld1_dup_u64(uint64_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_u64
+  return vld1_dup_u64(a);
+  // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+int8x8_t test_vld1_dup_s8(int8_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_s8
+  return vld1_dup_s8(a);
+  // CHECK: ld1r {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+int16x4_t test_vld1_dup_s16(int16_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_s16
+  return vld1_dup_s16(a);
+  // CHECK: ld1r {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+int32x2_t test_vld1_dup_s32(int32_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_s32
+  return vld1_dup_s32(a);
+  // CHECK: ld1r {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+int64x1_t test_vld1_dup_s64(int64_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_s64
+  return vld1_dup_s64(a);
+  // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+float16x4_t test_vld1_dup_f16(float16_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_f16
+  return vld1_dup_f16(a);
+  // CHECK: ld1r {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+float32x2_t test_vld1_dup_f32(float32_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_f32
+  return vld1_dup_f32(a);
+  // CHECK: ld1r {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+float64x1_t test_vld1_dup_f64(float64_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_f64
+  return vld1_dup_f64(a);
+  // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+poly8x8_t test_vld1_dup_p8(poly8_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_p8
+  return vld1_dup_p8(a);
+  // CHECK: ld1r {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+poly16x4_t test_vld1_dup_p16(poly16_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_p16
+  return vld1_dup_p16(a);
+  // CHECK: ld1r {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+poly64x1_t test_vld1_dup_p64(poly64_t const *a) {
+  // CHECK-LABEL: test_vld1_dup_p64
+  return vld1_dup_p64(a);
+  // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+uint8x16x2_t test_vld2q_dup_u8(uint8_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_u8
+  return vld2q_dup_u8(a);
+  // CHECK: ld2r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+uint16x8x2_t test_vld2q_dup_u16(uint16_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_u16
+  return vld2q_dup_u16(a);
+  // CHECK: ld2r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+uint32x4x2_t test_vld2q_dup_u32(uint32_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_u32
+  return vld2q_dup_u32(a);
+  // CHECK: ld2r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x2x2_t test_vld2q_dup_u64(uint64_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_u64
+  return vld2q_dup_u64(a);
+  // CHECK: ld2r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+int8x16x2_t test_vld2q_dup_s8(int8_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_s8
+  return vld2q_dup_s8(a);
+  // CHECK: ld2r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+int16x8x2_t test_vld2q_dup_s16(int16_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_s16
+  return vld2q_dup_s16(a);
+  // CHECK: ld2r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+int32x4x2_t test_vld2q_dup_s32(int32_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_s32
+  return vld2q_dup_s32(a);
+  // CHECK: ld2r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+int64x2x2_t test_vld2q_dup_s64(int64_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_s64
+  return vld2q_dup_s64(a);
+  // CHECK: ld2r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+float16x8x2_t test_vld2q_dup_f16(float16_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_f16
+  return vld2q_dup_f16(a);
+  // CHECK: ld2r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+float32x4x2_t test_vld2q_dup_f32(float32_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_f32
+  return vld2q_dup_f32(a);
+  // CHECK: ld2r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+float64x2x2_t test_vld2q_dup_f64(float64_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_f64
+  return vld2q_dup_f64(a);
+  // CHECK: ld2r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+poly8x16x2_t test_vld2q_dup_p8(poly8_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_p8
+  return vld2q_dup_p8(a);
+  // CHECK: ld2r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+poly16x8x2_t test_vld2q_dup_p16(poly16_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_p16
+  return vld2q_dup_p16(a);
+  // CHECK: ld2r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+poly64x2x2_t test_vld2q_dup_p64(poly64_t const *a) {
+  // CHECK-LABEL: test_vld2q_dup_p64
+  return vld2q_dup_p64(a);
+  // CHECK: ld2r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+uint8x8x2_t test_vld2_dup_u8(uint8_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_u8
+  return vld2_dup_u8(a);
+  // CHECK: ld2r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+uint16x4x2_t test_vld2_dup_u16(uint16_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_u16
+  return vld2_dup_u16(a);
+  // CHECK: ld2r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+uint32x2x2_t test_vld2_dup_u32(uint32_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_u32
+  return vld2_dup_u32(a);
+  // CHECK: ld2r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x1x2_t test_vld2_dup_u64(uint64_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_u64
+  return vld2_dup_u64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+int8x8x2_t test_vld2_dup_s8(int8_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_s8
+  return vld2_dup_s8(a);
+  // CHECK: ld2r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+int16x4x2_t test_vld2_dup_s16(int16_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_s16
+  return vld2_dup_s16(a);
+  // CHECK: ld2r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+int32x2x2_t test_vld2_dup_s32(int32_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_s32
+  return vld2_dup_s32(a);
+  // CHECK: ld2r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+int64x1x2_t test_vld2_dup_s64(int64_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_s64
+  return vld2_dup_s64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+float16x4x2_t test_vld2_dup_f16(float16_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_f16
+  return vld2_dup_f16(a);
+  // CHECK: ld2r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+float32x2x2_t test_vld2_dup_f32(float32_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_f32
+  return vld2_dup_f32(a);
+  // CHECK: ld2r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+float64x1x2_t test_vld2_dup_f64(float64_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_f64
+  return vld2_dup_f64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+poly8x8x2_t test_vld2_dup_p8(poly8_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_p8
+  return vld2_dup_p8(a);
+  // CHECK: ld2r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+poly16x4x2_t test_vld2_dup_p16(poly16_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_p16
+  return vld2_dup_p16(a);
+  // CHECK: ld2r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+poly64x1x2_t test_vld2_dup_p64(poly64_t const *a) {
+  // CHECK-LABEL: test_vld2_dup_p64
+  return vld2_dup_p64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+uint8x16x3_t test_vld3q_dup_u8(uint8_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_u8
+  return vld3q_dup_u8(a);
+  // CHECK: ld3r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint16x8x3_t test_vld3q_dup_u16(uint16_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_u16
+  return vld3q_dup_u16(a);
+  // CHECK: ld3r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint32x4x3_t test_vld3q_dup_u32(uint32_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_u32
+  return vld3q_dup_u32(a);
+  // CHECK: ld3r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint64x2x3_t test_vld3q_dup_u64(uint64_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_u64
+  return vld3q_dup_u64(a);
+  // CHECK: ld3r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
+  // [{{x[0-9]+|sp}}]
+}
+
+int8x16x3_t test_vld3q_dup_s8(int8_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_s8
+  return vld3q_dup_s8(a);
+  // CHECK: ld3r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b},
+  // [{{x[0-9]+|sp}}]
+}
+
+int16x8x3_t test_vld3q_dup_s16(int16_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_s16
+  return vld3q_dup_s16(a);
+  // CHECK: ld3r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
+  // [{{x[0-9]+|sp}}]
+}
+
+int32x4x3_t test_vld3q_dup_s32(int32_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_s32
+  return vld3q_dup_s32(a);
+  // CHECK: ld3r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
+  // [{{x[0-9]+|sp}}]
+}
+
+int64x2x3_t test_vld3q_dup_s64(int64_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_s64
+  return vld3q_dup_s64(a);
+  // CHECK: ld3r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
+  // [{{x[0-9]+|sp}}]
+}
+
+float16x8x3_t test_vld3q_dup_f16(float16_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_f16
+  return vld3q_dup_f16(a);
+  // CHECK: ld3r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
+  // [{{x[0-9]+|sp}}]
+}
+
+float32x4x3_t test_vld3q_dup_f32(float32_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_f32
+  return vld3q_dup_f32(a);
+  // CHECK: ld3r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
+  // [{{x[0-9]+|sp}}]
+}
+
+float64x2x3_t test_vld3q_dup_f64(float64_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_f64
+  return vld3q_dup_f64(a);
+  // CHECK: ld3r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
+  // [{{x[0-9]+|sp}}]
+}
+
+poly8x16x3_t test_vld3q_dup_p8(poly8_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_p8
+  return vld3q_dup_p8(a);
+  // CHECK: ld3r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b},
+  // [{{x[0-9]+|sp}}]
+}
+
+poly16x8x3_t test_vld3q_dup_p16(poly16_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_p16
+  return vld3q_dup_p16(a);
+  // CHECK: ld3r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
+  // [{{x[0-9]+|sp}}]
+}
+
+poly64x2x3_t test_vld3q_dup_p64(poly64_t const *a) {
+  // CHECK-LABEL: test_vld3q_dup_p64
+  return vld3q_dup_p64(a);
+  // CHECK: ld3r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint8x8x3_t test_vld3_dup_u8(uint8_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_u8
+  return vld3_dup_u8(a);
+  // CHECK: ld3r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint16x4x3_t test_vld3_dup_u16(uint16_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_u16
+  return vld3_dup_u16(a);
+  // CHECK: ld3r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint32x2x3_t test_vld3_dup_u32(uint32_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_u32
+  return vld3_dup_u32(a);
+  // CHECK: ld3r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint64x1x3_t test_vld3_dup_u64(uint64_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_u64
+  return vld3_dup_u64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
+  // [{{x[0-9]+|sp}}]
+}
+
+int8x8x3_t test_vld3_dup_s8(int8_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_s8
+  return vld3_dup_s8(a);
+  // CHECK: ld3r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b},
+  // [{{x[0-9]+|sp}}]
+}
+
+int16x4x3_t test_vld3_dup_s16(int16_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_s16
+  return vld3_dup_s16(a);
+  // CHECK: ld3r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
+  // [{{x[0-9]+|sp}}]
+}
+
+int32x2x3_t test_vld3_dup_s32(int32_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_s32
+  return vld3_dup_s32(a);
+  // CHECK: ld3r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
+  // [{{x[0-9]+|sp}}]
+}
+
+int64x1x3_t test_vld3_dup_s64(int64_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_s64
+  return vld3_dup_s64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
+  // [{{x[0-9]+|sp}}]
+}
+
+float16x4x3_t test_vld3_dup_f16(float16_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_f16
+  return vld3_dup_f16(a);
+  // CHECK: ld3r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
+  // [{{x[0-9]+|sp}}]
+}
+
+float32x2x3_t test_vld3_dup_f32(float32_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_f32
+  return vld3_dup_f32(a);
+  // CHECK: ld3r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
+  // [{{x[0-9]+|sp}}]
+}
+
+float64x1x3_t test_vld3_dup_f64(float64_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_f64
+  return vld3_dup_f64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
+  // [{{x[0-9]+|sp}}]
+}
+
+poly8x8x3_t test_vld3_dup_p8(poly8_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_p8
+  return vld3_dup_p8(a);
+  // CHECK: ld3r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b},
+  // [{{x[0-9]+|sp}}]
+}
+
+poly16x4x3_t test_vld3_dup_p16(poly16_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_p16
+  return vld3_dup_p16(a);
+  // CHECK: ld3r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
+  // [{{x[0-9]+|sp}}]
+}
+
+poly64x1x3_t test_vld3_dup_p64(poly64_t const *a) {
+  // CHECK-LABEL: test_vld3_dup_p64
+  return vld3_dup_p64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
+  // [{{x[0-9]+|sp}}]
+}
+
+uint8x16x4_t test_vld4q_dup_u8(uint8_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_u8
+  return vld4q_dup_u8(a);
+  // CHECK: ld4r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
+  // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+uint16x8x4_t test_vld4q_dup_u16(uint16_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_u16
+  return vld4q_dup_u16(a);
+  // CHECK: ld4r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+uint32x4x4_t test_vld4q_dup_u32(uint32_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_u32
+  return vld4q_dup_u32(a);
+  // CHECK: ld4r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x2x4_t test_vld4q_dup_u64(uint64_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_u64
+  return vld4q_dup_u64(a);
+  // CHECK: ld4r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+int8x16x4_t test_vld4q_dup_s8(int8_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_s8
+  return vld4q_dup_s8(a);
+  // CHECK: ld4r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
+  // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+int16x8x4_t test_vld4q_dup_s16(int16_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_s16
+  return vld4q_dup_s16(a);
+  // CHECK: ld4r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+int32x4x4_t test_vld4q_dup_s32(int32_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_s32
+  return vld4q_dup_s32(a);
+  // CHECK: ld4r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+int64x2x4_t test_vld4q_dup_s64(int64_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_s64
+  return vld4q_dup_s64(a);
+  // CHECK: ld4r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+float16x8x4_t test_vld4q_dup_f16(float16_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_f16
+  return vld4q_dup_f16(a);
+  // CHECK: ld4r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+float32x4x4_t test_vld4q_dup_f32(float32_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_f32
+  return vld4q_dup_f32(a);
+  // CHECK: ld4r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+float64x2x4_t test_vld4q_dup_f64(float64_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_f64
+  return vld4q_dup_f64(a);
+  // CHECK: ld4r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+poly8x16x4_t test_vld4q_dup_p8(poly8_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_p8
+  return vld4q_dup_p8(a);
+  // CHECK: ld4r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
+  // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+poly16x8x4_t test_vld4q_dup_p16(poly16_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_p16
+  return vld4q_dup_p16(a);
+  // CHECK: ld4r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+poly64x2x4_t test_vld4q_dup_p64(poly64_t const *a) {
+  // CHECK-LABEL: test_vld4q_dup_p64
+  return vld4q_dup_p64(a);
+  // CHECK: ld4r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+uint8x8x4_t test_vld4_dup_u8(uint8_t const *a) {
+  // CHECK-LABEL: test_vld4_dup_u8
+  return vld4_dup_u8(a);
+  // CHECK: ld4r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
+  // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+uint16x4x4_t test_vld4_dup_u16(uint16_t const *a) {
+  // CHECK-LABEL: test_vld4_dup_u16
+  return vld4_dup_u16(a);
+  // CHECK: ld4r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+uint32x2x4_t test_vld4_dup_u32(uint32_t const *a) {
+  // CHECK-LABEL: test_vld4_dup_u32
+  return vld4_dup_u32(a);
+  // CHECK: ld4r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
+  // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x1x4_t test_vld4_dup_u64(uint64_t const *a) {
+  // CHECK-LABEL: test_vld4_dup_u64
+  return vld4_dup_u64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
+  // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+int8x8x4_t test_vld4_dup_s8(int8_t const *a) {
+  // CHECK-LABEL: test_vld4_dup_s8
+  return vld4_dup_s8(a);
+  // CHECK: ld4r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
+  // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+int16x4x4_t test_vld4_dup_s16(int16_t const *a) {
+  // CHECK-LABEL: test_vld4_dup_s16
+  return vld4_dup_s16(a);
+  // CHECK: ld4r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+int32x2x4_t test_vld4_dup_s32(int32_t const *a) {
+  // CHECK-LABEL: test_vld4_dup_s32
+  return vld4_dup_s32(a);
+  // CHECK: ld4r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
+  // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+int64x1x4_t test_vld4_dup_s64(int64_t const *a) { // expects a four-register ld1 on .1d
+  // CHECK-LABEL: test_vld4_dup_s64
+  return vld4_dup_s64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // (single-line pattern: FileCheck ignores a wrapped continuation comment)
+}
+
+float16x4x4_t test_vld4_dup_f16(float16_t const *a) { // expects a four-register ld4r on .4h
+  // CHECK-LABEL: test_vld4_dup_f16
+  return vld4_dup_f16(a);
+  // CHECK: ld4r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  // (single-line pattern: FileCheck ignores a wrapped continuation comment)
+}
+
+float32x2x4_t test_vld4_dup_f32(float32_t const *a) { // expects a four-register ld4r on .2s
+  // CHECK-LABEL: test_vld4_dup_f32
+  return vld4_dup_f32(a);
+  // CHECK: ld4r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  // (single-line pattern: FileCheck ignores a wrapped continuation comment)
+}
+
+float64x1x4_t test_vld4_dup_f64(float64_t const *a) { // expects a four-register ld1 on .1d
+  // CHECK-LABEL: test_vld4_dup_f64
+  return vld4_dup_f64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // (single-line pattern: FileCheck ignores a wrapped continuation comment)
+}
+
+poly8x8x4_t test_vld4_dup_p8(poly8_t const *a) { // expects a four-register ld4r on .8b
+  // CHECK-LABEL: test_vld4_dup_p8
+  return vld4_dup_p8(a);
+  // CHECK: ld4r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  // (single-line pattern: FileCheck ignores a wrapped continuation comment)
+}
+
+poly16x4x4_t test_vld4_dup_p16(poly16_t const *a) { // expects a four-register ld4r on .4h
+  // CHECK-LABEL: test_vld4_dup_p16
+  return vld4_dup_p16(a);
+  // CHECK: ld4r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  // (single-line pattern: FileCheck ignores a wrapped continuation comment)
+}
+
+poly64x1x4_t test_vld4_dup_p64(poly64_t const *a) { // expects a four-register ld1 on .1d
+  // CHECK-LABEL: test_vld4_dup_p64
+  return vld4_dup_p64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // (single-line pattern: FileCheck ignores a wrapped continuation comment)
+}
+
+uint8x16_t test_vld1q_lane_u8(uint8_t const *a, uint8x16_t b) { // expects ld1 into .b lane 15
+  // CHECK-LABEL: test_vld1q_lane_u8
+  return vld1q_lane_u8(a, b, 15);
+  // CHECK: ld1 {v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+uint16x8_t test_vld1q_lane_u16(uint16_t const *a, uint16x8_t b) { // expects ld1 into .h lane 7
+  // CHECK-LABEL: test_vld1q_lane_u16
+  return vld1q_lane_u16(a, b, 7);
+  // CHECK: ld1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+uint32x4_t test_vld1q_lane_u32(uint32_t const *a, uint32x4_t b) { // expects ld1 into .s lane 3
+  // CHECK-LABEL: test_vld1q_lane_u32
+  return vld1q_lane_u32(a, b, 3);
+  // CHECK: ld1 {v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+uint64x2_t test_vld1q_lane_u64(uint64_t const *a, uint64x2_t b) { // expects ld1 into .d lane 1
+  // CHECK-LABEL: test_vld1q_lane_u64
+  return vld1q_lane_u64(a, b, 1);
+  // CHECK: ld1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+int8x16_t test_vld1q_lane_s8(int8_t const *a, int8x16_t b) { // expects ld1 into .b lane 15
+  // CHECK-LABEL: test_vld1q_lane_s8
+  return vld1q_lane_s8(a, b, 15);
+  // CHECK: ld1 {v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+int16x8_t test_vld1q_lane_s16(int16_t const *a, int16x8_t b) { // expects ld1 into .h lane 7
+  // CHECK-LABEL: test_vld1q_lane_s16
+  return vld1q_lane_s16(a, b, 7);
+  // CHECK: ld1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+int32x4_t test_vld1q_lane_s32(int32_t const *a, int32x4_t b) { // expects ld1 into .s lane 3
+  // CHECK-LABEL: test_vld1q_lane_s32
+  return vld1q_lane_s32(a, b, 3);
+  // CHECK: ld1 {v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+int64x2_t test_vld1q_lane_s64(int64_t const *a, int64x2_t b) { // expects ld1 into .d lane 1
+  // CHECK-LABEL: test_vld1q_lane_s64
+  return vld1q_lane_s64(a, b, 1);
+  // CHECK: ld1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+float16x8_t test_vld1q_lane_f16(float16_t const *a, float16x8_t b) { // expects ld1 into .h lane 7
+  // CHECK-LABEL: test_vld1q_lane_f16
+  return vld1q_lane_f16(a, b, 7);
+  // CHECK: ld1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+float32x4_t test_vld1q_lane_f32(float32_t const *a, float32x4_t b) { // expects ld1 into .s lane 3
+  // CHECK-LABEL: test_vld1q_lane_f32
+  return vld1q_lane_f32(a, b, 3);
+  // CHECK: ld1 {v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+float64x2_t test_vld1q_lane_f64(float64_t const *a, float64x2_t b) { // expects ld1 into .d lane 1
+  // CHECK-LABEL: test_vld1q_lane_f64
+  return vld1q_lane_f64(a, b, 1);
+  // CHECK: ld1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+poly8x16_t test_vld1q_lane_p8(poly8_t const *a, poly8x16_t b) { // expects ld1 into .b lane 15
+  // CHECK-LABEL: test_vld1q_lane_p8
+  return vld1q_lane_p8(a, b, 15);
+  // CHECK: ld1 {v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+poly16x8_t test_vld1q_lane_p16(poly16_t const *a, poly16x8_t b) { // expects ld1 into .h lane 7
+  // CHECK-LABEL: test_vld1q_lane_p16
+  return vld1q_lane_p16(a, b, 7);
+  // CHECK: ld1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+poly64x2_t test_vld1q_lane_p64(poly64_t const *a, poly64x2_t b) { // expects ld1 into .d lane 1
+  // CHECK-LABEL: test_vld1q_lane_p64
+  return vld1q_lane_p64(a, b, 1);
+  // CHECK: ld1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+uint8x8_t test_vld1_lane_u8(uint8_t const *a, uint8x8_t b) { // expects ld1 into .b lane 7
+  // CHECK-LABEL: test_vld1_lane_u8
+  return vld1_lane_u8(a, b, 7);
+  // CHECK: ld1 {v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+uint16x4_t test_vld1_lane_u16(uint16_t const *a, uint16x4_t b) { // expects ld1 into .h lane 3
+  // CHECK-LABEL: test_vld1_lane_u16
+  return vld1_lane_u16(a, b, 3);
+  // CHECK: ld1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+uint32x2_t test_vld1_lane_u32(uint32_t const *a, uint32x2_t b) { // expects ld1 into .s lane 1
+  // CHECK-LABEL: test_vld1_lane_u32
+  return vld1_lane_u32(a, b, 1);
+  // CHECK: ld1 {v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+uint64x1_t test_vld1_lane_u64(uint64_t const *a, uint64x1_t b) { // expects ld1r on a .1d register
+  // CHECK-LABEL: test_vld1_lane_u64
+  return vld1_lane_u64(a, b, 0);
+  // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+int8x8_t test_vld1_lane_s8(int8_t const *a, int8x8_t b) { // expects ld1 into .b lane 7
+  // CHECK-LABEL: test_vld1_lane_s8
+  return vld1_lane_s8(a, b, 7);
+  // CHECK: ld1 {v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+int16x4_t test_vld1_lane_s16(int16_t const *a, int16x4_t b) { // expects ld1 into .h lane 3
+  // CHECK-LABEL: test_vld1_lane_s16
+  return vld1_lane_s16(a, b, 3);
+  // CHECK: ld1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+int32x2_t test_vld1_lane_s32(int32_t const *a, int32x2_t b) { // expects ld1 into .s lane 1
+  // CHECK-LABEL: test_vld1_lane_s32
+  return vld1_lane_s32(a, b, 1);
+  // CHECK: ld1 {v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+int64x1_t test_vld1_lane_s64(int64_t const *a, int64x1_t b) { // expects ld1r on a .1d register
+  // CHECK-LABEL: test_vld1_lane_s64
+  return vld1_lane_s64(a, b, 0);
+  // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+float16x4_t test_vld1_lane_f16(float16_t const *a, float16x4_t b) { // expects ld1 into .h lane 3
+  // CHECK-LABEL: test_vld1_lane_f16
+  return vld1_lane_f16(a, b, 3);
+  // CHECK: ld1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+float32x2_t test_vld1_lane_f32(float32_t const *a, float32x2_t b) { // expects ld1 into .s lane 1
+  // CHECK-LABEL: test_vld1_lane_f32
+  return vld1_lane_f32(a, b, 1);
+  // CHECK: ld1 {v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+float64x1_t test_vld1_lane_f64(float64_t const *a, float64x1_t b) { // expects ld1r on a .1d register
+  // CHECK-LABEL: test_vld1_lane_f64
+  return vld1_lane_f64(a, b, 0);
+  // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+poly8x8_t test_vld1_lane_p8(poly8_t const *a, poly8x8_t b) { // expects ld1 into .b lane 7
+  // CHECK-LABEL: test_vld1_lane_p8
+  return vld1_lane_p8(a, b, 7);
+  // CHECK: ld1 {v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+poly16x4_t test_vld1_lane_p16(poly16_t const *a, poly16x4_t b) { // expects ld1 into .h lane 3
+  // CHECK-LABEL: test_vld1_lane_p16
+  return vld1_lane_p16(a, b, 3);
+  // CHECK: ld1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+poly64x1_t test_vld1_lane_p64(poly64_t const *a, poly64x1_t b) { // expects ld1r on a .1d register
+  // CHECK-LABEL: test_vld1_lane_p64
+  return vld1_lane_p64(a, b, 0);
+  // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+uint16x8x2_t test_vld2q_lane_u16(uint16_t const *a, uint16x8x2_t b) { // expects ld2, two .h regs, lane 7
+  // CHECK-LABEL: test_vld2q_lane_u16
+  return vld2q_lane_u16(a, b, 7);
+  // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+uint32x4x2_t test_vld2q_lane_u32(uint32_t const *a, uint32x4x2_t b) { // expects ld2, two .s regs, lane 3
+  // CHECK-LABEL: test_vld2q_lane_u32
+  return vld2q_lane_u32(a, b, 3);
+  // CHECK: ld2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+uint64x2x2_t test_vld2q_lane_u64(uint64_t const *a, uint64x2x2_t b) { // expects ld2, two .d regs, lane 1
+  // CHECK-LABEL: test_vld2q_lane_u64
+  return vld2q_lane_u64(a, b, 1);
+  // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+int16x8x2_t test_vld2q_lane_s16(int16_t const *a, int16x8x2_t b) { // expects ld2, two .h regs, lane 7
+  // CHECK-LABEL: test_vld2q_lane_s16
+  return vld2q_lane_s16(a, b, 7);
+  // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+int32x4x2_t test_vld2q_lane_s32(int32_t const *a, int32x4x2_t b) { // expects ld2, two .s regs, lane 3
+  // CHECK-LABEL: test_vld2q_lane_s32
+  return vld2q_lane_s32(a, b, 3);
+  // CHECK: ld2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+int64x2x2_t test_vld2q_lane_s64(int64_t const *a, int64x2x2_t b) { // expects ld2, two .d regs, lane 1
+  // CHECK-LABEL: test_vld2q_lane_s64
+  return vld2q_lane_s64(a, b, 1);
+  // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+float16x8x2_t test_vld2q_lane_f16(float16_t const *a, float16x8x2_t b) { // expects ld2, two .h regs, lane 7
+  // CHECK-LABEL: test_vld2q_lane_f16
+  return vld2q_lane_f16(a, b, 7);
+  // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+float32x4x2_t test_vld2q_lane_f32(float32_t const *a, float32x4x2_t b) { // expects ld2, two .s regs, lane 3
+  // CHECK-LABEL: test_vld2q_lane_f32
+  return vld2q_lane_f32(a, b, 3);
+  // CHECK: ld2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+float64x2x2_t test_vld2q_lane_f64(float64_t const *a, float64x2x2_t b) { // expects ld2, two .d regs, lane 1
+  // CHECK-LABEL: test_vld2q_lane_f64
+  return vld2q_lane_f64(a, b, 1);
+  // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+poly16x8x2_t test_vld2q_lane_p16(poly16_t const *a, poly16x8x2_t b) { // expects ld2, two .h regs, lane 7
+  // CHECK-LABEL: test_vld2q_lane_p16
+  return vld2q_lane_p16(a, b, 7);
+  // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+poly64x2x2_t test_vld2q_lane_p64(poly64_t const *a, poly64x2x2_t b) { // expects ld2, two .d regs, lane 1
+  // CHECK-LABEL: test_vld2q_lane_p64
+  return vld2q_lane_p64(a, b, 1);
+  // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+uint8x8x2_t test_vld2_lane_u8(uint8_t const *a, uint8x8x2_t b) { // expects ld2, two .b regs, lane 7
+  // CHECK-LABEL: test_vld2_lane_u8
+  return vld2_lane_u8(a, b, 7);
+  // CHECK: ld2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+uint16x4x2_t test_vld2_lane_u16(uint16_t const *a, uint16x4x2_t b) { // expects ld2, two .h regs, lane 3
+  // CHECK-LABEL: test_vld2_lane_u16
+  return vld2_lane_u16(a, b, 3);
+  // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+uint32x2x2_t test_vld2_lane_u32(uint32_t const *a, uint32x2x2_t b) { // expects ld2, two .s regs, lane 1
+  // CHECK-LABEL: test_vld2_lane_u32
+  return vld2_lane_u32(a, b, 1);
+  // CHECK: ld2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+uint64x1x2_t test_vld2_lane_u64(uint64_t const *a, uint64x1x2_t b) { // expects ld2, two .d regs, lane 0
+  // CHECK-LABEL: test_vld2_lane_u64
+  return vld2_lane_u64(a, b, 0);
+  // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+int8x8x2_t test_vld2_lane_s8(int8_t const *a, int8x8x2_t b) { // expects ld2, two .b regs, lane 7
+  // CHECK-LABEL: test_vld2_lane_s8
+  return vld2_lane_s8(a, b, 7);
+  // CHECK: ld2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+int16x4x2_t test_vld2_lane_s16(int16_t const *a, int16x4x2_t b) { // expects ld2, two .h regs, lane 3
+  // CHECK-LABEL: test_vld2_lane_s16
+  return vld2_lane_s16(a, b, 3);
+  // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+int32x2x2_t test_vld2_lane_s32(int32_t const *a, int32x2x2_t b) { // expects ld2, two .s regs, lane 1
+  // CHECK-LABEL: test_vld2_lane_s32
+  return vld2_lane_s32(a, b, 1);
+  // CHECK: ld2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+int64x1x2_t test_vld2_lane_s64(int64_t const *a, int64x1x2_t b) { // expects ld2, two .d regs, lane 0
+  // CHECK-LABEL: test_vld2_lane_s64
+  return vld2_lane_s64(a, b, 0);
+  // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+float16x4x2_t test_vld2_lane_f16(float16_t const *a, float16x4x2_t b) { // expects ld2, two .h regs, lane 3
+  // CHECK-LABEL: test_vld2_lane_f16
+  return vld2_lane_f16(a, b, 3);
+  // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+float32x2x2_t test_vld2_lane_f32(float32_t const *a, float32x2x2_t b) { // expects ld2, two .s regs, lane 1
+  // CHECK-LABEL: test_vld2_lane_f32
+  return vld2_lane_f32(a, b, 1);
+  // CHECK: ld2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+float64x1x2_t test_vld2_lane_f64(float64_t const *a, float64x1x2_t b) { // expects ld2, two .d regs, lane 0
+  // CHECK-LABEL: test_vld2_lane_f64
+  return vld2_lane_f64(a, b, 0);
+  // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+poly8x8x2_t test_vld2_lane_p8(poly8_t const *a, poly8x8x2_t b) { // expects ld2, two .b regs, lane 7
+  // CHECK-LABEL: test_vld2_lane_p8
+  return vld2_lane_p8(a, b, 7);
+  // CHECK: ld2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+poly16x4x2_t test_vld2_lane_p16(poly16_t const *a, poly16x4x2_t b) { // expects ld2, two .h regs, lane 3
+  // CHECK-LABEL: test_vld2_lane_p16
+  return vld2_lane_p16(a, b, 3);
+  // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+poly64x1x2_t test_vld2_lane_p64(poly64_t const *a, poly64x1x2_t b) { // expects ld2, two .d regs, lane 0
+  // CHECK-LABEL: test_vld2_lane_p64
+  return vld2_lane_p64(a, b, 0);
+  // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+uint16x8x3_t test_vld3q_lane_u16(uint16_t const *a, uint16x8x3_t b) { // expects ld3, three .h regs, lane 7
+  // CHECK-LABEL: test_vld3q_lane_u16
+  return vld3q_lane_u16(a, b, 7);
+  // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+uint32x4x3_t test_vld3q_lane_u32(uint32_t const *a, uint32x4x3_t b) { // expects ld3, three .s regs, lane 3
+  // CHECK-LABEL: test_vld3q_lane_u32
+  return vld3q_lane_u32(a, b, 3);
+  // CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+uint64x2x3_t test_vld3q_lane_u64(uint64_t const *a, uint64x2x3_t b) { // expects ld3, three .d regs, lane 1
+  // CHECK-LABEL: test_vld3q_lane_u64
+  return vld3q_lane_u64(a, b, 1);
+  // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+int16x8x3_t test_vld3q_lane_s16(int16_t const *a, int16x8x3_t b) { // expects ld3, three .h regs, lane 7
+  // CHECK-LABEL: test_vld3q_lane_s16
+  return vld3q_lane_s16(a, b, 7);
+  // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+int32x4x3_t test_vld3q_lane_s32(int32_t const *a, int32x4x3_t b) { // expects ld3, three .s regs, lane 3
+  // CHECK-LABEL: test_vld3q_lane_s32
+  return vld3q_lane_s32(a, b, 3);
+  // CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+int64x2x3_t test_vld3q_lane_s64(int64_t const *a, int64x2x3_t b) { // expects ld3, three .d regs, lane 1
+  // CHECK-LABEL: test_vld3q_lane_s64
+  return vld3q_lane_s64(a, b, 1);
+  // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+float16x8x3_t test_vld3q_lane_f16(float16_t const *a, float16x8x3_t b) { // expects ld3, three .h regs, lane 7
+  // CHECK-LABEL: test_vld3q_lane_f16
+  return vld3q_lane_f16(a, b, 7);
+  // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+float32x4x3_t test_vld3q_lane_f32(float32_t const *a, float32x4x3_t b) { // expects ld3, three .s regs, lane 3
+  // CHECK-LABEL: test_vld3q_lane_f32
+  return vld3q_lane_f32(a, b, 3);
+  // CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+float64x2x3_t test_vld3q_lane_f64(float64_t const *a, float64x2x3_t b) { // expects ld3, three .d regs, lane 1
+  // CHECK-LABEL: test_vld3q_lane_f64
+  return vld3q_lane_f64(a, b, 1);
+  // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+poly8x16x3_t test_vld3q_lane_p8(poly8_t const *a, poly8x16x3_t b) { // expects ld3, three .b regs, lane 15
+  // CHECK-LABEL: test_vld3q_lane_p8
+  return vld3q_lane_p8(a, b, 15);
+  // CHECK: ld3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+poly16x8x3_t test_vld3q_lane_p16(poly16_t const *a, poly16x8x3_t b) { // expects ld3, three .h regs, lane 7
+  // CHECK-LABEL: test_vld3q_lane_p16
+  return vld3q_lane_p16(a, b, 7);
+  // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+poly64x2x3_t test_vld3q_lane_p64(poly64_t const *a, poly64x2x3_t b) { // expects ld3, three .d regs, lane 1
+  // CHECK-LABEL: test_vld3q_lane_p64
+  return vld3q_lane_p64(a, b, 1);
+  // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+uint8x8x3_t test_vld3_lane_u8(uint8_t const *a, uint8x8x3_t b) { // expects ld3, three .b regs, lane 7
+  // CHECK-LABEL: test_vld3_lane_u8
+  return vld3_lane_u8(a, b, 7);
+  // CHECK: ld3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+uint16x4x3_t test_vld3_lane_u16(uint16_t const *a, uint16x4x3_t b) { // expects ld3, three .h regs, lane 3
+  // CHECK-LABEL: test_vld3_lane_u16
+  return vld3_lane_u16(a, b, 3);
+  // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+uint32x2x3_t test_vld3_lane_u32(uint32_t const *a, uint32x2x3_t b) { // expects ld3, three .s regs, lane 1
+  // CHECK-LABEL: test_vld3_lane_u32
+  return vld3_lane_u32(a, b, 1);
+  // CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+uint64x1x3_t test_vld3_lane_u64(uint64_t const *a, uint64x1x3_t b) { // expects ld3, three .d regs, lane 0
+  // CHECK-LABEL: test_vld3_lane_u64
+  return vld3_lane_u64(a, b, 0);
+  // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+int8x8x3_t test_vld3_lane_s8(int8_t const *a, int8x8x3_t b) { // expects ld3, three .b regs, lane 7
+  // CHECK-LABEL: test_vld3_lane_s8
+  return vld3_lane_s8(a, b, 7);
+  // CHECK: ld3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+int16x4x3_t test_vld3_lane_s16(int16_t const *a, int16x4x3_t b) { // expects ld3, three .h regs, lane 3
+  // CHECK-LABEL: test_vld3_lane_s16
+  return vld3_lane_s16(a, b, 3);
+  // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+int32x2x3_t test_vld3_lane_s32(int32_t const *a, int32x2x3_t b) { // expects ld3, three .s regs, lane 1
+  // CHECK-LABEL: test_vld3_lane_s32
+  return vld3_lane_s32(a, b, 1);
+  // CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+int64x1x3_t test_vld3_lane_s64(int64_t const *a, int64x1x3_t b) { // expects ld3, three .d regs, lane 0
+  // CHECK-LABEL: test_vld3_lane_s64
+  return vld3_lane_s64(a, b, 0);
+  // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+float16x4x3_t test_vld3_lane_f16(float16_t const *a, float16x4x3_t b) { // expects ld3, three .h regs, lane 3
+  // CHECK-LABEL: test_vld3_lane_f16
+  return vld3_lane_f16(a, b, 3);
+  // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+float32x2x3_t test_vld3_lane_f32(float32_t const *a, float32x2x3_t b) { // expects ld3, three .s regs, lane 1
+  // CHECK-LABEL: test_vld3_lane_f32
+  return vld3_lane_f32(a, b, 1);
+  // CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+float64x1x3_t test_vld3_lane_f64(float64_t const *a, float64x1x3_t b) { // expects ld3, three .d regs, lane 0
+  // CHECK-LABEL: test_vld3_lane_f64
+  return vld3_lane_f64(a, b, 0);
+  // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+poly8x8x3_t test_vld3_lane_p8(poly8_t const *a, poly8x8x3_t b) { // expects ld3, three .b regs, lane 7
+  // CHECK-LABEL: test_vld3_lane_p8
+  return vld3_lane_p8(a, b, 7);
+  // CHECK: ld3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+poly16x4x3_t test_vld3_lane_p16(poly16_t const *a, poly16x4x3_t b) { // expects ld3, three .h regs, lane 3
+  // CHECK-LABEL: test_vld3_lane_p16
+  return vld3_lane_p16(a, b, 3);
+  // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+poly64x1x3_t test_vld3_lane_p64(poly64_t const *a, poly64x1x3_t b) { // expects ld3, three .d regs, lane 0
+  // CHECK-LABEL: test_vld3_lane_p64
+  return vld3_lane_p64(a, b, 0);
+  // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+uint8x16x4_t test_vld4q_lane_u8(uint8_t const *a, uint8x16x4_t b) { // expects ld4, four .b regs, lane 15
+  // CHECK-LABEL: test_vld4q_lane_u8
+  return vld4q_lane_u8(a, b, 15);
+  // CHECK: ld4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+uint16x8x4_t test_vld4q_lane_u16(uint16_t const *a, uint16x8x4_t b) { // expects ld4, four .h regs, lane 7
+  // CHECK-LABEL: test_vld4q_lane_u16
+  return vld4q_lane_u16(a, b, 7);
+  // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+uint32x4x4_t test_vld4q_lane_u32(uint32_t const *a, uint32x4x4_t b) { // expects ld4, four .s regs, lane 3
+  // CHECK-LABEL: test_vld4q_lane_u32
+  return vld4q_lane_u32(a, b, 3);
+  // CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+uint64x2x4_t test_vld4q_lane_u64(uint64_t const *a, uint64x2x4_t b) { // expects ld4, four .d regs, lane 1
+  // CHECK-LABEL: test_vld4q_lane_u64
+  return vld4q_lane_u64(a, b, 1);
+  // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+int8x16x4_t test_vld4q_lane_s8(int8_t const *a, int8x16x4_t b) { // expects ld4, four .b regs, lane 15
+  // CHECK-LABEL: test_vld4q_lane_s8
+  return vld4q_lane_s8(a, b, 15);
+  // CHECK: ld4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+int16x8x4_t test_vld4q_lane_s16(int16_t const *a, int16x8x4_t b) { // expects ld4, four .h regs, lane 7
+  // CHECK-LABEL: test_vld4q_lane_s16
+  return vld4q_lane_s16(a, b, 7);
+  // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+int32x4x4_t test_vld4q_lane_s32(int32_t const *a, int32x4x4_t b) { // expects ld4, four .s regs, lane 3
+  // CHECK-LABEL: test_vld4q_lane_s32
+  return vld4q_lane_s32(a, b, 3);
+  // CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+int64x2x4_t test_vld4q_lane_s64(int64_t const *a, int64x2x4_t b) { // expects ld4, four .d regs, lane 1
+  // CHECK-LABEL: test_vld4q_lane_s64
+  return vld4q_lane_s64(a, b, 1);
+  // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+float16x8x4_t test_vld4q_lane_f16(float16_t const *a, float16x8x4_t b) { // expects ld4, four .h regs, lane 7
+  // CHECK-LABEL: test_vld4q_lane_f16
+  return vld4q_lane_f16(a, b, 7);
+  // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+float32x4x4_t test_vld4q_lane_f32(float32_t const *a, float32x4x4_t b) { // expects ld4, four .s regs, lane 3
+  // CHECK-LABEL: test_vld4q_lane_f32
+  return vld4q_lane_f32(a, b, 3);
+  // CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+float64x2x4_t test_vld4q_lane_f64(float64_t const *a, float64x2x4_t b) { // expects ld4, four .d regs, lane 1
+  // CHECK-LABEL: test_vld4q_lane_f64
+  return vld4q_lane_f64(a, b, 1);
+  // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+poly8x16x4_t test_vld4q_lane_p8(poly8_t const *a, poly8x16x4_t b) { // expects ld4, four .b regs, lane 15
+  // CHECK-LABEL: test_vld4q_lane_p8
+  return vld4q_lane_p8(a, b, 15);
+  // CHECK: ld4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+poly16x8x4_t test_vld4q_lane_p16(poly16_t const *a, poly16x8x4_t b) { // expects ld4, four .h regs, lane 7
+  // CHECK-LABEL: test_vld4q_lane_p16
+  return vld4q_lane_p16(a, b, 7);
+  // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+poly64x2x4_t test_vld4q_lane_p64(poly64_t const *a, poly64x2x4_t b) { // expects ld4, four .d regs, lane 1
+  // CHECK-LABEL: test_vld4q_lane_p64
+  return vld4q_lane_p64(a, b, 1);
+  // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+uint8x8x4_t test_vld4_lane_u8(uint8_t const *a, uint8x8x4_t b) { // expects ld4, four .b regs, lane 7
+  // CHECK-LABEL: test_vld4_lane_u8
+  return vld4_lane_u8(a, b, 7);
+  // CHECK: ld4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+uint16x4x4_t test_vld4_lane_u16(uint16_t const *a, uint16x4x4_t b) { // expects ld4, four .h regs, lane 3
+  // CHECK-LABEL: test_vld4_lane_u16
+  return vld4_lane_u16(a, b, 3);
+  // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+uint32x2x4_t test_vld4_lane_u32(uint32_t const *a, uint32x2x4_t b) { // expects ld4, four .s regs, lane 1
+  // CHECK-LABEL: test_vld4_lane_u32
+  return vld4_lane_u32(a, b, 1);
+  // CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+uint64x1x4_t test_vld4_lane_u64(uint64_t const *a, uint64x1x4_t b) { // expects ld4, four .d regs, lane 0
+  // CHECK-LABEL: test_vld4_lane_u64
+  return vld4_lane_u64(a, b, 0);
+  // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+int8x8x4_t test_vld4_lane_s8(int8_t const *a, int8x8x4_t b) { // expects ld4, four .b regs, lane 7
+  // CHECK-LABEL: test_vld4_lane_s8
+  return vld4_lane_s8(a, b, 7);
+  // CHECK: ld4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+int16x4x4_t test_vld4_lane_s16(int16_t const *a, int16x4x4_t b) { // expects ld4, four .h regs, lane 3
+  // CHECK-LABEL: test_vld4_lane_s16
+  return vld4_lane_s16(a, b, 3);
+  // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+int32x2x4_t test_vld4_lane_s32(int32_t const *a, int32x2x4_t b) { // expects ld4, four .s regs, lane 1
+  // CHECK-LABEL: test_vld4_lane_s32
+  return vld4_lane_s32(a, b, 1);
+  // CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+int64x1x4_t test_vld4_lane_s64(int64_t const *a, int64x1x4_t b) { // expects ld4, four .d regs, lane 0
+  // CHECK-LABEL: test_vld4_lane_s64
+  return vld4_lane_s64(a, b, 0);
+  // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+float16x4x4_t test_vld4_lane_f16(float16_t const *a, float16x4x4_t b) { // expects ld4, four .h regs, lane 3
+  // CHECK-LABEL: test_vld4_lane_f16
+  return vld4_lane_f16(a, b, 3);
+  // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+float32x2x4_t test_vld4_lane_f32(float32_t const *a, float32x2x4_t b) { // expects ld4, four .s regs, lane 1
+  // CHECK-LABEL: test_vld4_lane_f32
+  return vld4_lane_f32(a, b, 1);
+  // CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+float64x1x4_t test_vld4_lane_f64(float64_t const *a, float64x1x4_t b) { // expects ld4, four .d regs, lane 0
+  // CHECK-LABEL: test_vld4_lane_f64
+  return vld4_lane_f64(a, b, 0);
+  // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+poly8x8x4_t test_vld4_lane_p8(poly8_t const *a, poly8x8x4_t b) { // expects ld4, four .b regs, lane 7
+  // CHECK-LABEL: test_vld4_lane_p8
+  return vld4_lane_p8(a, b, 7);
+  // CHECK: ld4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+poly16x4x4_t test_vld4_lane_p16(poly16_t const *a, poly16x4x4_t b) { // expects ld4, four .h regs, lane 3
+  // CHECK-LABEL: test_vld4_lane_p16
+  return vld4_lane_p16(a, b, 3);
+  // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+poly64x1x4_t test_vld4_lane_p64(poly64_t const *a, poly64x1x4_t b) { // expects ld4, four .d regs, lane 0
+  // CHECK-LABEL: test_vld4_lane_p64
+  return vld4_lane_p64(a, b, 0);
+  // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_u8(uint8_t *a, uint8x16_t b) { // expects st1 of .b lane 15; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_u8
+  vst1q_lane_u8(a, b, 15);
+  // CHECK: st1 {v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_u16(uint16_t *a, uint16x8_t b) { // expects st1 of .h lane 7; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_u16
+  vst1q_lane_u16(a, b, 7);
+  // CHECK: st1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_u32(uint32_t *a, uint32x4_t b) { // expects st1 of .s lane 3; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_u32
+  vst1q_lane_u32(a, b, 3);
+  // CHECK: st1 {v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_u64(uint64_t *a, uint64x2_t b) { // expects st1 of .d lane 1; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_u64
+  vst1q_lane_u64(a, b, 1);
+  // CHECK: st1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_s8(int8_t *a, int8x16_t b) { // expects st1 of .b lane 15; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_s8
+  vst1q_lane_s8(a, b, 15);
+  // CHECK: st1 {v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_s16(int16_t *a, int16x8_t b) { // expects st1 of .h lane 7; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_s16
+  vst1q_lane_s16(a, b, 7);
+  // CHECK: st1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_s32(int32_t *a, int32x4_t b) { // expects st1 of .s lane 3; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_s32
+  vst1q_lane_s32(a, b, 3);
+  // CHECK: st1 {v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_s64(int64_t *a, int64x2_t b) { // expects st1 of .d lane 1; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_s64
+  vst1q_lane_s64(a, b, 1);
+  // CHECK: st1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_f16(float16_t *a, float16x8_t b) { // expects st1 of .h lane 7; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_f16
+  vst1q_lane_f16(a, b, 7);
+  // CHECK: st1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_f32(float32_t *a, float32x4_t b) { // expects st1 of .s lane 3; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_f32
+  vst1q_lane_f32(a, b, 3);
+  // CHECK: st1 {v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_f64(float64_t *a, float64x2_t b) { // expects st1 of .d lane 1; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_f64
+  vst1q_lane_f64(a, b, 1);
+  // CHECK: st1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_p8(poly8_t *a, poly8x16_t b) { // expects st1 of .b lane 15; a store yields no value, and a is written
+  // CHECK-LABEL: test_vst1q_lane_p8
+  vst1q_lane_p8(a, b, 15);
+  // CHECK: st1 {v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_p16(poly16_t *a, poly16x8_t b) { // expects st1 of .h lane 7; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_p16
+  vst1q_lane_p16(a, b, 7);
+  // CHECK: st1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_lane_p64(poly64_t *a, poly64x2_t b) { // expects st1 of .d lane 1; a is stored to, so not const
+  // CHECK-LABEL: test_vst1q_lane_p64
+  vst1q_lane_p64(a, b, 1);
+  // CHECK: st1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_u8(uint8_t *a, uint8x8_t b) { // expects st1 of .b lane 7; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_u8
+  vst1_lane_u8(a, b, 7);
+  // CHECK: st1 {v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_u16(uint16_t *a, uint16x4_t b) { // expects st1 of .h lane 3; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_u16
+  vst1_lane_u16(a, b, 3);
+  // CHECK: st1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_u32(uint32_t *a, uint32x2_t b) { // expects st1 of .s lane 1; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_u32
+  vst1_lane_u32(a, b, 1);
+  // CHECK: st1 {v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_u64(uint64_t *a, uint64x1_t b) { // expects st1 of .d lane 0; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_u64
+  vst1_lane_u64(a, b, 0);
+  // CHECK: st1 {v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_s8(int8_t *a, int8x8_t b) { // expects st1 of .b lane 7; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_s8
+  vst1_lane_s8(a, b, 7);
+  // CHECK: st1 {v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_s16(int16_t *a, int16x4_t b) { // expects st1 of .h lane 3; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_s16
+  vst1_lane_s16(a, b, 3);
+  // CHECK: st1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_s32(int32_t *a, int32x2_t b) { // expects st1 of .s lane 1; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_s32
+  vst1_lane_s32(a, b, 1);
+  // CHECK: st1 {v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_s64(int64_t *a, int64x1_t b) { // expects st1 of .d lane 0; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_s64
+  vst1_lane_s64(a, b, 0);
+  // CHECK: st1 {v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_f16(float16_t *a, float16x4_t b) { // expects st1 of .h lane 3; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_f16
+  vst1_lane_f16(a, b, 3);
+  // CHECK: st1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_f32(float32_t *a, float32x2_t b) { // expects st1 of .s lane 1; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_f32
+  vst1_lane_f32(a, b, 1);
+  // CHECK: st1 {v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_f64(float64_t *a, float64x1_t b) { // expects st1 of .d lane 0; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_f64
+  vst1_lane_f64(a, b, 0);
+  // CHECK: st1 {v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_p8(poly8_t *a, poly8x8_t b) { // expects st1 of .b lane 7; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_p8
+  vst1_lane_p8(a, b, 7);
+  // CHECK: st1 {v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_p16(poly16_t *a, poly16x4_t b) { // expects st1 of .h lane 3; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_p16
+  vst1_lane_p16(a, b, 3);
+  // CHECK: st1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_lane_p64(poly64_t *a, poly64x1_t b) { // expects st1 of .d lane 0; a is stored to, so not const
+  // CHECK-LABEL: test_vst1_lane_p64
+  vst1_lane_p64(a, b, 0);
+  // CHECK: st1 {v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_u8(uint8_t *a, uint8x16x2_t b) { // expects st2, two .b regs, lane 15; a is stored to, so not const
+  // CHECK-LABEL: test_vst2q_lane_u8
+  vst2q_lane_u8(a, b, 15);
+  // CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_u16(uint16_t *a, uint16x8x2_t b) { // expects st2, two .h regs, lane 7; a is stored to, so not const
+  // CHECK-LABEL: test_vst2q_lane_u16
+  vst2q_lane_u16(a, b, 7);
+  // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_u32(uint32_t *a, uint32x4x2_t b) { // expects st2, two .s regs, lane 3; a is stored to, so not const
+  // CHECK-LABEL: test_vst2q_lane_u32
+  vst2q_lane_u32(a, b, 3);
+  // CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_u64(uint64_t *a, uint64x2x2_t b) { // expects st2, two .d regs, lane 1; a is stored to, so not const
+  // CHECK-LABEL: test_vst2q_lane_u64
+  vst2q_lane_u64(a, b, 1);
+  // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_s8(int8_t *a, int8x16x2_t b) { // expects st2, two .b regs, lane 15; a is stored to, so not const
+  // CHECK-LABEL: test_vst2q_lane_s8
+  vst2q_lane_s8(a, b, 15);
+  // CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_s16(int16_t *a, int16x8x2_t b) { // expects st2, two .h regs, lane 7; a is stored to, so not const
+  // CHECK-LABEL: test_vst2q_lane_s16
+  vst2q_lane_s16(a, b, 7);
+  // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_s32(int32_t *a, int32x4x2_t b) { // expects st2, two .s regs, lane 3; a is stored to, so not const
+  // CHECK-LABEL: test_vst2q_lane_s32
+  vst2q_lane_s32(a, b, 3);
+  // CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_s64(int64_t *a, int64x2x2_t b) { // expects st2, two .d regs, lane 1; a is stored to, so not const
+  // CHECK-LABEL: test_vst2q_lane_s64
+  vst2q_lane_s64(a, b, 1);
+  // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_f16(float16_t *a, float16x8x2_t b) {
+  // CHECK-LABEL: test_vst2q_lane_f16
+  vst2q_lane_f16(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_f32(float32_t *a, float32x4x2_t b) {
+  // CHECK-LABEL: test_vst2q_lane_f32
+  vst2q_lane_f32(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_f64(float64_t *a, float64x2x2_t b) {
+  // CHECK-LABEL: test_vst2q_lane_f64
+  vst2q_lane_f64(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_p8(poly8_t *a, poly8x16x2_t b) {
+  // CHECK-LABEL: test_vst2q_lane_p8
+  vst2q_lane_p8(a, b, 15);  // last lane of each 16-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_p16(poly16_t *a, poly16x8x2_t b) {
+  // CHECK-LABEL: test_vst2q_lane_p16
+  vst2q_lane_p16(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_lane_p64(poly64_t *a, poly64x2x2_t b) {
+  // CHECK-LABEL: test_vst2q_lane_p64
+  vst2q_lane_p64(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_u8(uint8_t *a, uint8x8x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_u8
+  vst2_lane_u8(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_u16(uint16_t *a, uint16x4x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_u16
+  vst2_lane_u16(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_u32(uint32_t *a, uint32x2x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_u32
+  vst2_lane_u32(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_u64(uint64_t *a, uint64x1x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_u64
+  vst2_lane_u64(a, b, 0);  // only lane of each 1-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_s8(int8_t *a, int8x8x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_s8
+  vst2_lane_s8(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_s16(int16_t *a, int16x4x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_s16
+  vst2_lane_s16(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_s32(int32_t *a, int32x2x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_s32
+  vst2_lane_s32(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_s64(int64_t *a, int64x1x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_s64
+  vst2_lane_s64(a, b, 0);  // only lane of each 1-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_f16(float16_t *a, float16x4x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_f16
+  vst2_lane_f16(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_f32(float32_t *a, float32x2x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_f32
+  vst2_lane_f32(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_f64(float64_t *a, float64x1x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_f64
+  vst2_lane_f64(a, b, 0);  // only lane of each 1-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_p8(poly8_t *a, poly8x8x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_p8
+  vst2_lane_p8(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_p16(poly16_t *a, poly16x4x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_p16
+  vst2_lane_p16(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_lane_p64(poly64_t *a, poly64x1x2_t b) {
+  // CHECK-LABEL: test_vst2_lane_p64
+  vst2_lane_p64(a, b, 0);  // only lane of each 1-lane vector
+  // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_u8(uint8_t *a, uint8x16x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_u8
+  vst3q_lane_u8(a, b, 15);  // last lane of each 16-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_u16(uint16_t *a, uint16x8x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_u16
+  vst3q_lane_u16(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_u32(uint32_t *a, uint32x4x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_u32
+  vst3q_lane_u32(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_u64(uint64_t *a, uint64x2x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_u64
+  vst3q_lane_u64(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_s8(int8_t *a, int8x16x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_s8
+  vst3q_lane_s8(a, b, 15);  // last lane of each 16-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_s16(int16_t *a, int16x8x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_s16
+  vst3q_lane_s16(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_s32(int32_t *a, int32x4x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_s32
+  vst3q_lane_s32(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_s64(int64_t *a, int64x2x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_s64
+  vst3q_lane_s64(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_f16(float16_t *a, float16x8x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_f16
+  vst3q_lane_f16(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_f32(float32_t *a, float32x4x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_f32
+  vst3q_lane_f32(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_f64(float64_t *a, float64x2x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_f64
+  vst3q_lane_f64(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_p8(poly8_t *a, poly8x16x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_p8
+  vst3q_lane_p8(a, b, 15);  // last lane of each 16-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_p16(poly16_t *a, poly16x8x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_p16
+  vst3q_lane_p16(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_lane_p64(poly64_t *a, poly64x2x3_t b) {
+  // CHECK-LABEL: test_vst3q_lane_p64
+  vst3q_lane_p64(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_u8(uint8_t *a, uint8x8x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_u8
+  vst3_lane_u8(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_u16(uint16_t *a, uint16x4x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_u16
+  vst3_lane_u16(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_u32(uint32_t *a, uint32x2x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_u32
+  vst3_lane_u32(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_u64(uint64_t *a, uint64x1x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_u64
+  vst3_lane_u64(a, b, 0);  // only lane of each 1-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_s8(int8_t *a, int8x8x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_s8
+  vst3_lane_s8(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_s16(int16_t *a, int16x4x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_s16
+  vst3_lane_s16(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_s32(int32_t *a, int32x2x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_s32
+  vst3_lane_s32(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_s64(int64_t *a, int64x1x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_s64
+  vst3_lane_s64(a, b, 0);  // only lane of each 1-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_f16(float16_t *a, float16x4x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_f16
+  vst3_lane_f16(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_f32(float32_t *a, float32x2x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_f32
+  vst3_lane_f32(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_f64(float64_t *a, float64x1x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_f64
+  vst3_lane_f64(a, b, 0);  // only lane of each 1-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_p8(poly8_t *a, poly8x8x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_p8
+  vst3_lane_p8(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_p16(poly16_t *a, poly16x4x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_p16
+  vst3_lane_p16(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_lane_p64(poly64_t *a, poly64x1x3_t b) {
+  // CHECK-LABEL: test_vst3_lane_p64
+  vst3_lane_p64(a, b, 0);  // only lane of each 1-lane vector
+  // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_u8(uint8_t *a, uint8x16x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_u8
+  vst4q_lane_u8(a, b, 15);  // last lane of each 16-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_u16(uint16_t *a, uint16x8x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_u16
+  vst4q_lane_u16(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_u32(uint32_t *a, uint32x4x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_u32
+  vst4q_lane_u32(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_u64(uint64_t *a, uint64x2x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_u64
+  vst4q_lane_u64(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_s8(int8_t *a, int8x16x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_s8
+  vst4q_lane_s8(a, b, 15);  // last lane of each 16-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_s16(int16_t *a, int16x8x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_s16
+  vst4q_lane_s16(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_s32(int32_t *a, int32x4x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_s32
+  vst4q_lane_s32(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_s64(int64_t *a, int64x2x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_s64
+  vst4q_lane_s64(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_f16(float16_t *a, float16x8x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_f16
+  vst4q_lane_f16(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_f32(float32_t *a, float32x4x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_f32
+  vst4q_lane_f32(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_f64(float64_t *a, float64x2x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_f64
+  vst4q_lane_f64(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_p8(poly8_t *a, poly8x16x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_p8
+  vst4q_lane_p8(a, b, 15);  // last lane of each 16-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_p16(poly16_t *a, poly16x8x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_p16
+  vst4q_lane_p16(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_lane_p64(poly64_t *a, poly64x2x4_t b) {
+  // CHECK-LABEL: test_vst4q_lane_p64
+  vst4q_lane_p64(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_u8(uint8_t *a, uint8x8x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_u8
+  vst4_lane_u8(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_u16(uint16_t *a, uint16x4x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_u16
+  vst4_lane_u16(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_u32(uint32_t *a, uint32x2x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_u32
+  vst4_lane_u32(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_u64(uint64_t *a, uint64x1x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_u64
+  vst4_lane_u64(a, b, 0);  // only lane of each 1-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_s8(int8_t *a, int8x8x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_s8
+  vst4_lane_s8(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_s16(int16_t *a, int16x4x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_s16
+  vst4_lane_s16(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_s32(int32_t *a, int32x2x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_s32
+  vst4_lane_s32(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_s64(int64_t *a, int64x1x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_s64
+  vst4_lane_s64(a, b, 0);  // only lane of each 1-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_f16(float16_t *a, float16x4x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_f16
+  vst4_lane_f16(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_f32(float32_t *a, float32x2x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_f32
+  vst4_lane_f32(a, b, 1);  // last lane of each 2-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_f64(float64_t *a, float64x1x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_f64
+  vst4_lane_f64(a, b, 0);  // only lane of each 1-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_p8(poly8_t *a, poly8x8x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_p8
+  vst4_lane_p8(a, b, 7);  // last lane of each 8-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_p16(poly16_t *a, poly16x4x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_p16
+  vst4_lane_p16(a, b, 3);  // last lane of each 4-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_lane_p64(poly64_t *a, poly64x1x4_t b) {
+  // CHECK-LABEL: test_vst4_lane_p64
+  vst4_lane_p64(a, b, 0);  // only lane of each 1-lane vector
+  // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}]
+}





More information about the cfe-commits mailing list