r195844 - Fix the AArch64 NEON bug exposed by checking the constant integer argument range of ACLE intrinsics.

Jiangning Liu jiangning.liu at arm.com
Wed Nov 27 06:02:55 PST 2013


Author: jiangning
Date: Wed Nov 27 08:02:55 2013
New Revision: 195844

URL: http://llvm.org/viewvc/llvm-project?rev=195844&view=rev
Log:
Fix the AArch64 NEON bug exposed by checking the constant integer argument range of ACLE intrinsics.

Modified:
    cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
    cfe/trunk/utils/TableGen/NeonEmitter.cpp

Modified: cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-2velem.c?rev=195844&r1=195843&r2=195844&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-2velem.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-2velem.c Wed Nov 27 08:02:55 2013
@@ -10,14 +10,14 @@
 
 int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) {
   // CHECK: test_vmla_lane_s16
-  return vmla_lane_s16(a, b, v, 1);
-  // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmla_lane_s16(a, b, v, 3);
+  // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) {
   // CHECK: test_vmlaq_lane_s16
-  return vmlaq_lane_s16(a, b, v, 1);
-  // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlaq_lane_s16(a, b, v, 3);
+  // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) {
@@ -34,38 +34,38 @@ int32x4_t test_vmlaq_lane_s32(int32x4_t
 
 int16x4_t test_vmla_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
   // CHECK: test_vmla_laneq_s16
-  return vmla_laneq_s16(a, b, v, 1);
-  // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmla_laneq_s16(a, b, v, 7);
+  // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
 }
 
 int16x8_t test_vmlaq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
   // CHECK: test_vmlaq_laneq_s16
-  return vmlaq_laneq_s16(a, b, v, 1);
-  // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlaq_laneq_s16(a, b, v, 7);
+  // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
 }
 
 int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
   // CHECK: test_vmla_laneq_s32
-  return vmla_laneq_s32(a, b, v, 1);
-  // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmla_laneq_s32(a, b, v, 3);
+  // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vmlaq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
   // CHECK: test_vmlaq_laneq_s32
-  return vmlaq_laneq_s32(a, b, v, 1);
-  // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vmlaq_laneq_s32(a, b, v, 3);
+  // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) {
   // CHECK: test_vmls_lane_s16
-  return vmls_lane_s16(a, b, v, 1);
-  // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmls_lane_s16(a, b, v, 3);
+  // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) {
   // CHECK: test_vmlsq_lane_s16
-  return vmlsq_lane_s16(a, b, v, 1);
-  // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlsq_lane_s16(a, b, v, 3);
+  // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) {
@@ -82,38 +82,38 @@ int32x4_t test_vmlsq_lane_s32(int32x4_t
 
 int16x4_t test_vmls_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
   // CHECK: test_vmls_laneq_s16
-  return vmls_laneq_s16(a, b, v, 1);
-  // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmls_laneq_s16(a, b, v, 7);
+  // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
 }
 
 int16x8_t test_vmlsq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
   // CHECK: test_vmlsq_laneq_s16
-  return vmlsq_laneq_s16(a, b, v, 1);
-  // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlsq_laneq_s16(a, b, v, 7);
+  // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
 }
 
 int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
   // CHECK: test_vmls_laneq_s32
-  return vmls_laneq_s32(a, b, v, 1);
-  // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmls_laneq_s32(a, b, v, 3);
+  // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
   // CHECK: test_vmlsq_laneq_s32
-  return vmlsq_laneq_s32(a, b, v, 1);
-  // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vmlsq_laneq_s32(a, b, v, 3);
+  // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t v) {
   // CHECK: test_vmul_lane_s16
-  return vmul_lane_s16(a, v, 1);
-  // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmul_lane_s16(a, v, 3);
+  // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t v) {
   // CHECK: test_vmulq_lane_s16
-  return vmulq_lane_s16(a, v, 1);
-  // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmulq_lane_s16(a, v, 3);
+  // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t v) {
@@ -130,14 +130,14 @@ int32x4_t test_vmulq_lane_s32(int32x4_t
 
 uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t v) {
   // CHECK: test_vmul_lane_u16
-  return vmul_lane_u16(a, v, 1);
-  // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmul_lane_u16(a, v, 3);
+  // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t v) {
   // CHECK: test_vmulq_lane_u16
-  return vmulq_lane_u16(a, v, 1);
-  // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmulq_lane_u16(a, v, 3);
+  // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t v) {
@@ -154,50 +154,50 @@ uint32x4_t test_vmulq_lane_u32(uint32x4_
 
 int16x4_t test_vmul_laneq_s16(int16x4_t a, int16x8_t v) {
   // CHECK: test_vmul_laneq_s16
-  return vmul_laneq_s16(a, v, 1);
-  // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmul_laneq_s16(a, v, 7);
+  // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
 }
 
 int16x8_t test_vmulq_laneq_s16(int16x8_t a, int16x8_t v) {
   // CHECK: test_vmulq_laneq_s16
-  return vmulq_laneq_s16(a, v, 1);
-  // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmulq_laneq_s16(a, v, 7);
+  // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
 }
 
 int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) {
   // CHECK: test_vmul_laneq_s32
-  return vmul_laneq_s32(a, v, 1);
-  // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmul_laneq_s32(a, v, 3);
+  // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vmulq_laneq_s32(int32x4_t a, int32x4_t v) {
   // CHECK: test_vmulq_laneq_s32
-  return vmulq_laneq_s32(a, v, 1);
-  // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vmulq_laneq_s32(a, v, 3);
+  // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 uint16x4_t test_vmul_laneq_u16(uint16x4_t a, uint16x8_t v) {
   // CHECK: test_vmul_laneq_u16
-  return vmul_laneq_u16(a, v, 1);
-  // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmul_laneq_u16(a, v, 7);
+  // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
 }
 
 uint16x8_t test_vmulq_laneq_u16(uint16x8_t a, uint16x8_t v) {
   // CHECK: test_vmulq_laneq_u16
-  return vmulq_laneq_u16(a, v, 1);
-  // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmulq_laneq_u16(a, v, 7);
+  // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
 }
 
 uint32x2_t test_vmul_laneq_u32(uint32x2_t a, uint32x4_t v) {
   // CHECK: test_vmul_laneq_u32
-  return vmul_laneq_u32(a, v, 1);
-  // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmul_laneq_u32(a, v, 3);
+  // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 uint32x4_t test_vmulq_laneq_u32(uint32x4_t a, uint32x4_t v) {
   // CHECK: test_vmulq_laneq_u32
-  return vmulq_laneq_u32(a, v, 1);
-  // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vmulq_laneq_u32(a, v, 3);
+  // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 float32x2_t test_vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
@@ -214,14 +214,14 @@ float32x4_t test_vfmaq_lane_f32(float32x
 
 float32x2_t test_vfma_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
   // CHECK: test_vfma_laneq_f32
-  return vfma_laneq_f32(a, b, v, 1);
-  // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vfma_laneq_f32(a, b, v, 3);
+  // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
   // CHECK: test_vfmaq_laneq_f32
-  return vfmaq_laneq_f32(a, b, v, 1);
-  // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vfmaq_laneq_f32(a, b, v, 3);
+  // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
@@ -238,14 +238,14 @@ float32x4_t test_vfmsq_lane_f32(float32x
 
 float32x2_t test_vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
   // CHECK: test_vfms_laneq_f32
-  return vfms_laneq_f32(a, b, v, 1);
-  // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vfms_laneq_f32(a, b, v, 3);
+  // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 float32x4_t test_vfmsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
   // CHECK: test_vfmsq_laneq_f32
-  return vfmsq_laneq_f32(a, b, v, 1);
-  // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vfmsq_laneq_f32(a, b, v, 3);
+  // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 float64x2_t test_vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) {
@@ -254,12 +254,6 @@ float64x2_t test_vfmaq_lane_f64(float64x
   // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 }
 
-float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) {
-  // CHECK: test_vfmaq_laneq_f64
-  return vfmaq_laneq_f64(a, b, v, 0);
-  // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-}
-
 float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) {
   // CHECK: test_vfmaq_laneq_f64
   return vfmaq_laneq_f64(a, b, v, 1);
@@ -272,12 +266,6 @@ float64x2_t test_vfmsq_lane_f64(float64x
   // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 }
 
-float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) {
-  // CHECK: test_vfmsq_laneq_f64
-  return vfmsq_laneq_f64(a, b, v, 0);
-  // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-}
-
 float64x2_t test_vfmsq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) {
   // CHECK: test_vfmsq_laneq_f64
   return vfmsq_laneq_f64(a, b, v, 1);
@@ -286,8 +274,8 @@ float64x2_t test_vfmsq_laneq_f64(float64
 
 int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
   // CHECK: test_vmlal_lane_s16
-  return vmlal_lane_s16(a, b, v, 1);
-  // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmlal_lane_s16(a, b, v, 3);
+  // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
@@ -298,20 +286,20 @@ int64x2_t test_vmlal_lane_s32(int64x2_t
 
 int32x4_t test_vmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
   // CHECK: test_vmlal_laneq_s16
-  return vmlal_laneq_s16(a, b, v, 1);
-  // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmlal_laneq_s16(a, b, v, 7);
+  // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
 }
 
 int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
   // CHECK: test_vmlal_laneq_s32
-  return vmlal_laneq_s32(a, b, v, 1);
-  // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmlal_laneq_s32(a, b, v, 3);
+  // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
   // CHECK: test_vmlal_high_lane_s16
-  return vmlal_high_lane_s16(a, b, v, 1);
-  // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlal_high_lane_s16(a, b, v, 3);
+  // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
@@ -322,20 +310,20 @@ int64x2_t test_vmlal_high_lane_s32(int64
 
 int32x4_t test_vmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
   // CHECK: test_vmlal_high_laneq_s16
-  return vmlal_high_laneq_s16(a, b, v, 1);
-  // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlal_high_laneq_s16(a, b, v, 7);
+  // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
 }
 
 int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) {
   // CHECK: test_vmlal_high_laneq_s32
-  return vmlal_high_laneq_s32(a, b, v, 1);
-  // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vmlal_high_laneq_s32(a, b, v, 3);
+  // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
   // CHECK: test_vmlsl_lane_s16
-  return vmlsl_lane_s16(a, b, v, 1);
-  // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmlsl_lane_s16(a, b, v, 3);
+  // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
@@ -346,20 +334,20 @@ int64x2_t test_vmlsl_lane_s32(int64x2_t
 
 int32x4_t test_vmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
   // CHECK: test_vmlsl_laneq_s16
-  return vmlsl_laneq_s16(a, b, v, 1);
-  // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmlsl_laneq_s16(a, b, v, 7);
+  // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
 }
 
 int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
   // CHECK: test_vmlsl_laneq_s32
-  return vmlsl_laneq_s32(a, b, v, 1);
-  // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmlsl_laneq_s32(a, b, v, 3);
+  // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
   // CHECK: test_vmlsl_high_lane_s16
-  return vmlsl_high_lane_s16(a, b, v, 1);
-  // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlsl_high_lane_s16(a, b, v, 3);
+  // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
@@ -370,20 +358,20 @@ int64x2_t test_vmlsl_high_lane_s32(int64
 
 int32x4_t test_vmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
   // CHECK: test_vmlsl_high_laneq_s16
-  return vmlsl_high_laneq_s16(a, b, v, 1);
-  // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlsl_high_laneq_s16(a, b, v, 7);
+  // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
 }
 
 int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) {
   // CHECK: test_vmlsl_high_laneq_s32
-  return vmlsl_high_laneq_s32(a, b, v, 1);
-  // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vmlsl_high_laneq_s32(a, b, v, 3);
+  // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vmlal_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) {
   // CHECK: test_vmlal_lane_u16
-  return vmlal_lane_u16(a, b, v, 1);
-  // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmlal_lane_u16(a, b, v, 3);
+  // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) {
@@ -394,20 +382,20 @@ int64x2_t test_vmlal_lane_u32(int64x2_t
 
 int32x4_t test_vmlal_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) {
   // CHECK: test_vmlal_laneq_u16
-  return vmlal_laneq_u16(a, b, v, 1);
-  // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmlal_laneq_u16(a, b, v, 7);
+  // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
 }
 
 int64x2_t test_vmlal_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) {
   // CHECK: test_vmlal_laneq_u32
-  return vmlal_laneq_u32(a, b, v, 1);
-  // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmlal_laneq_u32(a, b, v, 3);
+  // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vmlal_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) {
   // CHECK: test_vmlal_high_lane_u16
-  return vmlal_high_lane_u16(a, b, v, 1);
-  // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlal_high_lane_u16(a, b, v, 3);
+  // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vmlal_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) {
@@ -418,20 +406,20 @@ int64x2_t test_vmlal_high_lane_u32(int64
 
 int32x4_t test_vmlal_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) {
   // CHECK: test_vmlal_high_laneq_u16
-  return vmlal_high_laneq_u16(a, b, v, 1);
-  // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlal_high_laneq_u16(a, b, v, 7);
+  // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
 }
 
 int64x2_t test_vmlal_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) {
   // CHECK: test_vmlal_high_laneq_u32
-  return vmlal_high_laneq_u32(a, b, v, 1);
-  // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vmlal_high_laneq_u32(a, b, v, 3);
+  // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vmlsl_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) {
   // CHECK: test_vmlsl_lane_u16
-  return vmlsl_lane_u16(a, b, v, 1);
-  // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmlsl_lane_u16(a, b, v, 3);
+  // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vmlsl_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) {
@@ -442,20 +430,20 @@ int64x2_t test_vmlsl_lane_u32(int64x2_t
 
 int32x4_t test_vmlsl_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) {
   // CHECK: test_vmlsl_laneq_u16
-  return vmlsl_laneq_u16(a, b, v, 1);
-  // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmlsl_laneq_u16(a, b, v, 7);
+  // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
 }
 
 int64x2_t test_vmlsl_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) {
   // CHECK: test_vmlsl_laneq_u32
-  return vmlsl_laneq_u32(a, b, v, 1);
-  // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmlsl_laneq_u32(a, b, v, 3);
+  // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vmlsl_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) {
   // CHECK: test_vmlsl_high_lane_u16
-  return vmlsl_high_lane_u16(a, b, v, 1);
-  // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlsl_high_lane_u16(a, b, v, 3);
+  // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vmlsl_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) {
@@ -466,20 +454,20 @@ int64x2_t test_vmlsl_high_lane_u32(int64
 
 int32x4_t test_vmlsl_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) {
   // CHECK: test_vmlsl_high_laneq_u16
-  return vmlsl_high_laneq_u16(a, b, v, 1);
-  // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlsl_high_laneq_u16(a, b, v, 7);
+  // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
 }
 
 int64x2_t test_vmlsl_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) {
   // CHECK: test_vmlsl_high_laneq_u32
-  return vmlsl_high_laneq_u32(a, b, v, 1);
-  // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vmlsl_high_laneq_u32(a, b, v, 3);
+  // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t v) {
   // CHECK: test_vmull_lane_s16
-  return vmull_lane_s16(a, v, 1);
-  // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmull_lane_s16(a, v, 3);
+  // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) {
@@ -490,8 +478,8 @@ int64x2_t test_vmull_lane_s32(int32x2_t
 
 uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t v) {
   // CHECK: test_vmull_lane_u16
-  return vmull_lane_u16(a, v, 1);
-  // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmull_lane_u16(a, v, 3);
+  // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) {
@@ -502,8 +490,8 @@ uint64x2_t test_vmull_lane_u32(uint32x2_
 
 int32x4_t test_vmull_high_lane_s16(int16x8_t a, int16x4_t v) {
   // CHECK: test_vmull_high_lane_s16
-  return vmull_high_lane_s16(a, v, 1);
-  // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmull_high_lane_s16(a, v, 3);
+  // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) {
@@ -514,8 +502,8 @@ int64x2_t test_vmull_high_lane_s32(int32
 
 uint32x4_t test_vmull_high_lane_u16(uint16x8_t a, uint16x4_t v) {
   // CHECK: test_vmull_high_lane_u16
-  return vmull_high_lane_u16(a, v, 1);
-  // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmull_high_lane_u16(a, v, 3);
+  // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) {
@@ -526,56 +514,56 @@ uint64x2_t test_vmull_high_lane_u32(uint
 
 int32x4_t test_vmull_laneq_s16(int16x4_t a, int16x8_t v) {
   // CHECK: test_vmull_laneq_s16
-  return vmull_laneq_s16(a, v, 1);
-  // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmull_laneq_s16(a, v, 7);
+  // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
 }
 
 int64x2_t test_vmull_laneq_s32(int32x2_t a, int32x4_t v) {
   // CHECK: test_vmull_laneq_s32
-  return vmull_laneq_s32(a, v, 1);
-  // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmull_laneq_s32(a, v, 3);
+  // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 uint32x4_t test_vmull_laneq_u16(uint16x4_t a, uint16x8_t v) {
   // CHECK: test_vmull_laneq_u16
-  return vmull_laneq_u16(a, v, 1);
-  // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmull_laneq_u16(a, v, 7);
+  // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
 }
 
 uint64x2_t test_vmull_laneq_u32(uint32x2_t a, uint32x4_t v) {
   // CHECK: test_vmull_laneq_u32
-  return vmull_laneq_u32(a, v, 1);
-  // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmull_laneq_u32(a, v, 3);
+  // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vmull_high_laneq_s16(int16x8_t a, int16x8_t v) {
   // CHECK: test_vmull_high_laneq_s16
-  return vmull_high_laneq_s16(a, v, 1);
-  // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmull_high_laneq_s16(a, v, 7);
+  // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
 }
 
 int64x2_t test_vmull_high_laneq_s32(int32x4_t a, int32x4_t v) {
   // CHECK: test_vmull_high_laneq_s32
-  return vmull_high_laneq_s32(a, v, 1);
-  // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vmull_high_laneq_s32(a, v, 3);
+  // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 uint32x4_t test_vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v) {
   // CHECK: test_vmull_high_laneq_u16
-  return vmull_high_laneq_u16(a, v, 1);
-  // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmull_high_laneq_u16(a, v, 7);
+  // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
 }
 
 uint64x2_t test_vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v) {
   // CHECK: test_vmull_high_laneq_u32
-  return vmull_high_laneq_u32(a, v, 1);
-  // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vmull_high_laneq_u32(a, v, 3);
+  // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
   // CHECK: test_vqdmlal_lane_s16
-  return vqdmlal_lane_s16(a, b, v, 1);
-  // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vqdmlal_lane_s16(a, b, v, 3);
+  // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
@@ -586,8 +574,8 @@ int64x2_t test_vqdmlal_lane_s32(int64x2_
 
 int32x4_t test_vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
   // CHECK: test_vqdmlal_high_lane_s16
-  return vqdmlal_high_lane_s16(a, b, v, 1);
-  // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vqdmlal_high_lane_s16(a, b, v, 3);
+  // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
@@ -598,8 +586,8 @@ int64x2_t test_vqdmlal_high_lane_s32(int
 
 int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
   // CHECK: test_vqdmlsl_lane_s16
-  return vqdmlsl_lane_s16(a, b, v, 1);
-  // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vqdmlsl_lane_s16(a, b, v, 3);
+  // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
@@ -610,8 +598,8 @@ int64x2_t test_vqdmlsl_lane_s32(int64x2_
 
 int32x4_t test_vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
   // CHECK: test_vqdmlsl_high_lane_s16
-  return vqdmlsl_high_lane_s16(a, b, v, 1);
-  // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vqdmlsl_high_lane_s16(a, b, v, 3);
+  // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
@@ -622,8 +610,8 @@ int64x2_t test_vqdmlsl_high_lane_s32(int
 
 int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t v) {
   // CHECK: test_vqdmull_lane_s16
-  return vqdmull_lane_s16(a, v, 1);
-  // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vqdmull_lane_s16(a, v, 3);
+  // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) {
@@ -634,20 +622,20 @@ int64x2_t test_vqdmull_lane_s32(int32x2_
 
 int32x4_t test_vqdmull_laneq_s16(int16x4_t a, int16x8_t v) {
   // CHECK: test_vqdmull_laneq_s16
-  return vqdmull_laneq_s16(a, v, 1);
-  // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vqdmull_laneq_s16(a, v, 3);
+  // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vqdmull_laneq_s32(int32x2_t a, int32x4_t v) {
   // CHECK: test_vqdmull_laneq_s32
-  return vqdmull_laneq_s32(a, v, 1);
-  // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vqdmull_laneq_s32(a, v, 3);
+  // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 int32x4_t test_vqdmull_high_lane_s16(int16x8_t a, int16x4_t v) {
   // CHECK: test_vqdmull_high_lane_s16
-  return vqdmull_high_lane_s16(a, v, 1);
-  // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vqdmull_high_lane_s16(a, v, 3);
+  // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int64x2_t test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) {
@@ -658,26 +646,26 @@ int64x2_t test_vqdmull_high_lane_s32(int
 
 int32x4_t test_vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v) {
   // CHECK: test_vqdmull_high_laneq_s16
-  return vqdmull_high_laneq_s16(a, v, 1);
-  // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vqdmull_high_laneq_s16(a, v, 7);
+  // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
 }
 
 int64x2_t test_vqdmull_high_laneq_s32(int32x4_t a, int32x4_t v) {
   // CHECK: test_vqdmull_high_laneq_s32
-  return vqdmull_high_laneq_s32(a, v, 1);
-  // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vqdmull_high_laneq_s32(a, v, 3);
+  // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t v) {
   // CHECK: test_vqdmulh_lane_s16
-  return vqdmulh_lane_s16(a, v, 1);
-  // CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vqdmulh_lane_s16(a, v, 3);
+  // CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t v) {
   // CHECK: test_vqdmulhq_lane_s16
-  return vqdmulhq_lane_s16(a, v, 1);
-  // CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vqdmulhq_lane_s16(a, v, 3);
+  // CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t v) {
@@ -694,14 +682,14 @@ int32x4_t test_vqdmulhq_lane_s32(int32x4
 
 int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t v) {
   // CHECK: test_vqrdmulh_lane_s16
-  return vqrdmulh_lane_s16(a, v, 1);
-  // CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vqrdmulh_lane_s16(a, v, 3);
+  // CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 
 int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v) {
   // CHECK: test_vqrdmulhq_lane_s16
-  return vqrdmulhq_lane_s16(a, v, 1);
-  // CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vqrdmulhq_lane_s16(a, v, 3);
+  // CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 
 int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t v) {
@@ -744,27 +732,21 @@ float64x2_t test_vmulq_lane_f64(float64x
 
 float32x2_t test_vmul_laneq_f32(float32x2_t a, float32x4_t v) {
   // CHECK: test_vmul_laneq_f32
-  return vmul_laneq_f32(a, v, 1);
-  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmul_laneq_f32(a, v, 3);
+  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
-float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) {
-  // CHECK: test_vmul_laneq_f64_0
-  return vmul_laneq_f64(a, v, 0);
-  // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+float64x1_t test_vmul_laneq_f64(float64x1_t a, float64x2_t v) {
+  // CHECK: test_vmul_laneq_f64
+  return vmul_laneq_f64(a, v, 1);
+  // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
 }
 
 
 float32x4_t test_vmulq_laneq_f32(float32x4_t a, float32x4_t v) {
   // CHECK: test_vmulq_laneq_f32
-  return vmulq_laneq_f32(a, v, 1);
-  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-}
-
-float64x2_t test_vmulq_laneq_f64_0(float64x2_t a, float64x2_t v) {
-  // CHECK: test_vmulq_laneq_f64
-  return vmulq_laneq_f64(a, v, 0);
-  // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+  return vmulq_laneq_f32(a, v, 3);
+  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 float64x2_t test_vmulq_laneq_f64(float64x2_t a, float64x2_t v) {
@@ -793,20 +775,14 @@ float64x2_t test_vmulxq_lane_f64(float64
 
 float32x2_t test_vmulx_laneq_f32(float32x2_t a, float32x4_t v) {
   // CHECK: test_vmulx_laneq_f32
-  return vmulx_laneq_f32(a, v, 1);
-  // CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmulx_laneq_f32(a, v, 3);
+  // CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 
 float32x4_t test_vmulxq_laneq_f32(float32x4_t a, float32x4_t v) {
   // CHECK: test_vmulxq_laneq_f32
-  return vmulxq_laneq_f32(a, v, 1);
-  // CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-}
-
-float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) {
-  // CHECK: test_vmulxq_laneq_f64
-  return vmulxq_laneq_f64(a, v, 0);
-  // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+  return vmulxq_laneq_f32(a, v, 3);
+  // CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
 float64x2_t test_vmulxq_laneq_f64(float64x2_t a, float64x2_t v) {
@@ -815,3 +791,759 @@ float64x2_t test_vmulxq_laneq_f64(float6
   // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
 }
 
+int16x4_t test_vmla_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) {
+  // CHECK: test_vmla_lane_s16_0
+  return vmla_lane_s16(a, b, v, 0);
+  // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int16x8_t test_vmlaq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) {
+  // CHECK: test_vmlaq_lane_s16_0
+  return vmlaq_lane_s16(a, b, v, 0);
+  // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int32x2_t test_vmla_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) {
+  // CHECK: test_vmla_lane_s32_0
+  return vmla_lane_s32(a, b, v, 0);
+  // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlaq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) {
+  // CHECK: test_vmlaq_lane_s32_0
+  return vmlaq_lane_s32(a, b, v, 0);
+  // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int16x4_t test_vmla_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) {
+  // CHECK: test_vmla_laneq_s16_0
+  return vmla_laneq_s16(a, b, v, 0);
+  // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int16x8_t test_vmlaq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) {
+  // CHECK: test_vmlaq_laneq_s16_0
+  return vmlaq_laneq_s16(a, b, v, 0);
+  // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int32x2_t test_vmla_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) {
+  // CHECK: test_vmla_laneq_s32_0
+  return vmla_laneq_s32(a, b, v, 0);
+  // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlaq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) {
+  // CHECK: test_vmlaq_laneq_s32_0
+  return vmlaq_laneq_s32(a, b, v, 0);
+  // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int16x4_t test_vmls_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) {
+  // CHECK: test_vmls_lane_s16_0
+  return vmls_lane_s16(a, b, v, 0);
+  // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int16x8_t test_vmlsq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) {
+  // CHECK: test_vmlsq_lane_s16_0
+  return vmlsq_lane_s16(a, b, v, 0);
+  // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int32x2_t test_vmls_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) {
+  // CHECK: test_vmls_lane_s32_0
+  return vmls_lane_s32(a, b, v, 0);
+  // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) {
+  // CHECK: test_vmlsq_lane_s32_0
+  return vmlsq_lane_s32(a, b, v, 0);
+  // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int16x4_t test_vmls_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) {
+  // CHECK: test_vmls_laneq_s16_0
+  return vmls_laneq_s16(a, b, v, 0);
+  // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int16x8_t test_vmlsq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) {
+  // CHECK: test_vmlsq_laneq_s16_0
+  return vmlsq_laneq_s16(a, b, v, 0);
+  // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int32x2_t test_vmls_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) {
+  // CHECK: test_vmls_laneq_s32_0
+  return vmls_laneq_s32(a, b, v, 0);
+  // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) {
+  // CHECK: test_vmlsq_laneq_s32_0
+  return vmlsq_laneq_s32(a, b, v, 0);
+  // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int16x4_t test_vmul_lane_s16_0(int16x4_t a, int16x4_t v) {
+  // CHECK: test_vmul_lane_s16_0
+  return vmul_lane_s16(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int16x8_t test_vmulq_lane_s16_0(int16x8_t a, int16x4_t v) {
+  // CHECK: test_vmulq_lane_s16_0
+  return vmulq_lane_s16(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int32x2_t test_vmul_lane_s32_0(int32x2_t a, int32x2_t v) {
+  // CHECK: test_vmul_lane_s32_0
+  return vmul_lane_s32(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmulq_lane_s32_0(int32x4_t a, int32x2_t v) {
+  // CHECK: test_vmulq_lane_s32_0
+  return vmulq_lane_s32(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint16x4_t test_vmul_lane_u16_0(uint16x4_t a, uint16x4_t v) {
+  // CHECK: test_vmul_lane_u16_0
+  return vmul_lane_u16(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+uint16x8_t test_vmulq_lane_u16_0(uint16x8_t a, uint16x4_t v) {
+  // CHECK: test_vmulq_lane_u16_0
+  return vmulq_lane_u16(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint32x2_t test_vmul_lane_u32_0(uint32x2_t a, uint32x2_t v) {
+  // CHECK: test_vmul_lane_u32_0
+  return vmul_lane_u32(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmulq_lane_u32_0(uint32x4_t a, uint32x2_t v) {
+  // CHECK: test_vmulq_lane_u32_0
+  return vmulq_lane_u32(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int16x4_t test_vmul_laneq_s16_0(int16x4_t a, int16x8_t v) {
+  // CHECK: test_vmul_laneq_s16_0
+  return vmul_laneq_s16(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int16x8_t test_vmulq_laneq_s16_0(int16x8_t a, int16x8_t v) {
+  // CHECK: test_vmulq_laneq_s16_0
+  return vmulq_laneq_s16(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int32x2_t test_vmul_laneq_s32_0(int32x2_t a, int32x4_t v) {
+  // CHECK: test_vmul_laneq_s32_0
+  return vmul_laneq_s32(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmulq_laneq_s32_0(int32x4_t a, int32x4_t v) {
+  // CHECK: test_vmulq_laneq_s32_0
+  return vmulq_laneq_s32(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint16x4_t test_vmul_laneq_u16_0(uint16x4_t a, uint16x8_t v) {
+  // CHECK: test_vmul_laneq_u16_0
+  return vmul_laneq_u16(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+uint16x8_t test_vmulq_laneq_u16_0(uint16x8_t a, uint16x8_t v) {
+  // CHECK: test_vmulq_laneq_u16_0
+  return vmulq_laneq_u16(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint32x2_t test_vmul_laneq_u32_0(uint32x2_t a, uint32x4_t v) {
+  // CHECK: test_vmul_laneq_u32_0
+  return vmul_laneq_u32(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmulq_laneq_u32_0(uint32x4_t a, uint32x4_t v) {
+  // CHECK: test_vmulq_laneq_u32_0
+  return vmulq_laneq_u32(a, v, 0);
+  // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float32x2_t test_vfma_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
+  // CHECK: test_vfma_lane_f32_0
+  return vfma_lane_f32(a, b, v, 0);
+  // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vfmaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
+  // CHECK: test_vfmaq_lane_f32_0
+  return vfmaq_lane_f32(a, b, v, 0);
+  // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float32x2_t test_vfma_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
+  // CHECK: test_vfma_laneq_f32_0
+  return vfma_laneq_f32(a, b, v, 0);
+  // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vfmaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
+  // CHECK: test_vfmaq_laneq_f32_0
+  return vfmaq_laneq_f32(a, b, v, 0);
+  // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
+  // CHECK: test_vfms_lane_f32_0
+  return vfms_lane_f32(a, b, v, 0);
+  // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vfmsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
+  // CHECK: test_vfmsq_lane_f32_0
+  return vfmsq_lane_f32(a, b, v, 0);
+  // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float32x2_t test_vfms_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
+  // CHECK: test_vfms_laneq_f32_0
+  return vfms_laneq_f32(a, b, v, 0);
+  // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vfmsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
+  // CHECK: test_vfmsq_laneq_f32_0
+  return vfmsq_laneq_f32(a, b, v, 0);
+  // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) {
+  // CHECK: test_vfmaq_laneq_f64_0
+  return vfmaq_laneq_f64(a, b, v, 0);
+  // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) {
+  // CHECK: test_vfmsq_laneq_f64_0
+  return vfmsq_laneq_f64(a, b, v, 0);
+  // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+int32x4_t test_vmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
+  // CHECK: test_vmlal_lane_s16_0
+  return vmlal_lane_s16(a, b, v, 0);
+  // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
+  // CHECK: test_vmlal_lane_s32_0
+  return vmlal_lane_s32(a, b, v, 0);
+  // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
+  // CHECK: test_vmlal_laneq_s16_0
+  return vmlal_laneq_s16(a, b, v, 0);
+  // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
+  // CHECK: test_vmlal_laneq_s32_0
+  return vmlal_laneq_s32(a, b, v, 0);
+  // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
+  // CHECK: test_vmlal_high_lane_s16_0
+  return vmlal_high_lane_s16(a, b, v, 0);
+  // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
+  // CHECK: test_vmlal_high_lane_s32_0
+  return vmlal_high_lane_s32(a, b, v, 0);
+  // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
+  // CHECK: test_vmlal_high_laneq_s16_0
+  return vmlal_high_laneq_s16(a, b, v, 0);
+  // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
+  // CHECK: test_vmlal_high_laneq_s32_0
+  return vmlal_high_laneq_s32(a, b, v, 0);
+  // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
+  // CHECK: test_vmlsl_lane_s16_0
+  return vmlsl_lane_s16(a, b, v, 0);
+  // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
+  // CHECK: test_vmlsl_lane_s32_0
+  return vmlsl_lane_s32(a, b, v, 0);
+  // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
+  // CHECK: test_vmlsl_laneq_s16_0
+  return vmlsl_laneq_s16(a, b, v, 0);
+  // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
+  // CHECK: test_vmlsl_laneq_s32_0
+  return vmlsl_laneq_s32(a, b, v, 0);
+  // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
+  // CHECK: test_vmlsl_high_lane_s16_0
+  return vmlsl_high_lane_s16(a, b, v, 0);
+  // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
+  // CHECK: test_vmlsl_high_lane_s32_0
+  return vmlsl_high_lane_s32(a, b, v, 0);
+  // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
+  // CHECK: test_vmlsl_high_laneq_s16_0
+  return vmlsl_high_laneq_s16(a, b, v, 0);
+  // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
+  // CHECK: test_vmlsl_high_laneq_s32_0
+  return vmlsl_high_laneq_s32(a, b, v, 0);
+  // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlal_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
+  // CHECK: test_vmlal_lane_u16_0
+  return vmlal_lane_u16(a, b, v, 0);
+  // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlal_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
+  // CHECK: test_vmlal_lane_u32_0
+  return vmlal_lane_u32(a, b, v, 0);
+  // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlal_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
+  // CHECK: test_vmlal_laneq_u16_0
+  return vmlal_laneq_u16(a, b, v, 0);
+  // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlal_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
+  // CHECK: test_vmlal_laneq_u32_0
+  return vmlal_laneq_u32(a, b, v, 0);
+  // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlal_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
+  // CHECK: test_vmlal_high_lane_u16_0
+  return vmlal_high_lane_u16(a, b, v, 0);
+  // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlal_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
+  // CHECK: test_vmlal_high_lane_u32_0
+  return vmlal_high_lane_u32(a, b, v, 0);
+  // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlal_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
+  // CHECK: test_vmlal_high_laneq_u16_0
+  return vmlal_high_laneq_u16(a, b, v, 0);
+  // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlal_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
+  // CHECK: test_vmlal_high_laneq_u32_0
+  return vmlal_high_laneq_u32(a, b, v, 0);
+  // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsl_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
+  // CHECK: test_vmlsl_lane_u16_0
+  return vmlsl_lane_u16(a, b, v, 0);
+  // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlsl_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
+  // CHECK: test_vmlsl_lane_u32_0
+  return vmlsl_lane_u32(a, b, v, 0);
+  // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsl_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
+  // CHECK: test_vmlsl_laneq_u16_0
+  return vmlsl_laneq_u16(a, b, v, 0);
+  // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlsl_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
+  // CHECK: test_vmlsl_laneq_u32_0
+  return vmlsl_laneq_u32(a, b, v, 0);
+  // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsl_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
+  // CHECK: test_vmlsl_high_lane_u16_0
+  return vmlsl_high_lane_u16(a, b, v, 0);
+  // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlsl_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
+  // CHECK: test_vmlsl_high_lane_u32_0
+  return vmlsl_high_lane_u32(a, b, v, 0);
+  // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsl_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
+  // CHECK: test_vmlsl_high_laneq_u16_0
+  return vmlsl_high_laneq_u16(a, b, v, 0);
+  // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlsl_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
+  // CHECK: test_vmlsl_high_laneq_u32_0
+  return vmlsl_high_laneq_u32(a, b, v, 0);
+  // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmull_lane_s16_0(int16x4_t a, int16x4_t v) {
+  // CHECK: test_vmull_lane_s16_0
+  return vmull_lane_s16(a, v, 0);
+  // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmull_lane_s32_0(int32x2_t a, int32x2_t v) {
+  // CHECK: test_vmull_lane_s32_0
+  return vmull_lane_s32(a, v, 0);
+  // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmull_lane_u16_0(uint16x4_t a, uint16x4_t v) {
+  // CHECK: test_vmull_lane_u16_0
+  return vmull_lane_u16(a, v, 0);
+  // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmull_lane_u32_0(uint32x2_t a, uint32x2_t v) {
+  // CHECK: test_vmull_lane_u32_0
+  return vmull_lane_u32(a, v, 0);
+  // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmull_high_lane_s16_0(int16x8_t a, int16x4_t v) {
+  // CHECK: test_vmull_high_lane_s16_0
+  return vmull_high_lane_s16(a, v, 0);
+  // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmull_high_lane_s32_0(int32x4_t a, int32x2_t v) {
+  // CHECK: test_vmull_high_lane_s32_0
+  return vmull_high_lane_s32(a, v, 0);
+  // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmull_high_lane_u16_0(uint16x8_t a, uint16x4_t v) {
+  // CHECK: test_vmull_high_lane_u16_0
+  return vmull_high_lane_u16(a, v, 0);
+  // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmull_high_lane_u32_0(uint32x4_t a, uint32x2_t v) {
+  // CHECK: test_vmull_high_lane_u32_0
+  return vmull_high_lane_u32(a, v, 0);
+  // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmull_laneq_s16_0(int16x4_t a, int16x8_t v) {
+  // CHECK: test_vmull_laneq_s16_0
+  return vmull_laneq_s16(a, v, 0);
+  // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmull_laneq_s32_0(int32x2_t a, int32x4_t v) {
+  // CHECK: test_vmull_laneq_s32_0
+  return vmull_laneq_s32(a, v, 0);
+  // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmull_laneq_u16_0(uint16x4_t a, uint16x8_t v) {
+  // CHECK: test_vmull_laneq_u16_0
+  return vmull_laneq_u16(a, v, 0);
+  // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmull_laneq_u32_0(uint32x2_t a, uint32x4_t v) {
+  // CHECK: test_vmull_laneq_u32_0
+  return vmull_laneq_u32(a, v, 0);
+  // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) {
+  // CHECK: test_vmull_high_laneq_s16_0
+  return vmull_high_laneq_s16(a, v, 0);
+  // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) {
+  // CHECK: test_vmull_high_laneq_s32_0
+  return vmull_high_laneq_s32(a, v, 0);
+  // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmull_high_laneq_u16_0(uint16x8_t a, uint16x8_t v) {
+  // CHECK: test_vmull_high_laneq_u16_0
+  return vmull_high_laneq_u16(a, v, 0);
+  // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmull_high_laneq_u32_0(uint32x4_t a, uint32x4_t v) {
+  // CHECK: test_vmull_high_laneq_u32_0
+  return vmull_high_laneq_u32(a, v, 0);
+  // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
+  // CHECK: test_vqdmlal_lane_s16_0
+  return vqdmlal_lane_s16(a, b, v, 0);
+  // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
+  // CHECK: test_vqdmlal_lane_s32_0
+  return vqdmlal_lane_s32(a, b, v, 0);
+  // CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
+  // CHECK: test_vqdmlal_high_lane_s16_0
+  return vqdmlal_high_lane_s16(a, b, v, 0);
+  // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
+  // CHECK: test_vqdmlal_high_lane_s32_0
+  return vqdmlal_high_lane_s32(a, b, v, 0);
+  // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
+  // CHECK: test_vqdmlsl_lane_s16_0
+  return vqdmlsl_lane_s16(a, b, v, 0);
+  // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
+  // CHECK: test_vqdmlsl_lane_s32_0
+  return vqdmlsl_lane_s32(a, b, v, 0);
+  // CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
+  // CHECK: test_vqdmlsl_high_lane_s16_0
+  return vqdmlsl_high_lane_s16(a, b, v, 0);
+  // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
+  // CHECK: test_vqdmlsl_high_lane_s32_0
+  return vqdmlsl_high_lane_s32(a, b, v, 0);
+  // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmull_lane_s16_0(int16x4_t a, int16x4_t v) {
+  // CHECK: test_vqdmull_lane_s16_0
+  return vqdmull_lane_s16(a, v, 0);
+  // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmull_lane_s32_0(int32x2_t a, int32x2_t v) {
+  // CHECK: test_vqdmull_lane_s32_0
+  return vqdmull_lane_s32(a, v, 0);
+  // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmull_laneq_s16_0(int16x4_t a, int16x8_t v) {
+  // CHECK: test_vqdmull_laneq_s16_0
+  return vqdmull_laneq_s16(a, v, 0);
+  // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmull_laneq_s32_0(int32x2_t a, int32x4_t v) {
+  // CHECK: test_vqdmull_laneq_s32_0
+  return vqdmull_laneq_s32(a, v, 0);
+  // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmull_high_lane_s16_0(int16x8_t a, int16x4_t v) {
+  // CHECK: test_vqdmull_high_lane_s16_0
+  return vqdmull_high_lane_s16(a, v, 0);
+  // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmull_high_lane_s32_0(int32x4_t a, int32x2_t v) {
+  // CHECK: test_vqdmull_high_lane_s32_0
+  return vqdmull_high_lane_s32(a, v, 0);
+  // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) {
+  // CHECK: test_vqdmull_high_laneq_s16_0
+  return vqdmull_high_laneq_s16(a, v, 0);
+  // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) {
+  // CHECK: test_vqdmull_high_laneq_s32_0
+  return vqdmull_high_laneq_s32(a, v, 0);
+  // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int16x4_t test_vqdmulh_lane_s16_0(int16x4_t a, int16x4_t v) {
+  // CHECK: test_vqdmulh_lane_s16_0
+  return vqdmulh_lane_s16(a, v, 0);
+  // CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int16x8_t test_vqdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) {
+  // CHECK: test_vqdmulhq_lane_s16_0
+  return vqdmulhq_lane_s16(a, v, 0);
+  // CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int32x2_t test_vqdmulh_lane_s32_0(int32x2_t a, int32x2_t v) {
+  // CHECK: test_vqdmulh_lane_s32_0
+  return vqdmulh_lane_s32(a, v, 0);
+  // CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) {
+  // CHECK: test_vqdmulhq_lane_s32_0
+  return vqdmulhq_lane_s32(a, v, 0);
+  // CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int16x4_t test_vqrdmulh_lane_s16_0(int16x4_t a, int16x4_t v) {
+  // CHECK: test_vqrdmulh_lane_s16_0
+  return vqrdmulh_lane_s16(a, v, 0);
+  // CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+}
+
+int16x8_t test_vqrdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) {
+  // CHECK: test_vqrdmulhq_lane_s16_0
+  return vqrdmulhq_lane_s16(a, v, 0);
+  // CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int32x2_t test_vqrdmulh_lane_s32_0(int32x2_t a, int32x2_t v) {
+  // CHECK: test_vqrdmulh_lane_s32_0
+  return vqrdmulh_lane_s32(a, v, 0);
+  // CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqrdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) {
+  // CHECK: test_vqrdmulhq_lane_s32_0
+  return vqrdmulhq_lane_s32(a, v, 0);
+  // CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float32x2_t test_vmul_lane_f32_0(float32x2_t a, float32x2_t v) {
+  // CHECK: test_vmul_lane_f32_0
+  return vmul_lane_f32(a, v, 0);
+  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vmulq_lane_f32_0(float32x4_t a, float32x2_t v) {
+  // CHECK: test_vmulq_lane_f32_0
+  return vmulq_lane_f32(a, v, 0);
+  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float32x2_t test_vmul_laneq_f32_0(float32x2_t a, float32x4_t v) {
+  // CHECK: test_vmul_laneq_f32_0
+  return vmul_laneq_f32(a, v, 0);
+  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) {
+  // CHECK: test_vmul_laneq_f64_0
+  return vmul_laneq_f64(a, v, 0);
+  // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+float32x4_t test_vmulq_laneq_f32_0(float32x4_t a, float32x4_t v) {
+  // CHECK: test_vmulq_laneq_f32_0
+  return vmulq_laneq_f32(a, v, 0);
+  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float64x2_t test_vmulq_laneq_f64_0(float64x2_t a, float64x2_t v) {
+  // CHECK: test_vmulq_laneq_f64_0
+  return vmulq_laneq_f64(a, v, 0);
+  // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+float32x2_t test_vmulx_lane_f32_0(float32x2_t a, float32x2_t v) {
+  // CHECK: test_vmulx_lane_f32_0
+  return vmulx_lane_f32(a, v, 0);
+  // CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vmulxq_lane_f32_0(float32x4_t a, float32x2_t v) {
+  // CHECK: test_vmulxq_lane_f32_0
+  return vmulxq_lane_f32(a, v, 0);
+  // CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float64x2_t test_vmulxq_lane_f64_0(float64x2_t a, float64x1_t v) {
+  // CHECK: test_vmulxq_lane_f64_0
+  return vmulxq_lane_f64(a, v, 0);
+  // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+float32x2_t test_vmulx_laneq_f32_0(float32x2_t a, float32x4_t v) {
+  // CHECK: test_vmulx_laneq_f32_0
+  return vmulx_laneq_f32(a, v, 0);
+  // CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vmulxq_laneq_f32_0(float32x4_t a, float32x4_t v) {
+  // CHECK: test_vmulxq_laneq_f32_0
+  return vmulxq_laneq_f32(a, v, 0);
+  // CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) {
+  // CHECK: test_vmulxq_laneq_f64_0
+  return vmulxq_laneq_f64(a, v, 0);
+  // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+

Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=195844&r1=195843&r2=195844&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Wed Nov 27 08:02:55 2013
@@ -2209,10 +2209,17 @@ static unsigned GetNeonEnum(const std::s
   return Flags.getFlags();
 }
 
+// We don't check 'a' in this function, because for a builtin function the
+// argument matching 'a' uses a vector type splatted from a scalar type.
 static bool ProtoHasScalar(const std::string proto)
 {
   return (proto.find('s') != std::string::npos
-          || proto.find('r') != std::string::npos);
+          || proto.find('z') != std::string::npos
+          || proto.find('r') != std::string::npos
+          || proto.find('b') != std::string::npos
+          || proto.find('$') != std::string::npos
+          || proto.find('y') != std::string::npos
+          || proto.find('o') != std::string::npos);
 }
 
 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
@@ -2782,6 +2789,8 @@ NeonEmitter::genIntrinsicRangeCheckCode(
       PrintFatalError(R->getLoc(), "Builtin has no class kind");
 
     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
+    if (!ProtoHasScalar(Proto))
+      ck = ClassB;
 
     // Do not include AArch64 range checks if not generating code for AArch64.
     bool isA64 = R->getValueAsBit("isA64");
@@ -2820,17 +2829,15 @@ NeonEmitter::genIntrinsicRangeCheckCode(
 
         rangestr += "u = " +
           utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]));
-      } else if (!ProtoHasScalar(Proto)) {
+      } else if (R->getValueAsBit("isShift")) {
         // Builtins which are overloaded by type will need to have their upper
         // bound computed at Sema time based on the type constant.
-        ck = ClassB;
-        if (R->getValueAsBit("isShift")) {
-          shiftstr = ", true";
+        shiftstr = ", true";
+
+        // Right shifts have an 'r' in the name, left shifts do not.
+        if (name.find('r') != std::string::npos)
+          rangestr = "l = 1; ";
 
-          // Right shifts have an 'r' in the name, left shifts do not.
-          if (name.find('r') != std::string::npos)
-            rangestr = "l = 1; ";
-        }
         rangestr += "u = RFT(TV" + shiftstr + ")";
       } else {
         // The immediate generally refers to a lane in the preceding argument.





More information about the cfe-commits mailing list