r194124 - Implement AArch64 Neon instruction set Perm.

Jiangning Liu jiangning.liu at arm.com
Tue Nov 5 19:35:53 PST 2013


Author: jiangning
Date: Tue Nov  5 21:35:53 2013
New Revision: 194124

URL: http://llvm.org/viewvc/llvm-project?rev=194124&view=rev
Log:
Implement the AArch64 NEON permute (Perm) instruction class: TRN1/TRN2, ZIP1/ZIP2 and UZP1/UZP2.

Added:
    cfe/trunk/test/CodeGen/aarch64-neon-perm.c
Modified:
    cfe/trunk/include/clang/Basic/arm_neon.td
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/utils/TableGen/NeonEmitter.cpp

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=194124&r1=194123&r2=194124&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Tue Nov  5 21:35:53 2013
@@ -59,6 +59,12 @@ def OP_QDMULH_LN : Op;
 def OP_QRDMULH_LN : Op;
 def OP_FMS_LN : Op;
 def OP_FMS_LNQ : Op;
+def OP_TRN1  : Op;
+def OP_ZIP1  : Op;
+def OP_UZP1  : Op;
+def OP_TRN2  : Op;
+def OP_ZIP2  : Op;
+def OP_UZP2  : Op;
 def OP_EQ    : Op;
 def OP_GE    : Op;
 def OP_LE    : Op;
@@ -793,6 +799,21 @@ def SHA256H2 : SInst<"vsha256h2", "dddd"
 def SHA256SU1 : SInst<"vsha256su1", "dddd", "QUi">;
 
 ////////////////////////////////////////////////////////////////////////////////
+// Permutation (TRN1/TRN2, ZIP1/ZIP2, UZP1/UZP2)
+def VTRN1 : SOpInst<"vtrn1", "ddd",
+                    "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_TRN1>;
+def VZIP1 : SOpInst<"vzip1", "ddd",
+                    "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_ZIP1>;
+def VUZP1 : SOpInst<"vuzp1", "ddd",
+                    "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP1>;
+def VTRN2 : SOpInst<"vtrn2", "ddd",
+                    "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_TRN2>;
+def VZIP2 : SOpInst<"vzip2", "ddd",
+                    "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_ZIP2>;
+def VUZP2 : SOpInst<"vuzp2", "ddd",
+                    "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP2>;
+
+////////////////////////////////////////////////////////////////////////////////
 // Scalar Arithmetic
 
 // Scalar Addition

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=194124&r1=194123&r2=194124&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Nov  5 21:35:53 2013
@@ -2497,6 +2497,18 @@ Value *CodeGenFunction::EmitAArch64Built
   // AArch64 builtins mapping to legacy ARM v7 builtins.
   // FIXME: the mapped builtins listed correspond to what has been tested
   // in aarch64-neon-intrinsics.c so far.
+  case AArch64::BI__builtin_neon_vuzp_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzp_v, E);
+  case AArch64::BI__builtin_neon_vuzpq_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzpq_v, E);
+  case AArch64::BI__builtin_neon_vzip_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzip_v, E);
+  case AArch64::BI__builtin_neon_vzipq_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzipq_v, E);
+  case AArch64::BI__builtin_neon_vtrn_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrn_v, E);
+  case AArch64::BI__builtin_neon_vtrnq_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrnq_v, E);
   case AArch64::BI__builtin_neon_vext_v:
     return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vext_v, E);
   case AArch64::BI__builtin_neon_vextq_v:

Added: cfe/trunk/test/CodeGen/aarch64-neon-perm.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-perm.c?rev=194124&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-perm.c (added)
+++ cfe/trunk/test/CodeGen/aarch64-neon-perm.c Tue Nov  5 21:35:53 2013
@@ -0,0 +1,1093 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test AArch64 NEON permute intrinsics (vuzp*, vzip*, vtrn*) and their types
+
+#include <arm_neon.h>
+
+int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vuzp1_s8
+  return vuzp1_s8(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vuzp1q_s8
+  return vuzp1q_s8(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vuzp1_s16
+  return vuzp1_s16(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vuzp1q_s16
+  return vuzp1q_s16(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vuzp1_s32
+  return vuzp1_s32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vuzp1q_s32
+  return vuzp1q_s32(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) {
+  // CHECK: test_vuzp1q_s64
+  return vuzp1q_s64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vuzp1_u8
+  return vuzp1_u8(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vuzp1q_u8
+  return vuzp1q_u8(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vuzp1_u16
+  return vuzp1_u16(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vuzp1q_u16
+  return vuzp1q_u16(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vuzp1_u32
+  return vuzp1_u32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vuzp1q_u32
+  return vuzp1q_u32(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vuzp1q_u64
+  return vuzp1q_u64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) {
+  // CHECK: test_vuzp1_f32
+  return vuzp1_f32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) {
+  // CHECK: test_vuzp1q_f32
+  return vuzp1q_f32(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) {
+  // CHECK: test_vuzp1q_f64
+  return vuzp1q_f64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) {
+  // CHECK: test_vuzp1_p8
+  return vuzp1_p8(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) {
+  // CHECK: test_vuzp1q_p8
+  return vuzp1q_p8(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) {
+  // CHECK: test_vuzp1_p16
+  return vuzp1_p16(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) {
+  // CHECK: test_vuzp1q_p16
+  return vuzp1q_p16(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vuzp2_s8
+  return vuzp2_s8(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vuzp2q_s8
+  return vuzp2q_s8(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vuzp2_s16
+  return vuzp2_s16(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vuzp2q_s16
+  return vuzp2q_s16(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vuzp2_s32
+  return vuzp2_s32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vuzp2q_s32
+  return vuzp2q_s32(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) {
+  // CHECK: test_vuzp2q_s64
+  return vuzp2q_s64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vuzp2_u8
+  return vuzp2_u8(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vuzp2q_u8
+  return vuzp2q_u8(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vuzp2_u16
+  return vuzp2_u16(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vuzp2q_u16
+  return vuzp2q_u16(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vuzp2_u32
+  return vuzp2_u32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vuzp2q_u32
+  return vuzp2q_u32(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vuzp2q_u64
+  return vuzp2q_u64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) {
+  // CHECK: test_vuzp2_f32
+  return vuzp2_f32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) {
+  // CHECK: test_vuzp2q_f32
+  return vuzp2q_f32(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) {
+  // CHECK: test_vuzp2q_f64
+  return vuzp2q_f64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) {
+  // CHECK: test_vuzp2_p8
+  return vuzp2_p8(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) {
+  // CHECK: test_vuzp2q_p8
+  return vuzp2q_p8(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) {
+  // CHECK: test_vuzp2_p16
+  return vuzp2_p16(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) {
+  // CHECK: test_vuzp2q_p16
+  return vuzp2q_p16(a, b);
+  // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vzip1_s8
+  return vzip1_s8(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vzip1q_s8
+  return vzip1q_s8(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vzip1_s16
+  return vzip1_s16(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vzip1q_s16
+  return vzip1q_s16(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vzip1_s32
+  return vzip1_s32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vzip1q_s32
+  return vzip1q_s32(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) {
+  // CHECK: test_vzip1q_s64
+  return vzip1q_s64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vzip1_u8
+  return vzip1_u8(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vzip1q_u8
+  return vzip1q_u8(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vzip1_u16
+  return vzip1_u16(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vzip1q_u16
+  return vzip1q_u16(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vzip1_u32
+  return vzip1_u32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vzip1q_u32
+  return vzip1q_u32(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vzip1q_u64
+  return vzip1q_u64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) {
+  // CHECK: test_vzip1_f32
+  return vzip1_f32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) {
+  // CHECK: test_vzip1q_f32
+  return vzip1q_f32(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) {
+  // CHECK: test_vzip1q_f64
+  return vzip1q_f64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) {
+  // CHECK: test_vzip1_p8
+  return vzip1_p8(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) {
+  // CHECK: test_vzip1q_p8
+  return vzip1q_p8(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) {
+  // CHECK: test_vzip1_p16
+  return vzip1_p16(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) {
+  // CHECK: test_vzip1q_p16
+  return vzip1q_p16(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vzip2_s8
+  return vzip2_s8(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vzip2q_s8
+  return vzip2q_s8(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vzip2_s16
+  return vzip2_s16(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vzip2q_s16
+  return vzip2q_s16(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vzip2_s32
+  return vzip2_s32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vzip2q_s32
+  return vzip2q_s32(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) {
+  // CHECK: test_vzip2q_s64
+  return vzip2q_s64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vzip2_u8
+  return vzip2_u8(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vzip2q_u8
+  return vzip2q_u8(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vzip2_u16
+  return vzip2_u16(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vzip2q_u16
+  return vzip2q_u16(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vzip2_u32
+  return vzip2_u32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vzip2q_u32
+  return vzip2q_u32(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vzip2q_u64
+  return vzip2q_u64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) {
+  // CHECK: test_vzip2_f32
+  return vzip2_f32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) {
+  // CHECK: test_vzip2q_f32
+  return vzip2q_f32(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) {
+  // CHECK: test_vzip2q_f64
+  return vzip2q_f64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) {
+  // CHECK: test_vzip2_p8
+  return vzip2_p8(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) {
+  // CHECK: test_vzip2q_p8
+  return vzip2q_p8(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) {
+  // CHECK: test_vzip2_p16
+  return vzip2_p16(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) {
+  // CHECK: test_vzip2q_p16
+  return vzip2q_p16(a, b);
+  // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vtrn1_s8
+  return vtrn1_s8(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vtrn1q_s8
+  return vtrn1q_s8(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vtrn1_s16
+  return vtrn1_s16(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vtrn1q_s16
+  return vtrn1q_s16(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vtrn1_s32
+  return vtrn1_s32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vtrn1q_s32
+  return vtrn1q_s32(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) {
+  // CHECK: test_vtrn1q_s64
+  return vtrn1q_s64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vtrn1_u8
+  return vtrn1_u8(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vtrn1q_u8
+  return vtrn1q_u8(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vtrn1_u16
+  return vtrn1_u16(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vtrn1q_u16
+  return vtrn1q_u16(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vtrn1_u32
+  return vtrn1_u32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vtrn1q_u32
+  return vtrn1q_u32(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vtrn1q_u64
+  return vtrn1q_u64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) {
+  // CHECK: test_vtrn1_f32
+  return vtrn1_f32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) {
+  // CHECK: test_vtrn1q_f32
+  return vtrn1q_f32(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) {
+  // CHECK: test_vtrn1q_f64
+  return vtrn1q_f64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) {
+  // CHECK: test_vtrn1_p8
+  return vtrn1_p8(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) {
+  // CHECK: test_vtrn1q_p8
+  return vtrn1q_p8(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) {
+  // CHECK: test_vtrn1_p16
+  return vtrn1_p16(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) {
+  // CHECK: test_vtrn1q_p16
+  return vtrn1q_p16(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vtrn2_s8
+  return vtrn2_s8(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vtrn2q_s8
+  return vtrn2q_s8(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vtrn2_s16
+  return vtrn2_s16(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vtrn2q_s16
+  return vtrn2q_s16(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vtrn2_s32
+  return vtrn2_s32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vtrn2q_s32
+  return vtrn2q_s32(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) {
+  // CHECK: test_vtrn2q_s64
+  return vtrn2q_s64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vtrn2_u8
+  return vtrn2_u8(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vtrn2q_u8
+  return vtrn2q_u8(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vtrn2_u16
+  return vtrn2_u16(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vtrn2q_u16
+  return vtrn2q_u16(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vtrn2_u32
+  return vtrn2_u32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vtrn2q_u32
+  return vtrn2q_u32(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vtrn2q_u64
+  return vtrn2q_u64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) {
+  // CHECK: test_vtrn2_f32
+  return vtrn2_f32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) {
+  // CHECK: test_vtrn2q_f32
+  return vtrn2q_f32(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) {
+  // CHECK: test_vtrn2q_f64
+  return vtrn2q_f64(a, b);
+  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) {
+  // CHECK: test_vtrn2_p8
+  return vtrn2_p8(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) {
+  // CHECK: test_vtrn2q_p8
+  return vtrn2q_p8(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) {
+  // CHECK: test_vtrn2_p16
+  return vtrn2_p16(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) {
+  // CHECK: test_vtrn2q_p16
+  return vtrn2q_p16(a, b);
+  // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vuzp_s8
+  return vuzp_s8(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vuzp_s16
+  return vuzp_s16(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vuzp_s32
+  return vuzp_s32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vuzp_u8
+  return vuzp_u8(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vuzp_u16
+  return vuzp_u16(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vuzp_u32
+  return vuzp_u32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
+  // CHECK: test_vuzp_f32
+  return vuzp_f32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
+  // CHECK: test_vuzp_p8
+  return vuzp_p8(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
+  // CHECK: test_vuzp_p16
+  return vuzp_p16(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vuzpq_s8
+  return vuzpq_s8(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vuzpq_s16
+  return vuzpq_s16(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vuzpq_s32
+  return vuzpq_s32(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vuzpq_u8
+  return vuzpq_u8(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vuzpq_u16
+  return vuzpq_u16(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vuzpq_u32
+  return vuzpq_u32(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
+  // CHECK: test_vuzpq_f32
+  return vuzpq_f32(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
+  // CHECK: test_vuzpq_p8
+  return vuzpq_p8(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
+  // CHECK: test_vuzpq_p16
+  return vuzpq_p16(a, b);
+  // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vzip_s8
+  return vzip_s8(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vzip_s16
+  return vzip_s16(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vzip_s32
+  return vzip_s32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vzip_u8
+  return vzip_u8(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vzip_u16
+  return vzip_u16(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vzip_u32
+  return vzip_u32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
+  // CHECK: test_vzip_f32
+  return vzip_f32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
+  // CHECK: test_vzip_p8
+  return vzip_p8(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
+  // CHECK: test_vzip_p16
+  return vzip_p16(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vzipq_s8
+  return vzipq_s8(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vzipq_s16
+  return vzipq_s16(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vzipq_s32
+  return vzipq_s32(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vzipq_u8
+  return vzipq_u8(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vzipq_u16
+  return vzipq_u16(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vzipq_u32
+  return vzipq_u32(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
+  // CHECK: test_vzipq_f32
+  return vzipq_f32(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
+  // CHECK: test_vzipq_p8
+  return vzipq_p8(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
+  // CHECK: test_vzipq_p16
+  return vzipq_p16(a, b);
+  // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vtrn_s8
+  return vtrn_s8(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vtrn_s16
+  return vtrn_s16(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vtrn_s32
+  return vtrn_s32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vtrn_u8
+  return vtrn_u8(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vtrn_u16
+  return vtrn_u16(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vtrn_u32
+  return vtrn_u32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
+  // CHECK: test_vtrn_f32
+  return vtrn_f32(a, b);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
+  // CHECK: test_vtrn_p8
+  return vtrn_p8(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
+  // CHECK: test_vtrn_p16
+  return vtrn_p16(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vtrnq_s8
+  return vtrnq_s8(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vtrnq_s16
+  return vtrnq_s16(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vtrnq_s32
+  return vtrnq_s32(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vtrnq_u8
+  return vtrnq_u8(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vtrnq_u16
+  return vtrnq_u16(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vtrnq_u32
+  return vtrnq_u32(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
+  // CHECK: test_vtrnq_f32
+  return vtrnq_f32(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
+  // CHECK: test_vtrnq_p8
+  return vtrnq_p8(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
+  // CHECK: test_vtrnq_p16
+  return vtrnq_p16(a, b);
+  // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}

Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=194124&r1=194123&r2=194124&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Tue Nov  5 21:35:53 2013
@@ -81,6 +81,12 @@ enum OpKind {
   OpQRDMulhLane,
   OpFMSLane,
   OpFMSLaneQ,
+  OpTrn1,
+  OpZip1,
+  OpUzp1,
+  OpTrn2,
+  OpZip2,
+  OpUzp2,
   OpEq,
   OpGe,
   OpLe,
@@ -228,6 +234,12 @@ public:
     OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
     OpMap["OP_FMS_LN"] = OpFMSLane;
     OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
+    OpMap["OP_TRN1"]  = OpTrn1;
+    OpMap["OP_ZIP1"]  = OpZip1;
+    OpMap["OP_UZP1"]  = OpUzp1;
+    OpMap["OP_TRN2"]  = OpTrn2;
+    OpMap["OP_ZIP2"]  = OpZip2;
+    OpMap["OP_UZP2"]  = OpUzp2;
     OpMap["OP_EQ"]    = OpEq;
     OpMap["OP_GE"]    = OpGe;
     OpMap["OP_LE"]    = OpLe;
@@ -1776,6 +1788,42 @@ static std::string GenOpString(const std
     s += ");";
     break;
   }
+  case OpUzp1:
+    s += "__builtin_shufflevector(__a, __b";
+    for (unsigned i = 0; i < nElts; i++)
+      s += ", " + utostr(2*i);
+    s += ");";
+    break;
+  case OpUzp2:
+    s += "__builtin_shufflevector(__a, __b";
+    for (unsigned i = 0; i < nElts; i++)
+      s += ", " + utostr(2*i+1);
+    s += ");";
+    break;
+  case OpZip1:
+    s += "__builtin_shufflevector(__a, __b";
+    for (unsigned i = 0; i < (nElts/2); i++)
+       s += ", " + utostr(i) + ", " + utostr(i+nElts);
+    s += ");";
+    break;
+  case OpZip2:
+    s += "__builtin_shufflevector(__a, __b";
+    for (unsigned i = nElts/2; i < nElts; i++)
+       s += ", " + utostr(i) + ", " + utostr(i+nElts);
+    s += ");";
+    break;
+  case OpTrn1:
+    s += "__builtin_shufflevector(__a, __b";
+    for (unsigned i = 0; i < (nElts/2); i++)
+       s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
+    s += ");";
+    break;
+  case OpTrn2:
+    s += "__builtin_shufflevector(__a, __b";
+    for (unsigned i = 0; i < (nElts/2); i++)
+       s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
+    s += ");";
+    break;
   case OpAbdl: {
     std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
     if (typestr[0] != 'U') {





More information about the cfe-commits mailing list