r194124 - Implement AArch64 Neon instruction set Perm.
Jiangning Liu
jiangning.liu at arm.com
Tue Nov 5 19:35:53 PST 2013
Author: jiangning
Date: Tue Nov 5 21:35:53 2013
New Revision: 194124
URL: http://llvm.org/viewvc/llvm-project?rev=194124&view=rev
Log:
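Implement the AArch64 NEON permutation (Perm) intrinsics: vtrn1/vtrn2, vzip1/vzip2 and vuzp1/vuzp2 (including their q forms), and map the existing vtrn/vzip/vuzp builtins onto the legacy ARMv7 builtins.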
Added:
cfe/trunk/test/CodeGen/aarch64-neon-perm.c
Modified:
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/utils/TableGen/NeonEmitter.cpp
Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=194124&r1=194123&r2=194124&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Tue Nov 5 21:35:53 2013
@@ -59,6 +59,12 @@ def OP_QDMULH_LN : Op;
def OP_QRDMULH_LN : Op;
def OP_FMS_LN : Op;
def OP_FMS_LNQ : Op;
+def OP_TRN1 : Op;
+def OP_ZIP1 : Op;
+def OP_UZP1 : Op;
+def OP_TRN2 : Op;
+def OP_ZIP2 : Op;
+def OP_UZP2 : Op;
def OP_EQ : Op;
def OP_GE : Op;
def OP_LE : Op;
@@ -793,6 +799,21 @@ def SHA256H2 : SInst<"vsha256h2", "dddd"
def SHA256SU1 : SInst<"vsha256su1", "dddd", "QUi">;
////////////////////////////////////////////////////////////////////////////////
+// Permutation
+def VTRN1 : SOpInst<"vtrn1", "ddd",
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_TRN1>;
+def VZIP1 : SOpInst<"vzip1", "ddd",
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_ZIP1>;
+def VUZP1 : SOpInst<"vuzp1", "ddd",
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP1>;
+def VTRN2 : SOpInst<"vtrn2", "ddd",
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_TRN2>;
+def VZIP2 : SOpInst<"vzip2", "ddd",
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_ZIP2>;
+def VUZP2 : SOpInst<"vuzp2", "ddd",
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP2>;
+
+////////////////////////////////////////////////////////////////////////////////
// Scalar Arithmetic
// Scalar Addition
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=194124&r1=194123&r2=194124&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Nov 5 21:35:53 2013
@@ -2497,6 +2497,18 @@ Value *CodeGenFunction::EmitAArch64Built
// AArch64 builtins mapping to legacy ARM v7 builtins.
// FIXME: the mapped builtins listed correspond to what has been tested
// in aarch64-neon-intrinsics.c so far.
+ case AArch64::BI__builtin_neon_vuzp_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzp_v, E);
+ case AArch64::BI__builtin_neon_vuzpq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzpq_v, E);
+ case AArch64::BI__builtin_neon_vzip_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzip_v, E);
+ case AArch64::BI__builtin_neon_vzipq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzipq_v, E);
+ case AArch64::BI__builtin_neon_vtrn_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrn_v, E);
+ case AArch64::BI__builtin_neon_vtrnq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrnq_v, E);
case AArch64::BI__builtin_neon_vext_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vext_v, E);
case AArch64::BI__builtin_neon_vextq_v:
Added: cfe/trunk/test/CodeGen/aarch64-neon-perm.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-perm.c?rev=194124&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-perm.c (added)
+++ cfe/trunk/test/CodeGen/aarch64-neon-perm.c Tue Nov 5 21:35:53 2013
@@ -0,0 +1,1093 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test new aarch64 intrinsics and types
+
+#include <arm_neon.h>
+
+int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vuzp1_s8
+ return vuzp1_s8(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vuzp1q_s8
+ return vuzp1q_s8(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vuzp1_s16
+ return vuzp1_s16(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vuzp1q_s16
+ return vuzp1q_s16(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vuzp1_s32
+ return vuzp1_s32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vuzp1q_s32
+ return vuzp1q_s32(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) {
+ // CHECK: test_vuzp1q_s64
+ return vuzp1q_s64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vuzp1_u8
+ return vuzp1_u8(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vuzp1q_u8
+ return vuzp1q_u8(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vuzp1_u16
+ return vuzp1_u16(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vuzp1q_u16
+ return vuzp1q_u16(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vuzp1_u32
+ return vuzp1_u32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vuzp1q_u32
+ return vuzp1q_u32(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vuzp1q_u64
+ return vuzp1q_u64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) {
+ // CHECK: test_vuzp1_f32
+ return vuzp1_f32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) {
+ // CHECK: test_vuzp1q_f32
+ return vuzp1q_f32(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) {
+ // CHECK: test_vuzp1q_f64
+ return vuzp1q_f64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) {
+ // CHECK: test_vuzp1_p8
+ return vuzp1_p8(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) {
+ // CHECK: test_vuzp1q_p8
+ return vuzp1q_p8(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) {
+ // CHECK: test_vuzp1_p16
+ return vuzp1_p16(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) {
+ // CHECK: test_vuzp1q_p16
+ return vuzp1q_p16(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vuzp2_s8
+ return vuzp2_s8(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vuzp2q_s8
+ return vuzp2q_s8(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vuzp2_s16
+ return vuzp2_s16(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vuzp2q_s16
+ return vuzp2q_s16(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vuzp2_s32
+ return vuzp2_s32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vuzp2q_s32
+ return vuzp2q_s32(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) {
+ // CHECK: test_vuzp2q_s64
+ return vuzp2q_s64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vuzp2_u8
+ return vuzp2_u8(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vuzp2q_u8
+ return vuzp2q_u8(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vuzp2_u16
+ return vuzp2_u16(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vuzp2q_u16
+ return vuzp2q_u16(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vuzp2_u32
+ return vuzp2_u32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vuzp2q_u32
+ return vuzp2q_u32(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vuzp2q_u64
+ return vuzp2q_u64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) {
+ // CHECK: test_vuzp2_f32
+ return vuzp2_f32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) {
+ // CHECK: test_vuzp2q_f32
+ return vuzp2q_f32(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) {
+ // CHECK: test_vuzp2q_f64
+ return vuzp2q_f64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) {
+ // CHECK: test_vuzp2_p8
+ return vuzp2_p8(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) {
+ // CHECK: test_vuzp2q_p8
+ return vuzp2q_p8(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) {
+ // CHECK: test_vuzp2_p16
+ return vuzp2_p16(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) {
+ // CHECK: test_vuzp2q_p16
+ return vuzp2q_p16(a, b);
+ // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vzip1_s8
+ return vzip1_s8(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vzip1q_s8
+ return vzip1q_s8(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vzip1_s16
+ return vzip1_s16(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vzip1q_s16
+ return vzip1q_s16(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vzip1_s32
+ return vzip1_s32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vzip1q_s32
+ return vzip1q_s32(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) {
+ // CHECK: test_vzip1q_s64
+ return vzip1q_s64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vzip1_u8
+ return vzip1_u8(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vzip1q_u8
+ return vzip1q_u8(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vzip1_u16
+ return vzip1_u16(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vzip1q_u16
+ return vzip1q_u16(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vzip1_u32
+ return vzip1_u32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vzip1q_u32
+ return vzip1q_u32(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vzip1q_u64
+ return vzip1q_u64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) {
+ // CHECK: test_vzip1_f32
+ return vzip1_f32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) {
+ // CHECK: test_vzip1q_f32
+ return vzip1q_f32(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) {
+ // CHECK: test_vzip1q_f64
+ return vzip1q_f64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) {
+ // CHECK: test_vzip1_p8
+ return vzip1_p8(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) {
+ // CHECK: test_vzip1q_p8
+ return vzip1q_p8(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) {
+ // CHECK: test_vzip1_p16
+ return vzip1_p16(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) {
+ // CHECK: test_vzip1q_p16
+ return vzip1q_p16(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vzip2_s8
+ return vzip2_s8(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vzip2q_s8
+ return vzip2q_s8(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vzip2_s16
+ return vzip2_s16(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vzip2q_s16
+ return vzip2q_s16(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vzip2_s32
+ return vzip2_s32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vzip2q_s32
+ return vzip2q_s32(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) {
+ // CHECK: test_vzip2q_s64
+ return vzip2q_s64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vzip2_u8
+ return vzip2_u8(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vzip2q_u8
+ return vzip2q_u8(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vzip2_u16
+ return vzip2_u16(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vzip2q_u16
+ return vzip2q_u16(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vzip2_u32
+ return vzip2_u32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vzip2q_u32
+ return vzip2q_u32(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vzip2q_u64
+ return vzip2q_u64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) {
+ // CHECK: test_vzip2_f32
+ return vzip2_f32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) {
+ // CHECK: test_vzip2q_f32
+ return vzip2q_f32(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) {
+ // CHECK: test_vzip2q_f64
+ return vzip2q_f64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) {
+ // CHECK: test_vzip2_p8
+ return vzip2_p8(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) {
+ // CHECK: test_vzip2q_p8
+ return vzip2q_p8(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) {
+ // CHECK: test_vzip2_p16
+ return vzip2_p16(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) {
+ // CHECK: test_vzip2q_p16
+ return vzip2q_p16(a, b);
+ // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vtrn1_s8
+ return vtrn1_s8(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vtrn1q_s8
+ return vtrn1q_s8(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vtrn1_s16
+ return vtrn1_s16(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vtrn1q_s16
+ return vtrn1q_s16(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vtrn1_s32
+ return vtrn1_s32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vtrn1q_s32
+ return vtrn1q_s32(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) {
+ // CHECK: test_vtrn1q_s64
+ return vtrn1q_s64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vtrn1_u8
+ return vtrn1_u8(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vtrn1q_u8
+ return vtrn1q_u8(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vtrn1_u16
+ return vtrn1_u16(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vtrn1q_u16
+ return vtrn1q_u16(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vtrn1_u32
+ return vtrn1_u32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vtrn1q_u32
+ return vtrn1q_u32(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vtrn1q_u64
+ return vtrn1q_u64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) {
+ // CHECK: test_vtrn1_f32
+ return vtrn1_f32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) {
+ // CHECK: test_vtrn1q_f32
+ return vtrn1q_f32(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) {
+ // CHECK: test_vtrn1q_f64
+ return vtrn1q_f64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) {
+ // CHECK: test_vtrn1_p8
+ return vtrn1_p8(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) {
+ // CHECK: test_vtrn1q_p8
+ return vtrn1q_p8(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) {
+ // CHECK: test_vtrn1_p16
+ return vtrn1_p16(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) {
+ // CHECK: test_vtrn1q_p16
+ return vtrn1q_p16(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vtrn2_s8
+ return vtrn2_s8(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vtrn2q_s8
+ return vtrn2q_s8(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vtrn2_s16
+ return vtrn2_s16(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vtrn2q_s16
+ return vtrn2q_s16(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vtrn2_s32
+ return vtrn2_s32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vtrn2q_s32
+ return vtrn2q_s32(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) {
+ // CHECK: test_vtrn2q_s64
+ return vtrn2q_s64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vtrn2_u8
+ return vtrn2_u8(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vtrn2q_u8
+ return vtrn2q_u8(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vtrn2_u16
+ return vtrn2_u16(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vtrn2q_u16
+ return vtrn2q_u16(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vtrn2_u32
+ return vtrn2_u32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vtrn2q_u32
+ return vtrn2q_u32(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vtrn2q_u64
+ return vtrn2q_u64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) {
+ // CHECK: test_vtrn2_f32
+ return vtrn2_f32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) {
+ // CHECK: test_vtrn2q_f32
+ return vtrn2q_f32(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) {
+ // CHECK: test_vtrn2q_f64
+ return vtrn2q_f64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) {
+ // CHECK: test_vtrn2_p8
+ return vtrn2_p8(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) {
+ // CHECK: test_vtrn2q_p8
+ return vtrn2q_p8(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) {
+ // CHECK: test_vtrn2_p16
+ return vtrn2_p16(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) {
+ // CHECK: test_vtrn2q_p16
+ return vtrn2q_p16(a, b);
+ // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vuzp_s8
+ return vuzp_s8(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vuzp_s16
+ return vuzp_s16(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vuzp_s32
+ return vuzp_s32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vuzp_u8
+ return vuzp_u8(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vuzp_u16
+ return vuzp_u16(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vuzp_u32
+ return vuzp_u32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
+ // CHECK: test_vuzp_f32
+ return vuzp_f32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
+ // CHECK: test_vuzp_p8
+ return vuzp_p8(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
+ // CHECK: test_vuzp_p16
+ return vuzp_p16(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vuzpq_s8
+ return vuzpq_s8(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vuzpq_s16
+ return vuzpq_s16(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vuzpq_s32
+ return vuzpq_s32(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vuzpq_u8
+ return vuzpq_u8(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vuzpq_u16
+ return vuzpq_u16(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vuzpq_u32
+ return vuzpq_u32(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
+ // CHECK: test_vuzpq_f32
+ return vuzpq_f32(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
+ // CHECK: test_vuzpq_p8
+ return vuzpq_p8(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
+ // CHECK: test_vuzpq_p16
+ return vuzpq_p16(a, b);
+ // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vzip_s8
+ return vzip_s8(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vzip_s16
+ return vzip_s16(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vzip_s32
+ return vzip_s32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vzip_u8
+ return vzip_u8(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vzip_u16
+ return vzip_u16(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vzip_u32
+ return vzip_u32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
+ // CHECK: test_vzip_f32
+ return vzip_f32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
+ // CHECK: test_vzip_p8
+ return vzip_p8(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
+ // CHECK: test_vzip_p16
+ return vzip_p16(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vzipq_s8
+ return vzipq_s8(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vzipq_s16
+ return vzipq_s16(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vzipq_s32
+ return vzipq_s32(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vzipq_u8
+ return vzipq_u8(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vzipq_u16
+ return vzipq_u16(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vzipq_u32
+ return vzipq_u32(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
+ // CHECK: test_vzipq_f32
+ return vzipq_f32(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
+ // CHECK: test_vzipq_p8
+ return vzipq_p8(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
+ // CHECK: test_vzipq_p16
+ return vzipq_p16(a, b);
+ // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vtrn_s8
+ return vtrn_s8(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vtrn_s16
+ return vtrn_s16(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vtrn_s32
+ return vtrn_s32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vtrn_u8
+ return vtrn_u8(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vtrn_u16
+ return vtrn_u16(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vtrn_u32
+ return vtrn_u32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
+ // CHECK: test_vtrn_f32
+ return vtrn_f32(a, b);
+ // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+ // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
+ // CHECK: test_vtrn_p8
+ return vtrn_p8(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
+ // CHECK: test_vtrn_p16
+ return vtrn_p16(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vtrnq_s8
+ return vtrnq_s8(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vtrnq_s16
+ return vtrnq_s16(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vtrnq_s32
+ return vtrnq_s32(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vtrnq_u8
+ return vtrnq_u8(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vtrnq_u16
+ return vtrnq_u16(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vtrnq_u32
+ return vtrnq_u32(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
+ // CHECK: test_vtrnq_f32
+ return vtrnq_f32(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
+ // CHECK: test_vtrnq_p8
+ return vtrnq_p8(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
+ // CHECK: test_vtrnq_p16
+ return vtrnq_p16(a, b);
+ // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=194124&r1=194123&r2=194124&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Tue Nov 5 21:35:53 2013
@@ -81,6 +81,12 @@ enum OpKind {
OpQRDMulhLane,
OpFMSLane,
OpFMSLaneQ,
+ OpTrn1,
+ OpZip1,
+ OpUzp1,
+ OpTrn2,
+ OpZip2,
+ OpUzp2,
OpEq,
OpGe,
OpLe,
@@ -228,6 +234,12 @@ public:
OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
OpMap["OP_FMS_LN"] = OpFMSLane;
OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
+ OpMap["OP_TRN1"] = OpTrn1;
+ OpMap["OP_ZIP1"] = OpZip1;
+ OpMap["OP_UZP1"] = OpUzp1;
+ OpMap["OP_TRN2"] = OpTrn2;
+ OpMap["OP_ZIP2"] = OpZip2;
+ OpMap["OP_UZP2"] = OpUzp2;
OpMap["OP_EQ"] = OpEq;
OpMap["OP_GE"] = OpGe;
OpMap["OP_LE"] = OpLe;
@@ -1776,6 +1788,42 @@ static std::string GenOpString(const std
s += ");";
break;
}
+ case OpUzp1:
+ s += "__builtin_shufflevector(__a, __b";
+ for (unsigned i = 0; i < nElts; i++)
+ s += ", " + utostr(2*i);
+ s += ");";
+ break;
+ case OpUzp2:
+ s += "__builtin_shufflevector(__a, __b";
+ for (unsigned i = 0; i < nElts; i++)
+ s += ", " + utostr(2*i+1);
+ s += ");";
+ break;
+ case OpZip1:
+ s += "__builtin_shufflevector(__a, __b";
+ for (unsigned i = 0; i < (nElts/2); i++)
+ s += ", " + utostr(i) + ", " + utostr(i+nElts);
+ s += ");";
+ break;
+ case OpZip2:
+ s += "__builtin_shufflevector(__a, __b";
+ for (unsigned i = nElts/2; i < nElts; i++)
+ s += ", " + utostr(i) + ", " + utostr(i+nElts);
+ s += ");";
+ break;
+ case OpTrn1:
+ s += "__builtin_shufflevector(__a, __b";
+ for (unsigned i = 0; i < (nElts/2); i++)
+ s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
+ s += ");";
+ break;
+ case OpTrn2:
+ s += "__builtin_shufflevector(__a, __b";
+ for (unsigned i = 0; i < (nElts/2); i++)
+ s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
+ s += ");";
+ break;
case OpAbdl: {
std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
if (typestr[0] != 'U') {
More information about the cfe-commits mailing list