r190289 - Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,

Jiangning Liu jiangning.liu at arm.com
Sun Sep 8 19:21:08 PDT 2013


Author: jiangning
Date: Sun Sep  8 21:21:08 2013
New Revision: 190289

URL: http://llvm.org/viewvc/llvm-project?rev=190289&view=rev
Log:
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,

SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL


Modified:
    cfe/trunk/include/clang/Basic/arm_neon.td
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
    cfe/trunk/utils/TableGen/NeonEmitter.cpp

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=190289&r1=190288&r2=190289&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Sun Sep  8 21:21:08 2013
@@ -18,15 +18,22 @@ def OP_NONE  : Op;
 def OP_UNAVAILABLE : Op;
 def OP_ADD   : Op;
 def OP_ADDL  : Op;
+def OP_ADDLHi : Op;
 def OP_ADDW  : Op;
+def OP_ADDWHi : Op;
 def OP_SUB   : Op;
 def OP_SUBL  : Op;
+def OP_SUBLHi : Op;
 def OP_SUBW  : Op;
+def OP_SUBWHi : Op;
 def OP_MUL   : Op;
 def OP_MLA   : Op;
 def OP_MLAL  : Op;
+def OP_MULLHi : Op;
+def OP_MLALHi : Op;
 def OP_MLS   : Op;
 def OP_MLSL  : Op;
+def OP_MLSLHi : Op;
 def OP_MUL_N : Op;
 def OP_MLA_N : Op;
 def OP_MLS_N : Op;
@@ -66,9 +73,18 @@ def OP_REV64 : Op;
 def OP_REV32 : Op;
 def OP_REV16 : Op;
 def OP_REINT : Op;
+def OP_ADDHNHi : Op;
+def OP_RADDHNHi : Op;
+def OP_SUBHNHi : Op;
+def OP_RSUBHNHi : Op;
 def OP_ABDL  : Op;
+def OP_ABDLHi : Op;
 def OP_ABA   : Op;
 def OP_ABAL  : Op;
+def OP_ABALHi : Op;
+def OP_QDMULLHi : Op;
+def OP_QDMLALHi : Op;
+def OP_QDMLSLHi : Op;
 def OP_DIV  : Op;
 def OP_LONG_HI : Op;
 def OP_NARROW_HI : Op;
@@ -133,6 +149,7 @@ class NoTestOpInst<string n, string p, s
 // w: double width elements, same num elts
 // n: double width elements, half num elts
 // h: half width elements, double num elts
+// q: half width elements, quad num elts
 // e: half width elements, double num elts, unsigned
 // m: half width elements, same num elts
 // i: constant int
@@ -590,6 +607,29 @@ def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "
 }
 
 ////////////////////////////////////////////////////////////////////////////////
+// 3VDiff class using high 64-bit in operands
+def VADDL_HIGH   : SOpInst<"vaddl_high", "wkk", "csiUcUsUi", OP_ADDLHi>;
+def VADDW_HIGH   : SOpInst<"vaddw_high", "wwk", "csiUcUsUi", OP_ADDWHi>;
+def VSUBL_HIGH   : SOpInst<"vsubl_high", "wkk", "csiUcUsUi", OP_SUBLHi>;
+def VSUBW_HIGH   : SOpInst<"vsubw_high", "wwk", "csiUcUsUi", OP_SUBWHi>;
+
+def VABDL_HIGH   : SOpInst<"vabdl_high", "wkk",  "csiUcUsUi", OP_ABDLHi>;
+def VABAL_HIGH   : SOpInst<"vabal_high", "wwkk", "csiUcUsUi", OP_ABALHi>;
+
+def VMULL_HIGH   : SOpInst<"vmull_high", "wkk", "csiUcUsUiPc", OP_MULLHi>;
+def VMLAL_HIGH   : SOpInst<"vmlal_high", "wwkk", "csiUcUsUi", OP_MLALHi>;
+def VMLSL_HIGH   : SOpInst<"vmlsl_high", "wwkk", "csiUcUsUi", OP_MLSLHi>;
+
+def VADDHN_HIGH  : SOpInst<"vaddhn_high", "qhkk", "silUsUiUl", OP_ADDHNHi>;
+def VRADDHN_HIGH : SOpInst<"vraddhn_high", "qhkk", "silUsUiUl", OP_RADDHNHi>;
+def VSUBHN_HIGH  : SOpInst<"vsubhn_high", "qhkk", "silUsUiUl", OP_SUBHNHi>;
+def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "qhkk", "silUsUiUl", OP_RSUBHNHi>;
+
+def VQDMULL_HIGH : SOpInst<"vqdmull_high", "wkk", "si", OP_QDMULLHi>;
+def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>;
+def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>;
+
+////////////////////////////////////////////////////////////////////////////////
 // Scalar Arithmetic
 
 // Scalar Addition

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=190289&r1=190288&r2=190289&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Sep  8 21:21:08 2013
@@ -1840,6 +1840,22 @@ Value *CodeGenFunction::EmitAArch64Built
     return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E);
   case AArch64::BI__builtin_neon_vqrshlq_v:
     return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E);
+  case AArch64::BI__builtin_neon_vaddhn_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vaddhn_v, E);
+  case AArch64::BI__builtin_neon_vraddhn_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vraddhn_v, E);
+  case AArch64::BI__builtin_neon_vsubhn_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsubhn_v, E);
+  case AArch64::BI__builtin_neon_vrsubhn_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsubhn_v, E);
+  case AArch64::BI__builtin_neon_vmull_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmull_v, E);
+  case AArch64::BI__builtin_neon_vqdmull_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmull_v, E);
+  case AArch64::BI__builtin_neon_vqdmlal_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlal_v, E);
+  case AArch64::BI__builtin_neon_vqdmlsl_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlsl_v, E);
   case AArch64::BI__builtin_neon_vmax_v:
     return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E);
   case AArch64::BI__builtin_neon_vmaxq_v:

Modified: cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c?rev=190289&r1=190288&r2=190289&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c Sun Sep  8 21:21:08 2013
@@ -4274,3 +4274,971 @@ uint64x2_t test_vcvtq_n_u64_f64(float64x
   return vcvtq_n_u64_f64(a, 50);
   // CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
 }
+
+int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vaddl_s8
+  return vaddl_s8(a, b);
+  // CHECK: saddl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+
+int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vaddl_s16
+  return vaddl_s16(a, b);
+  // CHECK: saddl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+
+int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vaddl_s32
+  return vaddl_s32(a, b);
+  // CHECK: saddl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vaddl_u8
+  return vaddl_u8(a, b);
+  // CHECK: uaddl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+
+uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vaddl_u16
+  return vaddl_u16(a, b);
+  // CHECK: uaddl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+
+uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vaddl_u32
+  return vaddl_u32(a, b);
+  // CHECK: uaddl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vaddl_high_s8
+  return vaddl_high_s8(a, b);
+  // CHECK: saddl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+
+int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vaddl_high_s16
+  return vaddl_high_s16(a, b);
+  // CHECK: saddl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vaddl_high_s32
+  return vaddl_high_s32(a, b);
+  // CHECK: saddl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vaddl_high_u8
+  return vaddl_high_u8(a, b);
+  // CHECK: uaddl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+
+uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vaddl_high_u16
+  return vaddl_high_u16(a, b);
+  // CHECK: uaddl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vaddl_high_u32
+  return vaddl_high_u32(a, b);
+  // CHECK: uaddl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
+  // CHECK: test_vaddw_s8
+  return vaddw_s8(a, b);
+  // CHECK: saddw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b
+}
+
+int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
+  // CHECK: test_vaddw_s16
+  return vaddw_s16(a, b);
+  // CHECK: saddw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h
+}
+
+int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
+  // CHECK: test_vaddw_s32
+  return vaddw_s32(a, b);
+  // CHECK: saddw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s
+}
+
+uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
+  // CHECK: test_vaddw_u8
+  return vaddw_u8(a, b);
+  // CHECK: uaddw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b
+}
+
+uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
+  // CHECK: test_vaddw_u16
+  return vaddw_u16(a, b);
+  // CHECK: uaddw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h
+}
+
+uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
+  // CHECK: test_vaddw_u32
+  return vaddw_u32(a, b);
+  // CHECK: uaddw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
+  // CHECK: test_vaddw_high_s8
+  return vaddw_high_s8(a, b);
+  // CHECK: saddw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b
+}
+
+int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
+  // CHECK: test_vaddw_high_s16
+  return vaddw_high_s16(a, b);
+  // CHECK: saddw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h
+}
+
+int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
+  // CHECK: test_vaddw_high_s32
+  return vaddw_high_s32(a, b);
+  // CHECK: saddw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s
+}
+
+uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
+  // CHECK: test_vaddw_high_u8
+  return vaddw_high_u8(a, b);
+  // CHECK: uaddw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b
+}
+
+uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
+  // CHECK: test_vaddw_high_u16
+  return vaddw_high_u16(a, b);
+  // CHECK: uaddw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h
+}
+
+uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
+  // CHECK: test_vaddw_high_u32
+  return vaddw_high_u32(a, b);
+  // CHECK: uaddw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vsubl_s8
+  return vsubl_s8(a, b);
+  // CHECK: ssubl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+
+int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vsubl_s16
+  return vsubl_s16(a, b);
+  // CHECK: ssubl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+
+int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vsubl_s32
+  return vsubl_s32(a, b);
+  // CHECK: ssubl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vsubl_u8
+  return vsubl_u8(a, b);
+  // CHECK: usubl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+
+uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vsubl_u16
+  return vsubl_u16(a, b);
+  // CHECK: usubl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+
+uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vsubl_u32
+  return vsubl_u32(a, b);
+  // CHECK: usubl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vsubl_high_s8
+  return vsubl_high_s8(a, b);
+  // CHECK: ssubl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+
+int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vsubl_high_s16
+  return vsubl_high_s16(a, b);
+  // CHECK: ssubl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vsubl_high_s32
+  return vsubl_high_s32(a, b);
+  // CHECK: ssubl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vsubl_high_u8
+  return vsubl_high_u8(a, b);
+  // CHECK: usubl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+
+uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vsubl_high_u16
+  return vsubl_high_u16(a, b);
+  // CHECK: usubl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vsubl_high_u32
+  return vsubl_high_u32(a, b);
+  // CHECK: usubl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
+  // CHECK: test_vsubw_s8
+  return vsubw_s8(a, b);
+  // CHECK: ssubw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b
+}
+
+int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
+  // CHECK: test_vsubw_s16
+  return vsubw_s16(a, b);
+  // CHECK: ssubw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h
+}
+
+int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
+  // CHECK: test_vsubw_s32
+  return vsubw_s32(a, b);
+  // CHECK: ssubw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s
+}
+
+uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
+  // CHECK: test_vsubw_u8
+  return vsubw_u8(a, b);
+  // CHECK: usubw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b
+}
+
+uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
+  // CHECK: test_vsubw_u16
+  return vsubw_u16(a, b);
+  // CHECK: usubw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h
+}
+
+uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
+  // CHECK: test_vsubw_u32
+  return vsubw_u32(a, b);
+  // CHECK: usubw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
+  // CHECK: test_vsubw_high_s8
+  return vsubw_high_s8(a, b);
+  // CHECK: ssubw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b
+}
+
+int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
+  // CHECK: test_vsubw_high_s16
+  return vsubw_high_s16(a, b);
+  // CHECK: ssubw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h
+}
+
+int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
+  // CHECK: test_vsubw_high_s32
+  return vsubw_high_s32(a, b);
+  // CHECK: ssubw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s
+}
+
+uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
+  // CHECK: test_vsubw_high_u8
+  return vsubw_high_u8(a, b);
+  // CHECK: usubw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b
+}
+
+uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
+  // CHECK: test_vsubw_high_u16
+  return vsubw_high_u16(a, b);
+  // CHECK: usubw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h
+}
+
+uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
+  // CHECK: test_vsubw_high_u32
+  return vsubw_high_u32(a, b);
+  // CHECK: usubw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s
+}
+
+int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vaddhn_s16
+  return vaddhn_s16(a, b);
+  // CHECK: addhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vaddhn_s32
+  return vaddhn_s32(a, b);
+  // CHECK: addhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
+  // CHECK: test_vaddhn_s64
+  return vaddhn_s64(a, b);
+  // CHECK: addhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vaddhn_u16
+  return vaddhn_u16(a, b);
+  // CHECK: addhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vaddhn_u32
+  return vaddhn_u32(a, b);
+  // CHECK: addhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vaddhn_u64
+  return vaddhn_u64(a, b);
+  // CHECK: addhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
+  // CHECK: test_vaddhn_high_s16
+  return vaddhn_high_s16(r, a, b);
+  // CHECK: addhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
+  // CHECK: test_vaddhn_high_s32
+  return vaddhn_high_s32(r, a, b);
+  // CHECK: addhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
+  // CHECK: test_vaddhn_high_s64
+  return vaddhn_high_s64(r, a, b);
+  // CHECK: addhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vaddhn_high_u16
+  return vaddhn_high_u16(r, a, b);
+  // CHECK: addhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vaddhn_high_u32
+  return vaddhn_high_u32(r, a, b);
+  // CHECK: addhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vaddhn_high_u64
+  return vaddhn_high_u64(r, a, b);
+  // CHECK: addhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vraddhn_s16
+  return vraddhn_s16(a, b);
+  // CHECK: raddhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vraddhn_s32
+  return vraddhn_s32(a, b);
+  // CHECK: raddhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
+  // CHECK: test_vraddhn_s64
+  return vraddhn_s64(a, b);
+  // CHECK: raddhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vraddhn_u16
+  return vraddhn_u16(a, b);
+  // CHECK: raddhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vraddhn_u32
+  return vraddhn_u32(a, b);
+  // CHECK: raddhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vraddhn_u64
+  return vraddhn_u64(a, b);
+  // CHECK: raddhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
+  // CHECK: test_vraddhn_high_s16
+  return vraddhn_high_s16(r, a, b);
+  // CHECK: raddhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
+  // CHECK: test_vraddhn_high_s32
+  return vraddhn_high_s32(r, a, b);
+  // CHECK: raddhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
+  // CHECK: test_vraddhn_high_s64
+  return vraddhn_high_s64(r, a, b);
+  // CHECK: raddhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vraddhn_high_u16
+  return vraddhn_high_u16(r, a, b);
+  // CHECK: raddhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vraddhn_high_u32
+  return vraddhn_high_u32(r, a, b);
+  // CHECK: raddhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vraddhn_high_u64
+  return vraddhn_high_u64(r, a, b);
+  // CHECK: raddhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vsubhn_s16
+  return vsubhn_s16(a, b);
+  // CHECK: subhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vsubhn_s32
+  return vsubhn_s32(a, b);
+  // CHECK: subhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
+  // CHECK: test_vsubhn_s64
+  return vsubhn_s64(a, b);
+  // CHECK: subhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vsubhn_u16
+  return vsubhn_u16(a, b);
+  // CHECK: subhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vsubhn_u32
+  return vsubhn_u32(a, b);
+  // CHECK: subhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vsubhn_u64
+  return vsubhn_u64(a, b);
+  // CHECK: subhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
+  // CHECK: test_vsubhn_high_s16
+  return vsubhn_high_s16(r, a, b);
+  // CHECK: subhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
+  // CHECK: test_vsubhn_high_s32
+  return vsubhn_high_s32(r, a, b);
+  // CHECK: subhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
+  // CHECK: test_vsubhn_high_s64
+  return vsubhn_high_s64(r, a, b);
+  // CHECK: subhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vsubhn_high_u16
+  return vsubhn_high_u16(r, a, b);
+  // CHECK: subhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vsubhn_high_u32
+  return vsubhn_high_u32(r, a, b);
+  // CHECK: subhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vsubhn_high_u64
+  return vsubhn_high_u64(r, a, b);
+  // CHECK: subhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vrsubhn_s16
+  return vrsubhn_s16(a, b);
+  // CHECK: rsubhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vrsubhn_s32
+  return vrsubhn_s32(a, b);
+  // CHECK: rsubhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
+  // CHECK: test_vrsubhn_s64
+  return vrsubhn_s64(a, b);
+  // CHECK: rsubhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vrsubhn_u16
+  return vrsubhn_u16(a, b);
+  // CHECK: rsubhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vrsubhn_u32
+  return vrsubhn_u32(a, b);
+  // CHECK: rsubhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vrsubhn_u64
+  return vrsubhn_u64(a, b);
+  // CHECK: rsubhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
+  // CHECK: test_vrsubhn_high_s16
+  return vrsubhn_high_s16(r, a, b);
+  // CHECK: rsubhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
+  // CHECK: test_vrsubhn_high_s32
+  return vrsubhn_high_s32(r, a, b);
+  // CHECK: rsubhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
+  // CHECK: test_vrsubhn_high_s64
+  return vrsubhn_high_s64(r, a, b);
+  // CHECK: rsubhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vrsubhn_high_u16
+  return vrsubhn_high_u16(r, a, b);
+  // CHECK: rsubhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vrsubhn_high_u32
+  return vrsubhn_high_u32(r, a, b);
+  // CHECK: rsubhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
+  // CHECK: test_vrsubhn_high_u64
+  return vrsubhn_high_u64(r, a, b);
+  // CHECK: rsubhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vabdl_s8
+  return vabdl_s8(a, b);
+  // CHECK: sabdl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vabdl_s16
+  return vabdl_s16(a, b);
+  // CHECK: sabdl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vabdl_s32
+  return vabdl_s32(a, b);
+  // CHECK: sabdl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vabdl_u8
+  return vabdl_u8(a, b);
+  // CHECK: uabdl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vabdl_u16
+  return vabdl_u16(a, b);
+  // CHECK: uabdl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vabdl_u32
+  return vabdl_u32(a, b);
+  // CHECK: uabdl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
+  // CHECK: test_vabal_s8
+  return vabal_s8(a, b, c);
+  // CHECK: sabal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+  // CHECK: test_vabal_s16
+  return vabal_s16(a, b, c);
+  // CHECK: sabal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+  // CHECK: test_vabal_s32
+  return vabal_s32(a, b, c);
+  // CHECK: sabal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
+  // CHECK: test_vabal_u8
+  return vabal_u8(a, b, c);
+  // CHECK: uabal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
+  // CHECK: test_vabal_u16
+  return vabal_u16(a, b, c);
+  // CHECK: uabal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
+  // CHECK: test_vabal_u32
+  return vabal_u32(a, b, c);
+  // CHECK: uabal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vabdl_high_s8
+  return vabdl_high_s8(a, b);
+  // CHECK: sabdl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vabdl_high_s16
+  return vabdl_high_s16(a, b);
+  // CHECK: sabdl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vabdl_high_s32
+  return vabdl_high_s32(a, b);
+  // CHECK: sabdl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vabdl_high_u8
+  return vabdl_high_u8(a, b);
+  // CHECK: uabdl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vabdl_high_u16
+  return vabdl_high_u16(a, b);
+  // CHECK: uabdl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vabdl_high_u32
+  return vabdl_high_u32(a, b);
+  // CHECK: uabdl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
+  // CHECK: test_vabal_high_s8
+  return vabal_high_s8(a, b, c);
+  // CHECK: sabal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+  // CHECK: test_vabal_high_s16
+  return vabal_high_s16(a, b, c);
+  // CHECK: sabal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+  // CHECK: test_vabal_high_s32
+  return vabal_high_s32(a, b, c);
+  // CHECK: sabal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
+  // CHECK: test_vabal_high_u8
+  return vabal_high_u8(a, b, c);
+  // CHECK: uabal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
+  // CHECK: test_vabal_high_u16
+  return vabal_high_u16(a, b, c);
+  // CHECK: uabal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
+  // CHECK: test_vabal_high_u32
+  return vabal_high_u32(a, b, c);
+  // CHECK: uabal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vmull_s8
+  return vmull_s8(a, b);
+  // CHECK: smull {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vmull_s16
+  return vmull_s16(a, b);
+  // CHECK: smull {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vmull_s32
+  return vmull_s32(a, b);
+  // CHECK: smull {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vmull_u8
+  return vmull_u8(a, b);
+  // CHECK: umull {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
+  // CHECK: test_vmull_u16
+  return vmull_u16(a, b);
+  // CHECK: umull {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
+  // CHECK: test_vmull_u32
+  return vmull_u32(a, b);
+  // CHECK: umull {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vmull_high_s8
+  return vmull_high_s8(a, b);
+  // CHECK: smull2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vmull_high_s16
+  return vmull_high_s16(a, b);
+  // CHECK: smull2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vmull_high_s32
+  return vmull_high_s32(a, b);
+  // CHECK: smull2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vmull_high_u8
+  return vmull_high_u8(a, b);
+  // CHECK: umull2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
+  // CHECK: test_vmull_high_u16
+  return vmull_high_u16(a, b);
+  // CHECK: umull2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
+  // CHECK: test_vmull_high_u32
+  return vmull_high_u32(a, b);
+  // CHECK: umull2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
+  // CHECK: test_vmlal_s8
+  return vmlal_s8(a, b, c);
+  // CHECK: smlal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+  // CHECK: test_vmlal_s16
+  return vmlal_s16(a, b, c);
+  // CHECK: smlal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+  // CHECK: test_vmlal_s32
+  return vmlal_s32(a, b, c);
+  // CHECK: smlal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
+  // CHECK: test_vmlal_u8
+  return vmlal_u8(a, b, c);
+  // CHECK: umlal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
+  // CHECK: test_vmlal_u16
+  return vmlal_u16(a, b, c);
+  // CHECK: umlal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
+  // CHECK: test_vmlal_u32
+  return vmlal_u32(a, b, c);
+  // CHECK: umlal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
+  // CHECK: test_vmlal_high_s8
+  return vmlal_high_s8(a, b, c);
+  // CHECK: smlal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+  // CHECK: test_vmlal_high_s16
+  return vmlal_high_s16(a, b, c);
+  // CHECK: smlal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+  // CHECK: test_vmlal_high_s32
+  return vmlal_high_s32(a, b, c);
+  // CHECK: smlal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
+  // CHECK: test_vmlal_high_u8
+  return vmlal_high_u8(a, b, c);
+  // CHECK: umlal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
+  // CHECK: test_vmlal_high_u16
+  return vmlal_high_u16(a, b, c);
+  // CHECK: umlal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
+  // CHECK: test_vmlal_high_u32
+  return vmlal_high_u32(a, b, c);
+  // CHECK: umlal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
+  // CHECK: test_vmlsl_s8
+  return vmlsl_s8(a, b, c);
+  // CHECK: smlsl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+  // CHECK: test_vmlsl_s16
+  return vmlsl_s16(a, b, c);
+  // CHECK: smlsl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+  // CHECK: test_vmlsl_s32
+  return vmlsl_s32(a, b, c);
+  // CHECK: smlsl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
+  // CHECK: test_vmlsl_u8
+  return vmlsl_u8(a, b, c);
+  // CHECK: umlsl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
+  // CHECK: test_vmlsl_u16
+  return vmlsl_u16(a, b, c);
+  // CHECK: umlsl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
+  // CHECK: test_vmlsl_u32
+  return vmlsl_u32(a, b, c);
+  // CHECK: umlsl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
+  // CHECK: test_vmlsl_high_s8
+  return vmlsl_high_s8(a, b, c);
+  // CHECK: smlsl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+  // CHECK: test_vmlsl_high_s16
+  return vmlsl_high_s16(a, b, c);
+  // CHECK: smlsl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+  // CHECK: test_vmlsl_high_s32
+  return vmlsl_high_s32(a, b, c);
+  // CHECK: smlsl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
+  // CHECK: test_vmlsl_high_u8
+  return vmlsl_high_u8(a, b, c);
+  // CHECK: umlsl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
+  // CHECK: test_vmlsl_high_u16
+  return vmlsl_high_u16(a, b, c);
+  // CHECK: umlsl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
+  // CHECK: test_vmlsl_high_u32
+  return vmlsl_high_u32(a, b, c);
+  // CHECK: umlsl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vqdmull_s16
+  return vqdmull_s16(a, b);
+  // CHECK: sqdmull {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vqdmull_s32
+  return vqdmull_s32(a, b);
+  // CHECK: sqdmull {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+  // CHECK: test_vqdmlal_s16
+  return vqdmlal_s16(a, b, c);
+  // CHECK: sqdmlal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+
+int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+  // CHECK: test_vqdmlal_s32
+  return vqdmlal_s32(a, b, c);
+  // CHECK: sqdmlal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+  // CHECK: test_vqdmlsl_s16
+  return vqdmlsl_s16(a, b, c);
+  // CHECK: sqdmlsl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+
+int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+  // CHECK: test_vqdmlsl_s32
+  return vqdmlsl_s32(a, b, c);
+  // CHECK: sqdmlsl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vqdmull_high_s16
+  return vqdmull_high_s16(a, b);
+  // CHECK: sqdmull2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vqdmull_high_s32
+  return vqdmull_high_s32(a, b);
+  // CHECK: sqdmull2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+  // CHECK: test_vqdmlal_high_s16
+  return vqdmlal_high_s16(a, b, c);
+  // CHECK: sqdmlal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+  // CHECK: test_vqdmlal_high_s32
+  return vqdmlal_high_s32(a, b, c);
+  // CHECK: sqdmlal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+  // CHECK: test_vqdmlsl_high_s16
+  return vqdmlsl_high_s16(a, b, c);
+  // CHECK: sqdmlsl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+  // CHECK: test_vqdmlsl_high_s32
+  return vqdmlsl_high_s32(a, b, c);
+  // CHECK: sqdmlsl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
+  // CHECK: test_vmull_p8
+  return vmull_p8(a, b);
+  // CHECK: pmull {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+
+poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
+  // CHECK: test_vmull_high_p8
+  return vmull_high_p8(a, b);
+  // CHECK: pmull2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}

Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=190289&r1=190288&r2=190289&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Sun Sep  8 21:21:08 2013
@@ -40,15 +40,22 @@ enum OpKind {
   OpUnavailable,
   OpAdd,
   OpAddl,
+  OpAddlHi,
   OpAddw,
+  OpAddwHi,
   OpSub,
   OpSubl,
+  OpSublHi,
   OpSubw,
+  OpSubwHi,
   OpMul,
   OpMla,
   OpMlal,
+  OpMullHi,
+  OpMlalHi,
   OpMls,
   OpMlsl,
+  OpMlslHi,
   OpMulN,
   OpMlaN,
   OpMlsN,
@@ -88,9 +95,18 @@ enum OpKind {
   OpRev32,
   OpRev64,
   OpReinterpret,
+  OpAddhnHi,
+  OpRAddhnHi,
+  OpSubhnHi,
+  OpRSubhnHi,
   OpAbdl,
+  OpAbdlHi,
   OpAba,
   OpAbal,
+  OpAbalHi,
+  OpQDMullHi,
+  OpQDMlalHi,
+  OpQDMlslHi,
   OpDiv,
   OpLongHi,
   OpNarrowHi,
@@ -159,15 +175,22 @@ public:
     OpMap["OP_UNAVAILABLE"] = OpUnavailable;
     OpMap["OP_ADD"]   = OpAdd;
     OpMap["OP_ADDL"]  = OpAddl;
+    OpMap["OP_ADDLHi"] = OpAddlHi;
     OpMap["OP_ADDW"]  = OpAddw;
+    OpMap["OP_ADDWHi"] = OpAddwHi;
     OpMap["OP_SUB"]   = OpSub;
     OpMap["OP_SUBL"]  = OpSubl;
+    OpMap["OP_SUBLHi"] = OpSublHi;
     OpMap["OP_SUBW"]  = OpSubw;
+    OpMap["OP_SUBWHi"] = OpSubwHi;
     OpMap["OP_MUL"]   = OpMul;
     OpMap["OP_MLA"]   = OpMla;
     OpMap["OP_MLAL"]  = OpMlal;
+    OpMap["OP_MULLHi"]  = OpMullHi;
+    OpMap["OP_MLALHi"]  = OpMlalHi;
     OpMap["OP_MLS"]   = OpMls;
     OpMap["OP_MLSL"]  = OpMlsl;
+    OpMap["OP_MLSLHi"] = OpMlslHi;
     OpMap["OP_MUL_N"] = OpMulN;
     OpMap["OP_MLA_N"] = OpMlaN;
     OpMap["OP_MLS_N"] = OpMlsN;
@@ -207,9 +230,18 @@ public:
     OpMap["OP_REV32"] = OpRev32;
     OpMap["OP_REV64"] = OpRev64;
     OpMap["OP_REINT"] = OpReinterpret;
+    OpMap["OP_ADDHNHi"] = OpAddhnHi;
+    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
+    OpMap["OP_SUBHNHi"] = OpSubhnHi;
+    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
     OpMap["OP_ABDL"]  = OpAbdl;
+    OpMap["OP_ABDLHi"] = OpAbdlHi;
     OpMap["OP_ABA"]   = OpAba;
     OpMap["OP_ABAL"]  = OpAbal;
+    OpMap["OP_ABALHi"] = OpAbalHi;
+    OpMap["OP_QDMULLHi"] = OpQDMullHi;
+    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
+    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
     OpMap["OP_DIV"] = OpDiv;
     OpMap["OP_LONG_HI"] = OpLongHi;
     OpMap["OP_NARROW_HI"] = OpNarrowHi;
@@ -326,6 +358,29 @@ static char Narrow(const char t) {
   }
 }
 
+static std::string GetNarrowTypestr(StringRef ty)
+{
+  std::string s;
+  for (size_t i = 0, end = ty.size(); i < end; i++) {
+    switch (ty[i]) {
+      case 's':
+        s += 'c';
+        break;
+      case 'i':
+        s += 's';
+        break;
+      case 'l':
+        s += 'i';
+        break;
+      default:
+        s += ty[i];
+        break;
+    }
+  }
+
+  return s;
+}
+
 /// For a particular StringRef, return the base type code, and whether it has
 /// the quad-vector, polynomial, or unsigned modifiers set.
 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
@@ -426,6 +481,10 @@ static char ModType(const char mod, char
       if (type == 'h')
         quad = false;
       break;
+    case 'q':
+      type = Narrow(type);
+      quad = true;
+      break;
     case 'e':
       type = Narrow(type);
       usgn = true;
@@ -1286,13 +1345,60 @@ static std::string GenMacroLocals(const
 }
 
 // Use the vmovl builtin to sign-extend or zero-extend a vector.
-static std::string Extend(StringRef typestr, const std::string &a) {
+static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
+  std::string s, high;
+  high = h ? "_high" : "";
+  s = MangleName("vmovl" + high, typestr, ClassS);
+  s += "(" + a + ")";
+  return s;
+}
+
+// Get the high 64-bit part of a vector
+static std::string GetHigh(const std::string &a, StringRef typestr) {
   std::string s;
-  s = MangleName("vmovl", typestr, ClassS);
+  s = MangleName("vget_high", typestr, ClassS);
   s += "(" + a + ")";
   return s;
 }
 
+// Gen operation with two operands and get high 64-bit for both of two operands.
+static std::string Gen2OpWith2High(StringRef typestr,
+                                   const std::string &op,
+                                   const std::string &a,
+                                   const std::string &b) {
+  std::string s;
+  std::string Op1 = GetHigh(a, typestr);
+  std::string Op2 = GetHigh(b, typestr);
+  s = MangleName(op, typestr, ClassS);
+  s += "(" + Op1 + ", " + Op2 + ");";
+  return s;
+}
+
+// Gen operation with three operands and get high 64-bit of the latter 
+// two operands.
+static std::string Gen3OpWith2High(StringRef typestr,
+                                   const std::string &op,
+                                   const std::string &a,
+                                   const std::string &b,
+                                   const std::string &c) {
+  std::string s;
+  std::string Op1 = GetHigh(b, typestr);
+  std::string Op2 = GetHigh(c, typestr);
+  s = MangleName(op, typestr, ClassS);
+  s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
+  return s;
+}
+
+// Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
+static std::string GenCombine(std::string typestr,
+                              const std::string &a,
+                              const std::string &b) {
+  std::string s;
+  s = MangleName("vcombine", typestr, ClassS);
+  s += "(" + a + ", " + b + ")";
+  return s;
+}
+
 static std::string Duplicate(unsigned nElts, StringRef typestr,
                              const std::string &a) {
   std::string s;
@@ -1368,18 +1474,30 @@ static std::string GenOpString(const std
   case OpAddl:
     s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
     break;
+  case OpAddlHi:
+    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
+    break;
   case OpAddw:
     s += "__a + " + Extend(typestr, "__b") + ";";
     break;
+  case OpAddwHi:
+    s += "__a + " + Extend(typestr, "__b", 1) + ";";
+    break;
   case OpSub:
     s += "__a - __b;";
     break;
   case OpSubl:
     s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
     break;
+  case OpSublHi:
+    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
+    break;
   case OpSubw:
     s += "__a - " + Extend(typestr, "__b") + ";";
     break;
+  case OpSubwHi:
+    s += "__a - " + Extend(typestr, "__b", 1) + ";";
+    break;
   case OpMulN:
     s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
     break;
@@ -1413,6 +1531,12 @@ static std::string GenOpString(const std
   case OpMlal:
     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
     break;
+  case OpMullHi:
+    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
+    break;
+  case OpMlalHi:
+    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
+    break;
   case OpMlsN:
     s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
     break;
@@ -1433,6 +1557,9 @@ static std::string GenOpString(const std
   case OpMlsl:
     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
     break;
+  case OpMlslHi:
+    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
+    break;
   case OpQDMullLane:
     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
       SplatLane(nElts, "__b", "__c") + ");";
@@ -1560,23 +1687,51 @@ static std::string GenOpString(const std
     }
     break;
   }
+  case OpAbdlHi:
+    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
+    break;
+  case OpAddhnHi: {
+    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
+    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
+    s += ";";
+    break;
+  }
+  case OpRAddhnHi: {
+    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
+    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
+    s += ";";
+    break;
+  }
+  case OpSubhnHi: {
+    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
+    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
+    s += ";";
+    break;
+  }
+  case OpRSubhnHi: {
+    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
+    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
+    s += ";";
+    break;
+  }
   case OpAba:
     s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
     break;
-  case OpAbal: {
-    s += "__a + ";
-    std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
-    if (typestr[0] != 'U') {
-      // vabd results are always unsigned and must be zero-extended.
-      std::string utype = "U" + typestr.str();
-      s += "(" + TypeString(proto[0], typestr) + ")";
-      abd = "(" + TypeString('d', utype) + ")" + abd;
-      s += Extend(utype, abd) + ";";
-    } else {
-      s += Extend(typestr, abd) + ";";
-    }
+  case OpAbal:
+    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
+    break;
+  case OpAbalHi:
+    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
+    break;
+  case OpQDMullHi:
+    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
+    break;
+  case OpQDMlalHi:
+    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
+    break;
+  case OpQDMlslHi:
+    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
     break;
-  }
   case OpDiv:
     s += "__a / __b;";
     break;
@@ -1993,6 +2148,7 @@ void NeonEmitter::run(raw_ostream &OS) {
   emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
   emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
   emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
+  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
 
   // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
   // common intrinsics appear only once in the output stream.
@@ -2014,6 +2170,10 @@ void NeonEmitter::run(raw_ostream &OS) {
   // Emit AArch64-specific intrinsics.
   OS << "#ifdef __aarch64__\n";
 
+  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
+  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
+  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
+
   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
     Record *R = RV[i];
 





More information about the cfe-commits mailing list