r196189 - Add some missing AArch64 Neon intrinsics like vmull_high_n_s16 and friends.

Mon Dec 2 17:28:56 PST 2013

Author: jiangning
Date: Mon Dec  2 19:28:55 2013
New Revision: 196189

URL: http://llvm.org/viewvc/llvm-project?rev=196189&view=rev
Log:
Add some missing AArch64 Neon intrinsics like vmull_high_n_s16 and friends.

Modified:
    cfe/trunk/include/clang/Basic/arm_neon.td
    cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
    cfe/trunk/utils/TableGen/NeonEmitter.cpp

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=196189&r1=196188&r2=196189&view=diff
==============================================================================

--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Mon Dec  2 19:28:55 2013
@@ -30,13 +30,18 @@ def OP_MUL   : Op;
 def OP_MLA   : Op;
 def OP_MLAL  : Op;
 def OP_MULLHi : Op;
+def OP_MULLHi_N : Op;
 def OP_MLALHi : Op;
+def OP_MLALHi_N : Op;
 def OP_MLS   : Op;
 def OP_MLSL  : Op;
 def OP_MLSLHi : Op;
+def OP_MLSLHi_N : Op;
 def OP_MUL_N : Op;
 def OP_MLA_N : Op;
 def OP_MLS_N : Op;
+def OP_FMLA_N : Op;
+def OP_FMLS_N : Op;
 def OP_MLAL_N : Op;
 def OP_MLSL_N : Op;
 def OP_MUL_LN: Op;
@@ -104,8 +109,11 @@ def OP_ABA   : Op;
 def OP_ABAL  : Op;
 def OP_ABALHi : Op;
 def OP_QDMULLHi : Op;
+def OP_QDMULLHi_N : Op;
 def OP_QDMLALHi : Op;
+def OP_QDMLALHi_N : Op;
 def OP_QDMLSLHi : Op;
+def OP_QDMLSLHi_N : Op;
 def OP_DIV  : Op;
 def OP_LONG_HI : Op;
 def OP_NARROW_HI : Op;
@@ -626,6 +634,12 @@ def FMLA : SInst<"vfma", "dddd", "fQfQd"
 def FMLS : SInst<"vfms", "dddd", "fQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
+// MUL, FMA, FMS definitions with scalar argument
+def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>;
+def FMLA_N : SOpInst<"vfma_n", "ddds", "fQf", OP_FMLA_N>;
+def FMLS_N : SOpInst<"vfms_n", "ddds", "fQf", OP_FMLS_N>;
+
+////////////////////////////////////////////////////////////////////////////////
 // Logical operations
 // With additional Qd, Ql, QPl type.
 def BSL : SInst<"vbsl", "dudd",
@@ -817,8 +831,11 @@ def VABDL_HIGH   : SOpInst<"vabdl_high",
 def VABAL_HIGH   : SOpInst<"vabal_high", "wwkk", "csiUcUsUi", OP_ABALHi>;
 
 def VMULL_HIGH   : SOpInst<"vmull_high", "wkk", "csiUcUsUiPc", OP_MULLHi>;
+def VMULL_HIGH_N : SOpInst<"vmull_high_n", "wks", "siUsUi", OP_MULLHi_N>;
 def VMLAL_HIGH   : SOpInst<"vmlal_high", "wwkk", "csiUcUsUi", OP_MLALHi>;
+def VMLAL_HIGH_N : SOpInst<"vmlal_high_n", "wwks", "siUsUi", OP_MLALHi_N>;
 def VMLSL_HIGH   : SOpInst<"vmlsl_high", "wwkk", "csiUcUsUi", OP_MLSLHi>;
+def VMLSL_HIGH_N : SOpInst<"vmlsl_high_n", "wwks", "siUsUi", OP_MLSLHi_N>;
 
 def VADDHN_HIGH  : SOpInst<"vaddhn_high", "qhkk", "silUsUiUl", OP_ADDHNHi>;
 def VRADDHN_HIGH : SOpInst<"vraddhn_high", "qhkk", "silUsUiUl", OP_RADDHNHi>;
@@ -826,8 +843,11 @@ def VSUBHN_HIGH  : SOpInst<"vsubhn_high"
 def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "qhkk", "silUsUiUl", OP_RSUBHNHi>;
 
 def VQDMULL_HIGH : SOpInst<"vqdmull_high", "wkk", "si", OP_QDMULLHi>;
+def VQDMULL_HIGH_N : SOpInst<"vqdmull_high_n", "wks", "si", OP_QDMULLHi_N>;
 def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>;
+def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "wwks", "si", OP_QDMLALHi_N>;
 def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>;
+def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "wwks", "si", OP_QDMLSLHi_N>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Extract or insert element from vector

Modified: cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-2velem.c?rev=196189&r1=196188&r2=196189&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-2velem.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-2velem.c Mon Dec  2 19:28:55 2013
@@ -1547,3 +1547,152 @@ float64x2_t test_vmulxq_laneq_f64_0(floa
   // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 }
 
+int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) {
+  // CHECK: test_vmull_high_n_s16
+  return vmull_high_n_s16(a, b);
+  // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) {
+  // CHECK: test_vmull_high_n_s32
+  return vmull_high_n_s32(a, b);
+  // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) {
+  // CHECK: test_vmull_high_n_u16
+  return vmull_high_n_u16(a, b);
+  // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) {
+  // CHECK: test_vmull_high_n_u32
+  return vmull_high_n_u32(a, b);
+  // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) {
+  // CHECK: test_vqdmull_high_n_s16
+  return vqdmull_high_n_s16(a, b);
+  // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) {
+  // CHECK: test_vqdmull_high_n_s32
+  return vqdmull_high_n_s32(a, b);
+  // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
+  // CHECK: test_vmlal_high_n_s16
+  return vmlal_high_n_s16(a, b, c);
+  // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
+  // CHECK: test_vmlal_high_n_s32
+  return vmlal_high_n_s32(a, b, c);
+  // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) {
+  // CHECK: test_vmlal_high_n_u16
+  return vmlal_high_n_u16(a, b, c);
+  // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) {
+  // CHECK: test_vmlal_high_n_u32
+  return vmlal_high_n_u32(a, b, c);
+  // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
+  // CHECK: test_vqdmlal_high_n_s16
+  return vqdmlal_high_n_s16(a, b, c);
+  // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
+  // CHECK: test_vqdmlal_high_n_s32
+  return vqdmlal_high_n_s32(a, b, c);
+  // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
+  // CHECK: test_vmlsl_high_n_s16
+  return vmlsl_high_n_s16(a, b, c);
+  // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
+  // CHECK: test_vmlsl_high_n_s32
+  return vmlsl_high_n_s32(a, b, c);
+  // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) {
+  // CHECK: test_vmlsl_high_n_u16
+  return vmlsl_high_n_u16(a, b, c);
+  // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) {
+  // CHECK: test_vmlsl_high_n_u32
+  return vmlsl_high_n_u32(a, b, c);
+  // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
+  // CHECK: test_vqdmlsl_high_n_s16
+  return vqdmlsl_high_n_s16(a, b, c);
+  // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
+  // CHECK: test_vqdmlsl_high_n_s32
+  return vqdmlsl_high_n_s32(a, b, c);
+  // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
+  // CHECK: test_vmul_n_f32
+  return vmul_n_f32(a, b);
+  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
+  // CHECK: test_vmulq_n_f32
+  return vmulq_n_f32(a, b);
+  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float64x2_t test_vmulq_n_f64(float64x2_t a, float64_t b) {
+  // CHECK: test_vmulq_n_f64
+  return vmulq_n_f64(a, b);
+  // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
+  // CHECK: test_vfma_n_f32
+  return vfma_n_f32(a, b, n);
+  // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
+  // CHECK: test_vfmaq_n_f32
+  return vfmaq_n_f32(a, b, n);
+  // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
+  // CHECK: test_vfms_n_f32
+  return vfms_n_f32(a, b, n);
+  // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
+  // CHECK: test_vfmsq_n_f32
+  return vfmsq_n_f32(a, b, n);
+  // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}

Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=196189&r1=196188&r2=196189&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Mon Dec  2 19:28:55 2013
@@ -52,13 +52,18 @@ enum OpKind {
   OpMla,
   OpMlal,
   OpMullHi,
+  OpMullHiN,
   OpMlalHi,
+  OpMlalHiN,
   OpMls,
   OpMlsl,
   OpMlslHi,
+  OpMlslHiN,
   OpMulN,
   OpMlaN,
   OpMlsN,
+  OpFMlaN,
+  OpFMlsN,
   OpMlalN,
   OpMlslN,
   OpMulLane,
@@ -126,8 +131,11 @@ enum OpKind {
   OpAbal,
   OpAbalHi,
   OpQDMullHi,
+  OpQDMullHiN,
   OpQDMlalHi,
+  OpQDMlalHiN,
   OpQDMlslHi,
+  OpQDMlslHiN,
   OpDiv,
   OpLongHi,
   OpNarrowHi,
@@ -224,13 +232,18 @@ public:
     OpMap["OP_MLA"]   = OpMla;
     OpMap["OP_MLAL"]  = OpMlal;
     OpMap["OP_MULLHi"]  = OpMullHi;
+    OpMap["OP_MULLHi_N"]  = OpMullHiN;
     OpMap["OP_MLALHi"]  = OpMlalHi;
+    OpMap["OP_MLALHi_N"]  = OpMlalHiN;
     OpMap["OP_MLS"]   = OpMls;
     OpMap["OP_MLSL"]  = OpMlsl;
     OpMap["OP_MLSLHi"] = OpMlslHi;
+    OpMap["OP_MLSLHi_N"] = OpMlslHiN;
     OpMap["OP_MUL_N"] = OpMulN;
     OpMap["OP_MLA_N"] = OpMlaN;
     OpMap["OP_MLS_N"] = OpMlsN;
+    OpMap["OP_FMLA_N"] = OpFMlaN;
+    OpMap["OP_FMLS_N"] = OpFMlsN;
     OpMap["OP_MLAL_N"] = OpMlalN;
     OpMap["OP_MLSL_N"] = OpMlslN;
     OpMap["OP_MUL_LN"]= OpMulLane;
@@ -298,8 +311,11 @@ public:
     OpMap["OP_ABAL"]  = OpAbal;
     OpMap["OP_ABALHi"] = OpAbalHi;
     OpMap["OP_QDMULLHi"] = OpQDMullHi;
+    OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
     OpMap["OP_QDMLALHi"] = OpQDMlalHi;
+    OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
     OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
+    OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
     OpMap["OP_DIV"] = OpDiv;
     OpMap["OP_LONG_HI"] = OpLongHi;
     OpMap["OP_NARROW_HI"] = OpNarrowHi;
@@ -1660,6 +1676,14 @@ static std::string GenOpString(const std
   case OpMul:
     s += "__a * __b;";
     break;
+  case OpFMlaN:
+    s += MangleName("vfma", typestr, ClassS);
+    s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
+    break;
+  case OpFMlsN:
+    s += MangleName("vfms", typestr, ClassS);
+    s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
+    break;
   case OpMullLane:
     s += MangleName("vmull", typestr, ClassS) + "(__a, " +
       SplatLane(nElts, "__b", "__c") + ");";
@@ -1695,9 +1719,17 @@ static std::string GenOpString(const std
   case OpMullHi:
     s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
     break;
+  case OpMullHiN:
+    s += MangleName("vmull_n", typestr, ClassS);
+    s += "(" + GetHigh("__a", typestr) + ", __b);";
+    return s;
   case OpMlalHi:
     s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
     break;
+  case OpMlalHiN:
+    s += MangleName("vmlal_n", typestr, ClassS);
+    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+    return s;
   case OpMlsN:
     s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
     break;
@@ -1737,6 +1769,10 @@ static std::string GenOpString(const std
   case OpMlslHi:
     s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
     break;
+  case OpMlslHiN:
+    s += MangleName("vmlsl_n", typestr, ClassS);
+    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+    break;
   case OpQDMullLane:
     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
       SplatLane(nElts, "__b", "__c") + ");";
@@ -2003,12 +2039,24 @@ static std::string GenOpString(const std
   case OpQDMullHi:
     s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
     break;
+  case OpQDMullHiN:
+    s += MangleName("vqdmull_n", typestr, ClassS);
+    s += "(" + GetHigh("__a", typestr) + ", __b);";
+    return s;
   case OpQDMlalHi:
     s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
     break;
+  case OpQDMlalHiN:
+    s += MangleName("vqdmlal_n", typestr, ClassS);
+    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+    return s;
   case OpQDMlslHi:
     s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
     break;
+  case OpQDMlslHiN:
+    s += MangleName("vqdmlsl_n", typestr, ClassS);
+    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+    return s;
   case OpDiv:
     s += "__a / __b;";
     break;