r196189 - Add some missing AArch64 Neon intrinsics like vmull_high_n_s16 and friends.
Jiangning Liu
jiangning.liu at arm.com
Mon Dec 2 17:28:56 PST 2013
Author: jiangning
Date: Mon Dec 2 19:28:55 2013
New Revision: 196189
URL: http://llvm.org/viewvc/llvm-project?rev=196189&view=rev
Log:
Add some missing AArch64 Neon intrinsics like vmull_high_n_s16 and friends.
Modified:
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
cfe/trunk/utils/TableGen/NeonEmitter.cpp
Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=196189&r1=196188&r2=196189&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Mon Dec 2 19:28:55 2013
@@ -30,13 +30,18 @@ def OP_MUL : Op;
def OP_MLA : Op;
def OP_MLAL : Op;
def OP_MULLHi : Op;
+def OP_MULLHi_N : Op;
def OP_MLALHi : Op;
+def OP_MLALHi_N : Op;
def OP_MLS : Op;
def OP_MLSL : Op;
def OP_MLSLHi : Op;
+def OP_MLSLHi_N : Op;
def OP_MUL_N : Op;
def OP_MLA_N : Op;
def OP_MLS_N : Op;
+def OP_FMLA_N : Op;
+def OP_FMLS_N : Op;
def OP_MLAL_N : Op;
def OP_MLSL_N : Op;
def OP_MUL_LN: Op;
@@ -104,8 +109,11 @@ def OP_ABA : Op;
def OP_ABAL : Op;
def OP_ABALHi : Op;
def OP_QDMULLHi : Op;
+def OP_QDMULLHi_N : Op;
def OP_QDMLALHi : Op;
+def OP_QDMLALHi_N : Op;
def OP_QDMLSLHi : Op;
+def OP_QDMLSLHi_N : Op;
def OP_DIV : Op;
def OP_LONG_HI : Op;
def OP_NARROW_HI : Op;
@@ -626,6 +634,12 @@ def FMLA : SInst<"vfma", "dddd", "fQfQd"
def FMLS : SInst<"vfms", "dddd", "fQfQd">;
////////////////////////////////////////////////////////////////////////////////
+// MUL, FMA, FMS definitions with scalar argument
+def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>;
+def FMLA_N : SOpInst<"vfma_n", "ddds", "fQf", OP_FMLA_N>;
+def FMLS_N : SOpInst<"vfms_n", "ddds", "fQf", OP_FMLS_N>;
+
+////////////////////////////////////////////////////////////////////////////////
// Logical operations
// With additional Qd, Ql, QPl type.
def BSL : SInst<"vbsl", "dudd",
@@ -817,8 +831,11 @@ def VABDL_HIGH : SOpInst<"vabdl_high",
def VABAL_HIGH : SOpInst<"vabal_high", "wwkk", "csiUcUsUi", OP_ABALHi>;
def VMULL_HIGH : SOpInst<"vmull_high", "wkk", "csiUcUsUiPc", OP_MULLHi>;
+def VMULL_HIGH_N : SOpInst<"vmull_high_n", "wks", "siUsUi", OP_MULLHi_N>;
def VMLAL_HIGH : SOpInst<"vmlal_high", "wwkk", "csiUcUsUi", OP_MLALHi>;
+def VMLAL_HIGH_N : SOpInst<"vmlal_high_n", "wwks", "siUsUi", OP_MLALHi_N>;
def VMLSL_HIGH : SOpInst<"vmlsl_high", "wwkk", "csiUcUsUi", OP_MLSLHi>;
+def VMLSL_HIGH_N : SOpInst<"vmlsl_high_n", "wwks", "siUsUi", OP_MLSLHi_N>;
def VADDHN_HIGH : SOpInst<"vaddhn_high", "qhkk", "silUsUiUl", OP_ADDHNHi>;
def VRADDHN_HIGH : SOpInst<"vraddhn_high", "qhkk", "silUsUiUl", OP_RADDHNHi>;
@@ -826,8 +843,11 @@ def VSUBHN_HIGH : SOpInst<"vsubhn_high"
def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "qhkk", "silUsUiUl", OP_RSUBHNHi>;
def VQDMULL_HIGH : SOpInst<"vqdmull_high", "wkk", "si", OP_QDMULLHi>;
+def VQDMULL_HIGH_N : SOpInst<"vqdmull_high_n", "wks", "si", OP_QDMULLHi_N>;
def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>;
+def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "wwks", "si", OP_QDMLALHi_N>;
def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>;
+def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "wwks", "si", OP_QDMLSLHi_N>;
////////////////////////////////////////////////////////////////////////////////
// Extract or insert element from vector
Modified: cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-2velem.c?rev=196189&r1=196188&r2=196189&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-2velem.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-2velem.c Mon Dec 2 19:28:55 2013
@@ -1547,3 +1547,152 @@ float64x2_t test_vmulxq_laneq_f64_0(floa
// CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
}
+int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) {
+ // CHECK: test_vmull_high_n_s16
+ return vmull_high_n_s16(a, b);
+ // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) {
+ // CHECK: test_vmull_high_n_s32
+ return vmull_high_n_s32(a, b);
+ // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) {
+ // CHECK: test_vmull_high_n_u16
+ return vmull_high_n_u16(a, b);
+ // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) {
+ // CHECK: test_vmull_high_n_u32
+ return vmull_high_n_u32(a, b);
+ // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) {
+ // CHECK: test_vqdmull_high_n_s16
+ return vqdmull_high_n_s16(a, b);
+ // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) {
+ // CHECK: test_vqdmull_high_n_s32
+ return vqdmull_high_n_s32(a, b);
+ // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
+ // CHECK: test_vmlal_high_n_s16
+ return vmlal_high_n_s16(a, b, c);
+ // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
+ // CHECK: test_vmlal_high_n_s32
+ return vmlal_high_n_s32(a, b, c);
+ // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) {
+ // CHECK: test_vmlal_high_n_u16
+ return vmlal_high_n_u16(a, b, c);
+ // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) {
+ // CHECK: test_vmlal_high_n_u32
+ return vmlal_high_n_u32(a, b, c);
+ // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
+ // CHECK: test_vqdmlal_high_n_s16
+ return vqdmlal_high_n_s16(a, b, c);
+ // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
+ // CHECK: test_vqdmlal_high_n_s32
+ return vqdmlal_high_n_s32(a, b, c);
+ // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
+ // CHECK: test_vmlsl_high_n_s16
+ return vmlsl_high_n_s16(a, b, c);
+ // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
+ // CHECK: test_vmlsl_high_n_s32
+ return vmlsl_high_n_s32(a, b, c);
+ // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) {
+ // CHECK: test_vmlsl_high_n_u16
+ return vmlsl_high_n_u16(a, b, c);
+ // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) {
+ // CHECK: test_vmlsl_high_n_u32
+ return vmlsl_high_n_u32(a, b, c);
+ // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
+ // CHECK: test_vqdmlsl_high_n_s16
+ return vqdmlsl_high_n_s16(a, b, c);
+ // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
+ // CHECK: test_vqdmlsl_high_n_s32
+ return vqdmlsl_high_n_s32(a, b, c);
+ // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
+ // CHECK: test_vmul_n_f32
+ return vmul_n_f32(a, b);
+ // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
+ // CHECK: test_vmulq_n_f32
+ return vmulq_n_f32(a, b);
+ // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float64x2_t test_vmulq_n_f64(float64x2_t a, float64_t b) {
+ // CHECK: test_vmulq_n_f64
+ return vmulq_n_f64(a, b);
+ // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
+ // CHECK: test_vfma_n_f32
+ return vfma_n_f32(a, b, n);
+ // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
+ // CHECK: test_vfmaq_n_f32
+ return vfmaq_n_f32(a, b, n);
+ // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
+ // CHECK: test_vfms_n_f32
+ return vfms_n_f32(a, b, n);
+ // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
+ // CHECK: test_vfmsq_n_f32
+ return vfmsq_n_f32(a, b, n);
+ // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=196189&r1=196188&r2=196189&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Mon Dec 2 19:28:55 2013
@@ -52,13 +52,18 @@ enum OpKind {
OpMla,
OpMlal,
OpMullHi,
+ OpMullHiN,
OpMlalHi,
+ OpMlalHiN,
OpMls,
OpMlsl,
OpMlslHi,
+ OpMlslHiN,
OpMulN,
OpMlaN,
OpMlsN,
+ OpFMlaN,
+ OpFMlsN,
OpMlalN,
OpMlslN,
OpMulLane,
@@ -126,8 +131,11 @@ enum OpKind {
OpAbal,
OpAbalHi,
OpQDMullHi,
+ OpQDMullHiN,
OpQDMlalHi,
+ OpQDMlalHiN,
OpQDMlslHi,
+ OpQDMlslHiN,
OpDiv,
OpLongHi,
OpNarrowHi,
@@ -224,13 +232,18 @@ public:
OpMap["OP_MLA"] = OpMla;
OpMap["OP_MLAL"] = OpMlal;
OpMap["OP_MULLHi"] = OpMullHi;
+ OpMap["OP_MULLHi_N"] = OpMullHiN;
OpMap["OP_MLALHi"] = OpMlalHi;
+ OpMap["OP_MLALHi_N"] = OpMlalHiN;
OpMap["OP_MLS"] = OpMls;
OpMap["OP_MLSL"] = OpMlsl;
OpMap["OP_MLSLHi"] = OpMlslHi;
+ OpMap["OP_MLSLHi_N"] = OpMlslHiN;
OpMap["OP_MUL_N"] = OpMulN;
OpMap["OP_MLA_N"] = OpMlaN;
OpMap["OP_MLS_N"] = OpMlsN;
+ OpMap["OP_FMLA_N"] = OpFMlaN;
+ OpMap["OP_FMLS_N"] = OpFMlsN;
OpMap["OP_MLAL_N"] = OpMlalN;
OpMap["OP_MLSL_N"] = OpMlslN;
OpMap["OP_MUL_LN"]= OpMulLane;
@@ -298,8 +311,11 @@ public:
OpMap["OP_ABAL"] = OpAbal;
OpMap["OP_ABALHi"] = OpAbalHi;
OpMap["OP_QDMULLHi"] = OpQDMullHi;
+ OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
OpMap["OP_QDMLALHi"] = OpQDMlalHi;
+ OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
+ OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
OpMap["OP_DIV"] = OpDiv;
OpMap["OP_LONG_HI"] = OpLongHi;
OpMap["OP_NARROW_HI"] = OpNarrowHi;
@@ -1660,6 +1676,14 @@ static std::string GenOpString(const std
case OpMul:
s += "__a * __b;";
break;
+ case OpFMlaN:
+ s += MangleName("vfma", typestr, ClassS);
+ s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
+ break;
+ case OpFMlsN:
+ s += MangleName("vfms", typestr, ClassS);
+ s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
+ break;
case OpMullLane:
s += MangleName("vmull", typestr, ClassS) + "(__a, " +
SplatLane(nElts, "__b", "__c") + ");";
@@ -1695,9 +1719,17 @@ static std::string GenOpString(const std
case OpMullHi:
s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
break;
+ case OpMullHiN:
+ s += MangleName("vmull_n", typestr, ClassS);
+ s += "(" + GetHigh("__a", typestr) + ", __b);";
+ return s;
case OpMlalHi:
s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
break;
+ case OpMlalHiN:
+ s += MangleName("vmlal_n", typestr, ClassS);
+ s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+ return s;
case OpMlsN:
s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
break;
@@ -1737,6 +1769,10 @@ static std::string GenOpString(const std
case OpMlslHi:
s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
break;
+ case OpMlslHiN:
+ s += MangleName("vmlsl_n", typestr, ClassS);
+ s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+ break;
case OpQDMullLane:
s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
SplatLane(nElts, "__b", "__c") + ");";
@@ -2003,12 +2039,24 @@ static std::string GenOpString(const std
case OpQDMullHi:
s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
break;
+ case OpQDMullHiN:
+ s += MangleName("vqdmull_n", typestr, ClassS);
+ s += "(" + GetHigh("__a", typestr) + ", __b);";
+ return s;
case OpQDMlalHi:
s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
break;
+ case OpQDMlalHiN:
+ s += MangleName("vqdmlal_n", typestr, ClassS);
+ s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+ return s;
case OpQDMlslHi:
s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
break;
+ case OpQDMlslHiN:
+ s += MangleName("vqdmlsl_n", typestr, ClassS);
+ s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+ return s;
case OpDiv:
s += "__a / __b;";
break;
More information about the cfe-commits mailing list