r194660 - [AArch64 neon] support poly64 and relevant intrinsic functions.
Kevin Qin
Kevin.Qin at arm.com
Wed Nov 13 19:29:16 PST 2013
Author: kevinqin
Date: Wed Nov 13 21:29:16 2013
New Revision: 194660
URL: http://llvm.org/viewvc/llvm-project?rev=194660&view=rev
Log:
[AArch64 neon] support poly64 and relevant intrinsic functions.
Modified:
cfe/trunk/include/clang/Basic/TargetBuiltins.h
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Sema/SemaChecking.cpp
cfe/trunk/lib/Sema/SemaType.cpp
cfe/trunk/utils/TableGen/NeonEmitter.cpp
Modified: cfe/trunk/include/clang/Basic/TargetBuiltins.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/TargetBuiltins.h?rev=194660&r1=194659&r2=194660&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/TargetBuiltins.h (original)
+++ cfe/trunk/include/clang/Basic/TargetBuiltins.h Wed Nov 13 21:29:16 2013
@@ -90,6 +90,7 @@ namespace clang {
Int64,
Poly8,
Poly16,
+ Poly64,
Float16,
Float32,
Float64
Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=194660&r1=194659&r2=194660&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Wed Nov 13 21:29:16 2013
@@ -519,23 +519,23 @@ let isA64 = 1 in {
////////////////////////////////////////////////////////////////////////////////
// Load/Store
-// With additional QUl, Ql, Qd type.
+// With additional QUl, Ql, Qd, Pl, QPl type.
def LD1 : WInst<"vld1", "dc",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def LD2 : WInst<"vld2", "2c",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def LD3 : WInst<"vld3", "3c",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def LD4 : WInst<"vld4", "4c",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def ST1 : WInst<"vst1", "vpd",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def ST2 : WInst<"vst2", "vp2",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def ST3 : WInst<"vst3", "vp3",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def ST4 : WInst<"vst4", "vp4",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
////////////////////////////////////////////////////////////////////////////////
// Addition
@@ -570,8 +570,9 @@ def FMLS : SInst<"vfms", "dddd", "fQfQd"
////////////////////////////////////////////////////////////////////////////////
// Logical operations
-// With additional Qd type.
-def BSL : SInst<"vbsl", "dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQd">;
+// With additional Qd, Pl, QPl type.
+def BSL : SInst<"vbsl", "dudd",
+ "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQdPlQPl">;
////////////////////////////////////////////////////////////////////////////////
// Absolute Difference
@@ -658,13 +659,16 @@ def FSQRT : SInst<"vsqrt", "dd", "fQfQ
////////////////////////////////////////////////////////////////////////////////
// Comparison
-// With additional Qd type.
+// With additional Qd, Pl, QPl type.
+def VVCEQ : IOpInst<"vceq", "udd", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPcPlQPl",
+ OP_EQ>;
def FCAGE : IInst<"vcage", "udd", "fQfQd">;
def FCAGT : IInst<"vcagt", "udd", "fQfQd">;
def FCALE : IInst<"vcale", "udd", "fQfQd">;
def FCALT : IInst<"vcalt", "udd", "fQfQd">;
// With additional Ql, QUl, Qd types.
-def CMTST : WInst<"vtst", "udd", "csiUcUsUiPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
+def CMTST : WInst<"vtst", "udd",
+ "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPslUlQlQUlPlQPl">;
def CFMEQ : SOpInst<"vceq", "udd",
"csifUcUsUiPcQcQsQiQlQfQUcQUsQUiQUlQPcQd", OP_EQ>;
def CFMGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GE>;
@@ -713,6 +717,13 @@ let isShift = 1 in {
def SHLL_HIGH_N : SOpInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi",
OP_LONG_HI>;
+////////////////////////////////////////////////////////////////////////////////
+// Shifts with insert, with additional Pl, QPl type.
+def SRI_N : WInst<"vsri_n", "dddi",
+ "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPsPlQPl">;
+def SLI_N : WInst<"vsli_n", "dddi",
+ "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPsPlQPl">;
+
// Right shift narrow high
def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "hmdi",
"HsHiHlHUsHUiHUl", OP_NARROW_HI>;
@@ -764,36 +775,40 @@ def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high
////////////////////////////////////////////////////////////////////////////////
// Extract or insert element from vector
def GET_LANE : IInst<"vget_lane", "sdi",
- "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">;
+ "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">;
def SET_LANE : IInst<"vset_lane", "dsdi",
- "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">;
+ "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">;
def COPY_LANE : IOpInst<"vcopy_lane", "ddidi",
- "csiPcPsUcUsUiPcPsf", OP_COPY_LN>;
+ "csiPcPsUcUsUiPcPsfPl", OP_COPY_LN>;
def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi",
- "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQd", OP_COPYQ_LN>;
+ "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPYQ_LN>;
def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki",
"csiPcPsUcUsUif", OP_COPY_LNQ>;
def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi",
- "QcQsQiQlQUcQUsQUiQUlQPcQPsQfd", OP_COPY_LN>;
-
+ "QcQsQiQlQUcQUsQUiQUlQPcQPsQfdQPl", OP_COPY_LN>;
////////////////////////////////////////////////////////////////////////////////
// Set all lanes to same value
def VDUP_LANE1: WOpInst<"vdup_lane", "dgi",
- "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQd",
+ "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
OP_DUP_LN>;
def VDUP_LANE2: WOpInst<"vdup_laneq", "dki",
- "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQd",
+ "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
OP_DUP_LN>;
def DUP_N : WOpInst<"vdup_n", "ds",
- "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd",
+ "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQdPlQPl",
OP_DUP>;
def MOV_N : WOpInst<"vmov_n", "ds",
"UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd",
OP_DUP>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Combining vectors, with additional Pl
+def COMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfUcUsUiUlPcPsPl", OP_CONC>;
+
////////////////////////////////////////////////////////////////////////////////
-//Initialize a vector from bit pattern
-def CREATE : NoTestOpInst<"vcreate", "dl", "csihfdUcUsUiUlPcPsl", OP_CAST>;
+//Initialize a vector from bit pattern, with additional Pl
+def CREATE : NoTestOpInst<"vcreate", "dl", "csihfdUcUsUiUlPcPslPl", OP_CAST>;
////////////////////////////////////////////////////////////////////////////////
@@ -864,7 +879,7 @@ def FMINNMV : SInst<"vminnmv", "sd", "Qf
////////////////////////////////////////////////////////////////////////////////
// Newly added Vector Extract for f64
def VEXT_A64 : WInst<"vext", "dddi",
- "cUcPcsUsPsiUilUlfdQcQUcQPcQsQUsQPsQiQUiQlQUlQfQd">;
+ "cUcPcsUsPsiUilUlfdQcQUcQPcQsQUsQPsQiQUiQlQUlQfQdPlQPl">;
////////////////////////////////////////////////////////////////////////////////
// Crypto
@@ -888,17 +903,17 @@ def SHA256SU1 : SInst<"vsha256su1", "ddd
////////////////////////////////////////////////////////////////////////////////
// Permutation
def VTRN1 : SOpInst<"vtrn1", "ddd",
- "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_TRN1>;
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>;
def VZIP1 : SOpInst<"vzip1", "ddd",
- "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_ZIP1>;
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP1>;
def VUZP1 : SOpInst<"vuzp1", "ddd",
- "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP1>;
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP1>;
def VTRN2 : SOpInst<"vtrn2", "ddd",
- "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_TRN2>;
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN2>;
def VZIP2 : SOpInst<"vzip2", "ddd",
- "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_ZIP2>;
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP2>;
def VUZP2 : SOpInst<"vuzp2", "ddd",
- "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP2>;
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP2>;
////////////////////////////////////////////////////////////////////////////////
// Table lookup
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=194660&r1=194659&r2=194660&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Nov 13 21:29:16 2013
@@ -1622,6 +1622,7 @@ static llvm::VectorType *GetNeonType(Cod
case NeonTypeFlags::Int32:
return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
case NeonTypeFlags::Int64:
+ case NeonTypeFlags::Poly64:
return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
case NeonTypeFlags::Float32:
return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=194660&r1=194659&r2=194660&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Wed Nov 13 21:29:16 2013
@@ -339,6 +339,7 @@ static unsigned RFT(unsigned t, bool shi
case NeonTypeFlags::Int32:
return shift ? 31 : (2 << IsQuad) - 1;
case NeonTypeFlags::Int64:
+ case NeonTypeFlags::Poly64:
return shift ? 63 : (1 << IsQuad) - 1;
case NeonTypeFlags::Float16:
assert(!shift && "cannot shift float types!");
@@ -356,7 +357,8 @@ static unsigned RFT(unsigned t, bool shi
/// getNeonEltType - Return the QualType corresponding to the elements of
/// the vector type specified by the NeonTypeFlags. This is used to check
/// the pointer arguments for Neon load/store intrinsics.
-static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context) {
+static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context,
+ bool IsAArch64) {
switch (Flags.getEltType()) {
case NeonTypeFlags::Int8:
return Flags.isUnsigned() ? Context.UnsignedCharTy : Context.SignedCharTy;
@@ -367,11 +369,13 @@ static QualType getNeonEltType(NeonTypeF
case NeonTypeFlags::Int64:
return Flags.isUnsigned() ? Context.UnsignedLongLongTy : Context.LongLongTy;
case NeonTypeFlags::Poly8:
- return Context.SignedCharTy;
+ return IsAArch64 ? Context.UnsignedCharTy : Context.SignedCharTy;
case NeonTypeFlags::Poly16:
- return Context.ShortTy;
+ return IsAArch64 ? Context.UnsignedShortTy : Context.ShortTy;
+ case NeonTypeFlags::Poly64:
+ return Context.UnsignedLongLongTy;
case NeonTypeFlags::Float16:
- return Context.UnsignedShortTy;
+ return Context.HalfTy;
case NeonTypeFlags::Float32:
return Context.FloatTy;
case NeonTypeFlags::Float64:
@@ -415,7 +419,7 @@ bool Sema::CheckAArch64BuiltinFunctionCa
Arg = ICE->getSubExpr();
ExprResult RHS = DefaultFunctionArrayLvalueConversion(Arg);
QualType RHSTy = RHS.get()->getType();
- QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context);
+ QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context, true);
if (HasConstPtr)
EltTy = EltTy.withConst();
QualType LHSTy = Context.getPointerType(EltTy);
@@ -602,7 +606,7 @@ bool Sema::CheckARMBuiltinFunctionCall(u
Arg = ICE->getSubExpr();
ExprResult RHS = DefaultFunctionArrayLvalueConversion(Arg);
QualType RHSTy = RHS.get()->getType();
- QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context);
+ QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context, false);
if (HasConstPtr)
EltTy = EltTy.withConst();
QualType LHSTy = Context.getPointerType(EltTy);
Modified: cfe/trunk/lib/Sema/SemaType.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaType.cpp?rev=194660&r1=194659&r2=194660&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaType.cpp (original)
+++ cfe/trunk/lib/Sema/SemaType.cpp Wed Nov 13 21:29:16 2013
@@ -4766,9 +4766,10 @@ static bool isPermittedNeonBaseType(Qual
if (VecKind == VectorType::NeonPolyVector) {
if (IsAArch64) {
- // AArch64 polynomial vectors are unsigned
+ // AArch64 polynomial vectors are unsigned and support poly64.
return BTy->getKind() == BuiltinType::UChar ||
- BTy->getKind() == BuiltinType::UShort;
+ BTy->getKind() == BuiltinType::UShort ||
+ BTy->getKind() == BuiltinType::ULongLong;
} else {
// AArch32 polynomial vector are signed.
return BTy->getKind() == BuiltinType::SChar ||
Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=194660&r1=194659&r2=194660&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Wed Nov 13 21:29:16 2013
@@ -170,6 +170,7 @@ public:
Int64,
Poly8,
Poly16,
+ Poly64,
Float16,
Float32,
Float64
@@ -626,7 +627,7 @@ static std::string TypeString(const char
s += quad ? "x4" : "x2";
break;
case 'l':
- s += "int64";
+ s += (poly && !usgn)? "poly64" : "int64";
if (scal)
break;
s += quad ? "x2" : "x1";
@@ -810,7 +811,7 @@ static void InstructionTypeCode(const St
break;
case 'l':
switch (ck) {
- case ClassS: typeCode = usgn ? "u64" : "s64"; break;
+ case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
case ClassI: typeCode = "i64"; break;
case ClassW: typeCode = "64"; break;
default: break;
@@ -2040,7 +2041,7 @@ static unsigned GetNeonEnum(const std::s
ET = NeonTypeFlags::Int32;
break;
case 'l':
- ET = NeonTypeFlags::Int64;
+ ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
break;
case 'h':
ET = NeonTypeFlags::Float16;
@@ -2325,6 +2326,7 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << "#ifdef __aarch64__\n";
OS << "typedef uint8_t poly8_t;\n";
OS << "typedef uint16_t poly16_t;\n";
+ OS << "typedef uint64_t poly64_t;\n";
OS << "#else\n";
OS << "typedef int8_t poly8_t;\n";
OS << "typedef int16_t poly16_t;\n";
@@ -2332,19 +2334,21 @@ void NeonEmitter::run(raw_ostream &OS) {
// Emit Neon vector typedefs.
std::string TypedefTypes(
- "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPs");
+ "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
SmallVector<StringRef, 24> TDTypeVec;
ParseTypes(0, TypedefTypes, TDTypeVec);
// Emit vector typedefs.
bool isA64 = false;
+ bool preinsert;
+ bool postinsert;
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
bool dummy, quad = false, poly = false;
char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
- bool preinsert = false;
- bool postinsert = false;
+ preinsert = false;
+ postinsert = false;
- if (type == 'd') {
+ if (type == 'd' || (type == 'l' && poly)) {
preinsert = isA64? false: true;
isA64 = true;
} else {
@@ -2370,6 +2374,9 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
}
+ postinsert = isA64? true: false;
+ if (postinsert)
+ OS << "#endif\n";
OS << "\n";
// Emit struct typedefs.
@@ -2378,10 +2385,10 @@ void NeonEmitter::run(raw_ostream &OS) {
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
bool dummy, quad = false, poly = false;
char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
- bool preinsert = false;
- bool postinsert = false;
+ preinsert = false;
+ postinsert = false;
- if (type == 'd') {
+ if (type == 'd' || (type == 'l' && poly)) {
preinsert = isA64? false: true;
isA64 = true;
} else {
@@ -2403,6 +2410,10 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << "\n";
}
}
+ postinsert = isA64? true: false;
+ if (postinsert)
+ OS << "#endif\n";
+ OS << "\n";
OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
More information about the cfe-commits mailing list