r194649 - Implement AArch64 NEON instruction set AdvSIMD (table).
Jiangning Liu
jiangning.liu at arm.com
Wed Nov 13 17:57:55 PST 2013
Author: jiangning
Date: Wed Nov 13 19:57:55 2013
New Revision: 194649
URL: http://llvm.org/viewvc/llvm-project?rev=194649&view=rev
Log:
Implement AArch64 NEON instruction set AdvSIMD (table).
Added:
cfe/trunk/test/CodeGen/aarch64-neon-tbl.c
Modified:
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/utils/TableGen/NeonEmitter.cpp
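For readers unfamiliar with the instructions being wired up: AdvSIMD TBL/TBX
perform a byte-wise table lookup in which each index byte selects one byte
from a table of one to four vector registers. TBL writes zero for an
out-of-range index, while TBX leaves the corresponding destination byte
unchanged. A minimal scalar sketch of those semantics for a single 16-byte
table register (the helper names are illustrative, not part of this patch):

#include <stdint.h>

/* Scalar model of TBL/TBX with one 128-bit table register: out-of-range
   indices yield 0 for TBL and keep the destination byte for TBX. */
static void tbl1_model(uint8_t dst[8], const uint8_t tab[16],
                       const uint8_t idx[8]) {
  for (int i = 0; i < 8; ++i)
    dst[i] = (idx[i] < 16) ? tab[idx[i]] : 0;
}

static void tbx1_model(uint8_t dst[8], const uint8_t tab[16],
                       const uint8_t idx[8]) {
  for (int i = 0; i < 8; ++i)
    if (idx[i] < 16)
      dst[i] = tab[idx[i]];
}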
Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=194649&r1=194648&r2=194649&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Wed Nov 13 19:57:55 2013
@@ -183,7 +183,8 @@ class NoTestOpInst<string n, string p, s
// y: scalar of float
// o: scalar of double
// k: default elt width, double num elts
-// #: array of default vectors
+// 2,3,4: array of default vectors
+// B,C,D: array of default elts, force 'Q' size modifier.
// p: pointer type
// c: const pointer type
@@ -815,6 +816,21 @@ def VUZP2 : SOpInst<"vuzp2", "ddd",
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP2>;
////////////////////////////////////////////////////////////////////////////////
+// Table lookup
+let InstName = "vtbl" in {
+def VQTBL1_A64 : WInst<"vqtbl1", "djt", "UccPcQUcQcQPc">;
+def VQTBL2_A64 : WInst<"vqtbl2", "dBt", "UccPcQUcQcQPc">;
+def VQTBL3_A64 : WInst<"vqtbl3", "dCt", "UccPcQUcQcQPc">;
+def VQTBL4_A64 : WInst<"vqtbl4", "dDt", "UccPcQUcQcQPc">;
+}
+let InstName = "vtbx" in {
+def VQTBX1_A64 : WInst<"vqtbx1", "ddjt", "UccPcQUcQcQPc">;
+def VQTBX2_A64 : WInst<"vqtbx2", "ddBt", "UccPcQUcQcQPc">;
+def VQTBX3_A64 : WInst<"vqtbx3", "ddCt", "UccPcQUcQcQPc">;
+def VQTBX4_A64 : WInst<"vqtbx4", "ddDt", "UccPcQUcQcQPc">;
+}
+
+////////////////////////////////////////////////////////////////////////////////
// Scalar Arithmetic
// Scalar Addition
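The new 'B', 'C' and 'D' prototype modifiers give the vqtbl/vqtbx intrinsics
table operands that are arrays of Q-sized (128-bit) vectors even when the
result and index vectors are 64-bit. As a sketch of the signatures this
generates, assuming an AArch64 toolchain whose arm_neon.h provides these
intrinsics (the wrapper names are hypothetical; the signatures mirror the
test file added below):

#include <arm_neon.h>

/* "dBt" (vqtbl2): default-width result, two Q-sized tables, index. */
uint8x8_t wrap_vqtbl2(uint8x16x2_t tab, uint8x8_t idx) {
  return vqtbl2_u8(tab, idx);
}

/* "ddDt" (vqtbx4q): destination, four Q-sized tables, index. */
uint8x16_t wrap_vqtbx4q(uint8x16_t dst, uint8x16x4_t tab, uint8x16_t idx) {
  return vqtbx4q_u8(dst, tab, idx);
}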
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=194649&r1=194648&r2=194649&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Nov 13 19:57:55 2013
@@ -2463,13 +2463,240 @@ static Value *EmitAArch64ScalarBuiltinEx
return CGF.Builder.CreateBitCast(Result, ResultType, s);
}
-Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
+static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
+ Value *ExtOp, Value *IndexOp,
+ llvm::Type *ResTy, unsigned IntID,
+ const char *Name) {
+ SmallVector<Value *, 2> TblOps;
+ if (ExtOp)
+ TblOps.push_back(ExtOp);
+
+ // Build a vector containing sequential numbers like (0, 1, 2, ..., 15)
+ SmallVector<Constant*, 16> Indices;
+ llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
+ for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
+ Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i));
+ Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1));
+ }
+ Value *SV = llvm::ConstantVector::get(Indices);
+
+ int PairPos = 0, End = Ops.size() - 1;
+ while (PairPos < End) {
+ TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
+ Ops[PairPos+1], SV, Name));
+ PairPos += 2;
+ }
+
+ // If there is an odd number of 64-bit lookup tables, fill the high 64 bits
+ // of the final 128-bit lookup table with zeroes.
+ if (PairPos == End) {
+ Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
+ TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
+ ZeroTbl, SV, Name));
+ }
+
+ TblTy = llvm::VectorType::get(TblTy->getElementType(),
+ 2*TblTy->getNumElements());
+ llvm::Type *Tys[2] = { ResTy, TblTy };
+
+ Function *TblF;
+ TblOps.push_back(IndexOp);
+ TblF = CGF.CGM.getIntrinsic(IntID, Tys);
+
+ return CGF.EmitNeonCall(TblF, TblOps, Name);
+}
+
+static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF,
+ unsigned BuiltinID,
+ const CallExpr *E) {
+ unsigned Int = 0;
+ const char *s = NULL;
+
+ unsigned TblPos;
+ switch (BuiltinID) {
+ default:
+ return 0;
+ case AArch64::BI__builtin_neon_vtbl1_v:
+ case AArch64::BI__builtin_neon_vqtbl1_v:
+ case AArch64::BI__builtin_neon_vqtbl1q_v:
+ case AArch64::BI__builtin_neon_vtbl2_v:
+ case AArch64::BI__builtin_neon_vqtbl2_v:
+ case AArch64::BI__builtin_neon_vqtbl2q_v:
+ case AArch64::BI__builtin_neon_vtbl3_v:
+ case AArch64::BI__builtin_neon_vqtbl3_v:
+ case AArch64::BI__builtin_neon_vqtbl3q_v:
+ case AArch64::BI__builtin_neon_vtbl4_v:
+ case AArch64::BI__builtin_neon_vqtbl4_v:
+ case AArch64::BI__builtin_neon_vqtbl4q_v:
+ TblPos = 0;
+ break;
+ case AArch64::BI__builtin_neon_vtbx1_v:
+ case AArch64::BI__builtin_neon_vqtbx1_v:
+ case AArch64::BI__builtin_neon_vqtbx1q_v:
+ case AArch64::BI__builtin_neon_vtbx2_v:
+ case AArch64::BI__builtin_neon_vqtbx2_v:
+ case AArch64::BI__builtin_neon_vqtbx2q_v:
+ case AArch64::BI__builtin_neon_vtbx3_v:
+ case AArch64::BI__builtin_neon_vqtbx3_v:
+ case AArch64::BI__builtin_neon_vqtbx3q_v:
+ case AArch64::BI__builtin_neon_vtbx4_v:
+ case AArch64::BI__builtin_neon_vqtbx4_v:
+ case AArch64::BI__builtin_neon_vqtbx4q_v:
+ TblPos = 1;
+ break;
+ }
+
+ assert(E->getNumArgs() >= 3);
+
+ // Get the last argument, which specifies the vector type.
+ llvm::APSInt Result;
+ const Expr *Arg = E->getArg(E->getNumArgs() - 1);
+ if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
+ return 0;
+
+ // Determine the type of this overloaded NEON intrinsic.
+ NeonTypeFlags Type(Result.getZExtValue());
+ llvm::VectorType *VTy = GetNeonType(&CGF, Type);
+ llvm::Type *Ty = VTy;
+ if (!Ty)
+ return 0;
+ SmallVector<Value *, 4> Ops;
+ for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
+ Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
+ }
+
+ Arg = E->getArg(TblPos);
+ llvm::Type *TblTy = CGF.ConvertType(Arg->getType());
+ llvm::VectorType *VTblTy = cast<llvm::VectorType>(TblTy);
+ llvm::Type *Tys[2] = { Ty, VTblTy };
+ unsigned nElts = VTy->getNumElements();
+
+ // AArch64 scalar builtins are not overloaded; they do not have an extra
+ // argument that specifies the vector type, so we need to handle each case.
+ SmallVector<Value *, 2> TblOps;
+ switch (BuiltinID) {
+ case AArch64::BI__builtin_neon_vtbl1_v: {
+ TblOps.push_back(Ops[0]);
+ return packTBLDVectorList(CGF, TblOps, 0, Ops[1], Ty,
+ Intrinsic::aarch64_neon_vtbl1, "vtbl1");
+ }
+ case AArch64::BI__builtin_neon_vtbl2_v: {
+ TblOps.push_back(Ops[0]);
+ TblOps.push_back(Ops[1]);
+ return packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty,
+ Intrinsic::aarch64_neon_vtbl1, "vtbl1");
+ }
+ case AArch64::BI__builtin_neon_vtbl3_v: {
+ TblOps.push_back(Ops[0]);
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ return packTBLDVectorList(CGF, TblOps, 0, Ops[3], Ty,
+ Intrinsic::aarch64_neon_vtbl2, "vtbl2");
+ }
+ case AArch64::BI__builtin_neon_vtbl4_v: {
+ TblOps.push_back(Ops[0]);
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ TblOps.push_back(Ops[3]);
+ return packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty,
+ Intrinsic::aarch64_neon_vtbl2, "vtbl2");
+ }
+ case AArch64::BI__builtin_neon_vtbx1_v: {
+ TblOps.push_back(Ops[1]);
+ Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty,
+ Intrinsic::aarch64_neon_vtbl1, "vtbl1");
+
+ llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8);
+ Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight);
+ Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
+ CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty);
+
+ SmallVector<Value *, 4> BslOps;
+ BslOps.push_back(CmpRes);
+ BslOps.push_back(Ops[0]);
+ BslOps.push_back(TblRes);
+ Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
+ return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
+ }
+ case AArch64::BI__builtin_neon_vtbx2_v: {
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty,
+ Intrinsic::aarch64_neon_vtbx1, "vtbx1");
+ }
+ case AArch64::BI__builtin_neon_vtbx3_v: {
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ TblOps.push_back(Ops[3]);
+ Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty,
+ Intrinsic::aarch64_neon_vtbl2, "vtbl2");
+
+ llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24);
+ Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour);
+ Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
+ TwentyFourV);
+ CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty);
+
+ SmallVector<Value *, 4> BslOps;
+ BslOps.push_back(CmpRes);
+ BslOps.push_back(Ops[0]);
+ BslOps.push_back(TblRes);
+ Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
+ return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
+ }
+ case AArch64::BI__builtin_neon_vtbx4_v: {
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ TblOps.push_back(Ops[3]);
+ TblOps.push_back(Ops[4]);
+ return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty,
+ Intrinsic::aarch64_neon_vtbx2, "vtbx2");
+ }
+ case AArch64::BI__builtin_neon_vqtbl1_v:
+ case AArch64::BI__builtin_neon_vqtbl1q_v:
+ Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break;
+ case AArch64::BI__builtin_neon_vqtbl2_v:
+ case AArch64::BI__builtin_neon_vqtbl2q_v:
+ Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break;
+ case AArch64::BI__builtin_neon_vqtbl3_v:
+ case AArch64::BI__builtin_neon_vqtbl3q_v:
+ Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break;
+ case AArch64::BI__builtin_neon_vqtbl4_v:
+ case AArch64::BI__builtin_neon_vqtbl4q_v:
+ Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break;
+ case AArch64::BI__builtin_neon_vqtbx1_v:
+ case AArch64::BI__builtin_neon_vqtbx1q_v:
+ Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; break;
+ case AArch64::BI__builtin_neon_vqtbx2_v:
+ case AArch64::BI__builtin_neon_vqtbx2q_v:
+ Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break;
+ case AArch64::BI__builtin_neon_vqtbx3_v:
+ case AArch64::BI__builtin_neon_vqtbx3q_v:
+ Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break;
+ case AArch64::BI__builtin_neon_vqtbx4_v:
+ case AArch64::BI__builtin_neon_vqtbx4q_v:
+ Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break;
+ }
+
+ if (!Int)
+ return 0;
+
+ Function *F = CGF.CGM.getIntrinsic(Int, Tys);
+ return CGF.EmitNeonCall(F, Ops, s);
+}
+
+Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
// Process AArch64 scalar builtins
if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))
return Result;
+ // Process AArch64 table lookup builtins
+ if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E))
+ return Result;
+
if (BuiltinID == AArch64::BI__clear_cache) {
assert(E->getNumArgs() == 2 &&
"Variadic __clear_cache slipped through on AArch64");
Added: cfe/trunk/test/CodeGen/aarch64-neon-tbl.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-tbl.c?rev=194649&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-tbl.c (added)
+++ cfe/trunk/test/CodeGen/aarch64-neon-tbl.c Wed Nov 13 19:57:55 2013
@@ -0,0 +1,463 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test new aarch64 intrinsics and types
+
+#include <arm_neon.h>
+
+int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vtbl1_s8
+ return vtbl1_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) {
+ // CHECK: test_vqtbl1_s8
+ return vqtbl1_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
+ // CHECK: test_vtbl2_s8
+ return vtbl2_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) {
+ // CHECK: test_vqtbl2_s8
+ return vqtbl2_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
+ // CHECK: test_vtbl3_s8
+ return vtbl3_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) {
+ // CHECK: test_vqtbl3_s8
+ return vqtbl3_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
+ // CHECK: test_vtbl4_s8
+ return vtbl4_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) {
+ // CHECK: test_vqtbl4_s8
+ return vqtbl4_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vqtbl1q_s8
+ return vqtbl1q_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
+ // CHECK: test_vqtbl2q_s8
+ return vqtbl2q_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
+ // CHECK: test_vqtbl3q_s8
+ return vqtbl3q_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
+ // CHECK: test_vqtbl4q_s8
+ return vqtbl4q_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
+ // CHECK: test_vtbx1_s8
+ return vtbx1_s8(a, b, c);
+ // CHECK: movi {{v[0-9]+}}.8b, #0
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
+ // CHECK: test_vtbx2_s8
+ return vtbx2_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
+ // CHECK: test_vtbx3_s8
+ return vtbx3_s8(a, b, c);
+ // CHECK: movi {{v[0-9]+}}.8b, #0
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
+ // CHECK: test_vtbx4_s8
+ return vtbx4_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) {
+ // CHECK: test_vqtbx1_s8
+ return vqtbx1_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) {
+ // CHECK: test_vqtbx2_s8
+ return vqtbx2_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) {
+ // CHECK: test_vqtbx3_s8
+ return vqtbx3_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, int8x8_t c) {
+ // CHECK: test_vqtbx4_s8
+ return vqtbx4_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
+ // CHECK: test_vqtbx1q_s8
+ return vqtbx1q_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
+ // CHECK: test_vqtbx2q_s8
+ return vqtbx2q_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
+ // CHECK: test_vqtbx3q_s8
+ return vqtbx3q_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
+ // CHECK: test_vqtbx4q_s8
+ return vqtbx4q_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vtbl1_u8
+ return vtbl1_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl1_u8
+ return vqtbl1_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
+ // CHECK: test_vtbl2_u8
+ return vtbl2_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl2_u8
+ return vqtbl2_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
+ // CHECK: test_vtbl3_u8
+ return vtbl3_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl3_u8
+ return vqtbl3_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
+ // CHECK: test_vtbl4_u8
+ return vtbl4_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl4_u8
+ return vqtbl4_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl1q_u8
+ return vqtbl1q_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl2q_u8
+ return vqtbl2q_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl3q_u8
+ return vqtbl3q_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl4q_u8
+ return vqtbl4q_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
+ // CHECK: test_vtbx1_u8
+ return vtbx1_u8(a, b, c);
+ // CHECK: movi {{v[0-9]+}}.8b, #0
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
+ // CHECK: test_vtbx2_u8
+ return vtbx2_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
+ // CHECK: test_vtbx3_u8
+ return vtbx3_u8(a, b, c);
+ // CHECK: movi {{v[0-9]+}}.8b, #0
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
+ // CHECK: test_vtbx4_u8
+ return vtbx4_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx1_u8
+ return vqtbx1_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx2_u8
+ return vqtbx2_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx3_u8
+ return vqtbx3_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx4_u8
+ return vqtbx4_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx1q_u8
+ return vqtbx1q_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx2q_u8
+ return vqtbx2q_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx3q_u8
+ return vqtbx3q_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx4q_u8
+ return vqtbx4q_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
+ // CHECK: test_vtbl1_p8
+ return vtbl1_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl1_p8
+ return vqtbl1_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
+ // CHECK: test_vtbl2_p8
+ return vtbl2_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl2_p8
+ return vqtbl2_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
+ // CHECK: test_vtbl3_p8
+ return vtbl3_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl3_p8
+ return vqtbl3_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
+ // CHECK: test_vtbl4_p8
+ return vtbl4_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl4_p8
+ return vqtbl4_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl1q_p8
+ return vqtbl1q_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl2q_p8
+ return vqtbl2q_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl3q_p8
+ return vqtbl3q_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl4q_p8
+ return vqtbl4q_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
+ // CHECK: test_vtbx1_p8
+ return vtbx1_p8(a, b, c);
+ // CHECK: movi {{v[0-9]+}}.8b, #0
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
+ // CHECK: test_vtbx2_p8
+ return vtbx2_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
+ // CHECK: test_vtbx3_p8
+ return vtbx3_p8(a, b, c);
+ // CHECK: movi {{v[0-9]+}}.8b, #0
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
+ // CHECK: test_vtbx4_p8
+ return vtbx4_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx1_p8
+ return vqtbx1_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx2_p8
+ return vqtbx2_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx3_p8
+ return vqtbx3_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx4_p8
+ return vqtbx4_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx1q_p8
+ return vqtbx1q_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx2q_p8
+ return vqtbx2q_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx3q_p8
+ return vqtbx3q_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx4q_p8
+ return vqtbx4q_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
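The vtbx1/vtbx3 CHECK sequences above (movi/ins/tbl/cmhs/bsl) encode the
fallback for the odd-table TBX cases: a TBX over a zero-padded table would
write zero for indices that land in the padding, whereas TBX semantics
require keeping the destination byte, so codegen instead emits a TBL over
the padded table, a CMHS comparing the indices against the number of valid
table bytes (8 for vtbx1, 24 for vtbx3), and a BSL that merges the original
destination back in. A scalar sketch of the vtbx1 case (illustrative name):

#include <stdint.h>

/* Scalar model of the emitted vtbx1 sequence: TBL over the table
   zero-padded to 16 bytes, then a CMHS/BSL-style select that restores
   the destination byte whenever idx >= 8. */
static void vtbx1_model(uint8_t dst[8], const uint8_t tab[8],
                        const uint8_t idx[8]) {
  for (int i = 0; i < 8; ++i) {
    uint8_t tbl = (idx[i] < 8) ? tab[idx[i]] : 0; /* TBL on padded table */
    dst[i] = (idx[i] >= 8) ? dst[i] : tbl;        /* CMHS + BSL merge    */
  }
}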
Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=194649&r1=194648&r2=194649&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Wed Nov 13 19:57:55 2013
@@ -494,6 +494,9 @@ static char ModType(const char mod, char
case 'g':
quad = false;
break;
+ case 'B':
+ case 'C':
+ case 'D':
case 'j':
quad = true;
break;
@@ -557,6 +560,10 @@ static char ModType(const char mod, char
return type;
}
+static bool IsMultiVecProto(const char p) {
+ return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
+}
+
/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type. QUc -> uint8x8_t.
static std::string TypeString(const char mod, StringRef typestr) {
@@ -631,11 +638,11 @@ static std::string TypeString(const char
PrintFatalError("unhandled type!");
}
- if (mod == '2')
+ if (mod == '2' || mod == 'B')
s += "x2";
- if (mod == '3')
+ if (mod == '3' || mod == 'C')
s += "x3";
- if (mod == '4')
+ if (mod == '4' || mod == 'D')
s += "x4";
// Append _t, finishing the type string typedef type.
@@ -712,7 +719,7 @@ static std::string BuiltinTypeString(con
// returning structs of 2, 3, or 4 vectors which are returned in a sret-like
// fashion, storing them to a pointer arg.
if (ret) {
- if (mod >= '2' && mod <= '4')
+ if (IsMultiVecProto(mod))
return "vv*"; // void result with void* first argument
if (mod == 'f' || (ck != ClassB && type == 'f'))
return quad ? "V4f" : "V2f";
@@ -729,11 +736,11 @@ static std::string BuiltinTypeString(con
}
// Non-return array types are passed as individual vectors.
- if (mod == '2')
+ if (mod == '2' || mod == 'B')
return quad ? "V16ScV16Sc" : "V8ScV8Sc";
- if (mod == '3')
+ if (mod == '3' || mod == 'C')
return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
- if (mod == '4')
+ if (mod == '4' || mod == 'D')
return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
if (mod == 'f' || (ck != ClassB && type == 'f'))
@@ -1996,7 +2003,7 @@ static std::string GenBuiltin(const std:
// If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
// sret-like argument.
- bool sret = (proto[0] >= '2' && proto[0] <= '4');
+ bool sret = IsMultiVecProto(proto[0]);
bool define = UseMacro(proto);
@@ -2056,12 +2063,19 @@ static std::string GenBuiltin(const std:
// Handle multiple-vector values specially, emitting each subvector as an
// argument to the __builtin.
+ unsigned NumOfVec = 0;
if (proto[i] >= '2' && proto[i] <= '4') {
+ NumOfVec = proto[i] - '0';
+ } else if (proto[i] >= 'B' && proto[i] <= 'D') {
+ NumOfVec = proto[i] - 'A' + 1;
+ }
+
+ if (NumOfVec > 0) {
// Check if an explicit cast is needed.
if (argType != 'c' || argPoly || argUsgn)
args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
- for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
+ for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
s += args + ".val[" + utostr(vi) + "]";
if ((vi + 1) < ve)
s += ", ";
@@ -2586,7 +2600,7 @@ NeonEmitter::genIntrinsicRangeCheckCode(
// Builtins that return a struct of multiple vectors have an extra
// leading arg for the struct return.
- if (Proto[0] >= '2' && Proto[0] <= '4')
+ if (IsMultiVecProto(Proto[0]))
++immidx;
// Add one to the index for each argument until we reach the immediate
@@ -2597,12 +2611,15 @@ NeonEmitter::genIntrinsicRangeCheckCode(
immidx += 1;
break;
case '2':
+ case 'B':
immidx += 2;
break;
case '3':
+ case 'C':
immidx += 3;
break;
case '4':
+ case 'D':
immidx += 4;
break;
case 'i':
@@ -2710,7 +2727,7 @@ NeonEmitter::genOverloadTypeCheckCode(ra
}
}
// For sret builtins, adjust the pointer argument index.
- if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
+ if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
PtrArgNum += 1;
// Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
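To summarize the NeonEmitter change: 'B', 'C' and 'D' behave exactly like
'2', '3' and '4' (arrays of two to four vectors) except that they force the
Q size modifier, and IsMultiVecProto centralizes the array-of-vectors test.
A C restatement of the two helpers (names follow the patch; this is just a
paraphrase):

#include <stdbool.h>

/* Digits and letters both encode an array of 2..4 vectors; the letters
   additionally force the 'Q' (128-bit) size. */
static bool is_multi_vec_proto(char p) {
  return (p >= '2' && p <= '4') || (p >= 'B' && p <= 'D');
}

static unsigned num_of_vec(char p) {
  if (p >= '2' && p <= '4')
    return (unsigned)(p - '0');
  if (p >= 'B' && p <= 'D')
    return (unsigned)(p - 'A' + 1);
  return 0;
}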