r194649 - Implement AArch64 NEON instruction set AdvSIMD (table).
Jiangning Liu
jiangning.liu at arm.com
Wed Nov 13 17:57:55 PST 2013
Author: jiangning
Date: Wed Nov 13 19:57:55 2013
New Revision: 194649
URL: http://llvm.org/viewvc/llvm-project?rev=194649&view=rev
Log:
Implement AArch64 NEON instruction set AdvSIMD (table).
Added:
cfe/trunk/test/CodeGen/aarch64-neon-tbl.c
Modified:
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/utils/TableGen/NeonEmitter.cpp
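For readers unfamiliar with the instructions being wired up: AdvSIMD TBL/TBX
perform a byte-wise table lookup in which each index byte selects one byte
from a table of one to four vector registers. TBL writes zero for an
out-of-range index, while TBX leaves the corresponding destination byte
unchanged. A minimal scalar sketch of those semantics for a single 16-byte
table register (the helper names are illustrative, not part of this patch):

#include <stdint.h>

/* Scalar model of TBL/TBX with one 128-bit table register: out-of-range
   indices yield 0 for TBL and keep the destination byte for TBX. */
static void tbl1_model(uint8_t dst[8], const uint8_t tab[16],
                       const uint8_t idx[8]) {
  for (int i = 0; i < 8; ++i)
    dst[i] = (idx[i] < 16) ? tab[idx[i]] : 0;
}

static void tbx1_model(uint8_t dst[8], const uint8_t tab[16],
                       const uint8_t idx[8]) {
  for (int i = 0; i < 8; ++i)
    if (idx[i] < 16)
      dst[i] = tab[idx[i]];
}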
Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=194649&r1=194648&r2=194649&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Wed Nov 13 19:57:55 2013
@@ -183,7 +183,8 @@ class NoTestOpInst<string n, string p, s
// y: scalar of float
// o: scalar of double
// k: default elt width, double num elts
-// #: array of default vectors
+// 2,3,4: array of default vectors
+// B,C,D: array of default elts, force 'Q' size modifier.
// p: pointer type
// c: const pointer type
@@ -815,6 +816,21 @@ def VUZP2 : SOpInst<"vuzp2", "ddd",
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP2>;
////////////////////////////////////////////////////////////////////////////////
+// Table lookup
+let InstName = "vtbl" in {
+def VQTBL1_A64 : WInst<"vqtbl1", "djt", "UccPcQUcQcQPc">;
+def VQTBL2_A64 : WInst<"vqtbl2", "dBt", "UccPcQUcQcQPc">;
+def VQTBL3_A64 : WInst<"vqtbl3", "dCt", "UccPcQUcQcQPc">;
+def VQTBL4_A64 : WInst<"vqtbl4", "dDt", "UccPcQUcQcQPc">;
+}
+let InstName = "vtbx" in {
+def VQTBX1_A64 : WInst<"vqtbx1", "ddjt", "UccPcQUcQcQPc">;
+def VQTBX2_A64 : WInst<"vqtbx2", "ddBt", "UccPcQUcQcQPc">;
+def VQTBX3_A64 : WInst<"vqtbx3", "ddCt", "UccPcQUcQcQPc">;
+def VQTBX4_A64 : WInst<"vqtbx4", "ddDt", "UccPcQUcQcQPc">;
+}
+
+////////////////////////////////////////////////////////////////////////////////
// Scalar Arithmetic
// Scalar Addition
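The new 'B', 'C' and 'D' prototype modifiers give the vqtbl/vqtbx intrinsics
table operands that are arrays of Q-sized (128-bit) vectors even when the
result and index vectors are 64-bit. As a sketch of the signatures this
generates, assuming an AArch64 toolchain whose arm_neon.h provides these
intrinsics (the wrapper names are hypothetical; the signatures mirror the
test file added below):

#include <arm_neon.h>

/* "dBt" (vqtbl2): default-width result, two Q-sized tables, index. */
uint8x8_t wrap_vqtbl2(uint8x16x2_t tab, uint8x8_t idx) {
  return vqtbl2_u8(tab, idx);
}

/* "ddDt" (vqtbx4q): destination, four Q-sized tables, index. */
uint8x16_t wrap_vqtbx4q(uint8x16_t dst, uint8x16x4_t tab, uint8x16_t idx) {
  return vqtbx4q_u8(dst, tab, idx);
}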
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=194649&r1=194648&r2=194649&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Nov 13 19:57:55 2013
@@ -2463,13 +2463,240 @@ static Value *EmitAArch64ScalarBuiltinEx
return CGF.Builder.CreateBitCast(Result, ResultType, s);
}
-Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
+static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
+ Value *ExtOp, Value *IndexOp,
+ llvm::Type *ResTy, unsigned IntID,
+ const char *Name) {
+ SmallVector<Value *, 2> TblOps;
+ if (ExtOp)
+ TblOps.push_back(ExtOp);
+
+ // Build a vector containing sequential numbers like (0, 1, 2, ..., 15)
+ SmallVector<Constant*, 16> Indices;
+ llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
+ for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
+ Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i));
+ Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1));
+ }
+ Value *SV = llvm::ConstantVector::get(Indices);
+
+ int PairPos = 0, End = Ops.size() - 1;
+ while (PairPos < End) {
+ TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
+ Ops[PairPos+1], SV, Name));
+ PairPos += 2;
+ }
+
+ // If there is an odd number of 64-bit lookup tables, fill the high 64 bits
+ // of the final 128-bit lookup table with zeroes.
+ if (PairPos == End) {
+ Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
+ TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
+ ZeroTbl, SV, Name));
+ }
+
+ TblTy = llvm::VectorType::get(TblTy->getElementType(),
+ 2*TblTy->getNumElements());
+ llvm::Type *Tys[2] = { ResTy, TblTy };
+
+ Function *TblF;
+ TblOps.push_back(IndexOp);
+ TblF = CGF.CGM.getIntrinsic(IntID, Tys);
+
+ return CGF.EmitNeonCall(TblF, TblOps, Name);
+}
+
+static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF,
+ unsigned BuiltinID,
+ const CallExpr *E) {
+ unsigned Int = 0;
+ const char *s = NULL;
+
+ unsigned TblPos;
+ switch (BuiltinID) {
+ default:
+ return 0;
+ case AArch64::BI__builtin_neon_vtbl1_v:
+ case AArch64::BI__builtin_neon_vqtbl1_v:
+ case AArch64::BI__builtin_neon_vqtbl1q_v:
+ case AArch64::BI__builtin_neon_vtbl2_v:
+ case AArch64::BI__builtin_neon_vqtbl2_v:
+ case AArch64::BI__builtin_neon_vqtbl2q_v:
+ case AArch64::BI__builtin_neon_vtbl3_v:
+ case AArch64::BI__builtin_neon_vqtbl3_v:
+ case AArch64::BI__builtin_neon_vqtbl3q_v:
+ case AArch64::BI__builtin_neon_vtbl4_v:
+ case AArch64::BI__builtin_neon_vqtbl4_v:
+ case AArch64::BI__builtin_neon_vqtbl4q_v:
+ TblPos = 0;
+ break;
+ case AArch64::BI__builtin_neon_vtbx1_v:
+ case AArch64::BI__builtin_neon_vqtbx1_v:
+ case AArch64::BI__builtin_neon_vqtbx1q_v:
+ case AArch64::BI__builtin_neon_vtbx2_v:
+ case AArch64::BI__builtin_neon_vqtbx2_v:
+ case AArch64::BI__builtin_neon_vqtbx2q_v:
+ case AArch64::BI__builtin_neon_vtbx3_v:
+ case AArch64::BI__builtin_neon_vqtbx3_v:
+ case AArch64::BI__builtin_neon_vqtbx3q_v:
+ case AArch64::BI__builtin_neon_vtbx4_v:
+ case AArch64::BI__builtin_neon_vqtbx4_v:
+ case AArch64::BI__builtin_neon_vqtbx4q_v:
+ TblPos = 1;
+ break;
+ }
+
+ assert(E->getNumArgs() >= 3);
+
+ // Get the last argument, which specifies the vector type.
+ llvm::APSInt Result;
+ const Expr *Arg = E->getArg(E->getNumArgs() - 1);
+ if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
+ return 0;
+
+ // Determine the type of this overloaded NEON intrinsic.
+ NeonTypeFlags Type(Result.getZExtValue());
+ llvm::VectorType *VTy = GetNeonType(&CGF, Type);
+ llvm::Type *Ty = VTy;
+ if (!Ty)
+ return 0;
+ SmallVector<Value *, 4> Ops;
+ for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
+ Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
+ }
+
+ Arg = E->getArg(TblPos);
+ llvm::Type *TblTy = CGF.ConvertType(Arg->getType());
+ llvm::VectorType *VTblTy = cast<llvm::VectorType>(TblTy);
+ llvm::Type *Tys[2] = { Ty, VTblTy };
+ unsigned nElts = VTy->getNumElements();
+
+ // AArch64 scalar builtins are not overloaded; they do not have an extra
+ // argument that specifies the vector type, so we need to handle each case.
+ SmallVector<Value *, 2> TblOps;
+ switch (BuiltinID) {
+ case AArch64::BI__builtin_neon_vtbl1_v: {
+ TblOps.push_back(Ops[0]);
+ return packTBLDVectorList(CGF, TblOps, 0, Ops[1], Ty,
+ Intrinsic::aarch64_neon_vtbl1, "vtbl1");
+ }
+ case AArch64::BI__builtin_neon_vtbl2_v: {
+ TblOps.push_back(Ops[0]);
+ TblOps.push_back(Ops[1]);
+ return packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty,
+ Intrinsic::aarch64_neon_vtbl1, "vtbl1");
+ }
+ case AArch64::BI__builtin_neon_vtbl3_v: {
+ TblOps.push_back(Ops[0]);
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ return packTBLDVectorList(CGF, TblOps, 0, Ops[3], Ty,
+ Intrinsic::aarch64_neon_vtbl2, "vtbl2");
+ }
+ case AArch64::BI__builtin_neon_vtbl4_v: {
+ TblOps.push_back(Ops[0]);
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ TblOps.push_back(Ops[3]);
+ return packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty,
+ Intrinsic::aarch64_neon_vtbl2, "vtbl2");
+ }
+ case AArch64::BI__builtin_neon_vtbx1_v: {
+ TblOps.push_back(Ops[1]);
+ Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty,
+ Intrinsic::aarch64_neon_vtbl1, "vtbl1");
+
+ llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8);
+ Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight);
+ Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
+ CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty);
+
+ SmallVector<Value *, 4> BslOps;
+ BslOps.push_back(CmpRes);
+ BslOps.push_back(Ops[0]);
+ BslOps.push_back(TblRes);
+ Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
+ return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
+ }
+ case AArch64::BI__builtin_neon_vtbx2_v: {
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty,
+ Intrinsic::aarch64_neon_vtbx1, "vtbx1");
+ }
+ case AArch64::BI__builtin_neon_vtbx3_v: {
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ TblOps.push_back(Ops[3]);
+ Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty,
+ Intrinsic::aarch64_neon_vtbl2, "vtbl2");
+
+ llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24);
+ Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour);
+ Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
+ TwentyFourV);
+ CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty);
+
+ SmallVector<Value *, 4> BslOps;
+ BslOps.push_back(CmpRes);
+ BslOps.push_back(Ops[0]);
+ BslOps.push_back(TblRes);
+ Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
+ return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
+ }
+ case AArch64::BI__builtin_neon_vtbx4_v: {
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ TblOps.push_back(Ops[3]);
+ TblOps.push_back(Ops[4]);
+ return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty,
+ Intrinsic::aarch64_neon_vtbx2, "vtbx2");
+ }
+ case AArch64::BI__builtin_neon_vqtbl1_v:
+ case AArch64::BI__builtin_neon_vqtbl1q_v:
+ Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break;
+ case AArch64::BI__builtin_neon_vqtbl2_v:
+ case AArch64::BI__builtin_neon_vqtbl2q_v:
+ Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break;
+ case AArch64::BI__builtin_neon_vqtbl3_v:
+ case AArch64::BI__builtin_neon_vqtbl3q_v:
+ Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break;
+ case AArch64::BI__builtin_neon_vqtbl4_v:
+ case AArch64::BI__builtin_neon_vqtbl4q_v:
+ Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break;
+ case AArch64::BI__builtin_neon_vqtbx1_v:
+ case AArch64::BI__builtin_neon_vqtbx1q_v:
+ Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; break;
+ case AArch64::BI__builtin_neon_vqtbx2_v:
+ case AArch64::BI__builtin_neon_vqtbx2q_v:
+ Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break;
+ case AArch64::BI__builtin_neon_vqtbx3_v:
+ case AArch64::BI__builtin_neon_vqtbx3q_v:
+ Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break;
+ case AArch64::BI__builtin_neon_vqtbx4_v:
+ case AArch64::BI__builtin_neon_vqtbx4q_v:
+ Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break;
+ }
+
+ if (!Int)
+ return 0;
+
+ Function *F = CGF.CGM.getIntrinsic(Int, Tys);
+ return CGF.EmitNeonCall(F, Ops, s);
+}
+
+Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
// Process AArch64 scalar builtins
if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))
return Result;
+ // Process AArch64 table lookup builtins
+ if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E))
+ return Result;
+
if (BuiltinID == AArch64::BI__clear_cache) {
assert(E->getNumArgs() == 2 &&
"Variadic __clear_cache slipped through on AArch64");
Added: cfe/trunk/test/CodeGen/aarch64-neon-tbl.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-tbl.c?rev=194649&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-tbl.c (added)
+++ cfe/trunk/test/CodeGen/aarch64-neon-tbl.c Wed Nov 13 19:57:55 2013
@@ -0,0 +1,463 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test new aarch64 intrinsics and types
+
+#include <arm_neon.h>
+
+int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vtbl1_s8
+ return vtbl1_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) {
+ // CHECK: test_vqtbl1_s8
+ return vqtbl1_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
+ // CHECK: test_vtbl2_s8
+ return vtbl2_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) {
+ // CHECK: test_vqtbl2_s8
+ return vqtbl2_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
+ // CHECK: test_vtbl3_s8
+ return vtbl3_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) {
+ // CHECK: test_vqtbl3_s8
+ return vqtbl3_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
+ // CHECK: test_vtbl4_s8
+ return vtbl4_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) {
+ // CHECK: test_vqtbl4_s8
+ return vqtbl4_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vqtbl1q_s8
+ return vqtbl1q_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
+ // CHECK: test_vqtbl2q_s8
+ return vqtbl2q_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
+ // CHECK: test_vqtbl3q_s8
+ return vqtbl3q_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
+ // CHECK: test_vqtbl4q_s8
+ return vqtbl4q_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
+ // CHECK: test_vtbx1_s8
+ return vtbx1_s8(a, b, c);
+ // CHECK: movi {{v[0-9]+}}.8b, #0
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
+ // CHECK: test_vtbx2_s8
+ return vtbx2_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
+ // CHECK: test_vtbx3_s8
+ return vtbx3_s8(a, b, c);
+ // CHECK: movi {{v[0-9]+}}.8b, #0
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
+ // CHECK: test_vtbx4_s8
+ return vtbx4_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) {
+ // CHECK: test_vqtbx1_s8
+ return vqtbx1_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) {
+ // CHECK: test_vqtbx2_s8
+ return vqtbx2_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) {
+ // CHECK: test_vqtbx3_s8
+ return vqtbx3_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, int8x8_t c) {
+ // CHECK: test_vqtbx4_s8
+ return vqtbx4_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
+ // CHECK: test_vqtbx1q_s8
+ return vqtbx1q_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
+ // CHECK: test_vqtbx2q_s8
+ return vqtbx2q_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
+ // CHECK: test_vqtbx3q_s8
+ return vqtbx3q_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
+ // CHECK: test_vqtbx4q_s8
+ return vqtbx4q_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vtbl1_u8
+ return vtbl1_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl1_u8
+ return vqtbl1_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
+ // CHECK: test_vtbl2_u8
+ return vtbl2_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl2_u8
+ return vqtbl2_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
+ // CHECK: test_vtbl3_u8
+ return vtbl3_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl3_u8
+ return vqtbl3_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
+ // CHECK: test_vtbl4_u8
+ return vtbl4_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl4_u8
+ return vqtbl4_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl1q_u8
+ return vqtbl1q_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl2q_u8
+ return vqtbl2q_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl3q_u8
+ return vqtbl3q_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl4q_u8
+ return vqtbl4q_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
+ // CHECK: test_vtbx1_u8
+ return vtbx1_u8(a, b, c);
+ // CHECK: movi {{v[0-9]+}}.8b, #0
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
+ // CHECK: test_vtbx2_u8
+ return vtbx2_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
+ // CHECK: test_vtbx3_u8
+ return vtbx3_u8(a, b, c);
+ // CHECK: movi {{v[0-9]+}}.8b, #0
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
+ // CHECK: test_vtbx4_u8
+ return vtbx4_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx1_u8
+ return vqtbx1_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx2_u8
+ return vqtbx2_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx3_u8
+ return vqtbx3_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx4_u8
+ return vqtbx4_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx1q_u8
+ return vqtbx1q_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx2q_u8
+ return vqtbx2q_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx3q_u8
+ return vqtbx3q_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx4q_u8
+ return vqtbx4q_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
+ // CHECK: test_vtbl1_p8
+ return vtbl1_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl1_p8
+ return vqtbl1_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
+ // CHECK: test_vtbl2_p8
+ return vtbl2_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl2_p8
+ return vqtbl2_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
+ // CHECK: test_vtbl3_p8
+ return vtbl3_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl3_p8
+ return vqtbl3_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
+ // CHECK: test_vtbl4_p8
+ return vtbl4_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl4_p8
+ return vqtbl4_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl1q_p8
+ return vqtbl1q_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl2q_p8
+ return vqtbl2q_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl3q_p8
+ return vqtbl3q_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl4q_p8
+ return vqtbl4q_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
+ // CHECK: test_vtbx1_p8
+ return vtbx1_p8(a, b, c);
+ // CHECK: movi {{v[0-9]+}}.8b, #0
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
+ // CHECK: test_vtbx2_p8
+ return vtbx2_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
+ // CHECK: test_vtbx3_p8
+ return vtbx3_p8(a, b, c);
+ // CHECK: movi {{v[0-9]+}}.8b, #0
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
+ // CHECK: test_vtbx4_p8
+ return vtbx4_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx1_p8
+ return vqtbx1_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx2_p8
+ return vqtbx2_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx3_p8
+ return vqtbx3_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx4_p8
+ return vqtbx4_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx1q_p8
+ return vqtbx1q_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx2q_p8
+ return vqtbx2q_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx3q_p8
+ return vqtbx3q_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx4q_p8
+ return vqtbx4q_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
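The vtbx1/vtbx3 CHECK sequences above (movi/ins/tbl/cmhs/bsl) encode the
fallback for the odd-table TBX cases: a TBX over a zero-padded table would
write zero for indices that land in the padding, whereas TBX semantics
require keeping the destination byte, so codegen instead emits a TBL over
the padded table, a CMHS comparing the indices against the number of valid
table bytes (8 for vtbx1, 24 for vtbx3), and a BSL that merges the original
destination back in. A scalar sketch of the vtbx1 case (illustrative name):

#include <stdint.h>

/* Scalar model of the emitted vtbx1 sequence: TBL over the table
   zero-padded to 16 bytes, then a CMHS/BSL-style select that restores
   the destination byte whenever idx >= 8. */
static void vtbx1_model(uint8_t dst[8], const uint8_t tab[8],
                        const uint8_t idx[8]) {
  for (int i = 0; i < 8; ++i) {
    uint8_t tbl = (idx[i] < 8) ? tab[idx[i]] : 0; /* TBL on padded table */
    dst[i] = (idx[i] >= 8) ? dst[i] : tbl;        /* CMHS + BSL merge    */
  }
}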
Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=194649&r1=194648&r2=194649&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Wed Nov 13 19:57:55 2013
@@ -494,6 +494,9 @@ static char ModType(const char mod, char
case 'g':
quad = false;
break;
+ case 'B':
+ case 'C':
+ case 'D':
case 'j':
quad = true;
break;
@@ -557,6 +560,10 @@ static char ModType(const char mod, char
return type;
}
+static bool IsMultiVecProto(const char p) {
+ return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
+}
+
/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type. QUc -> uint8x8_t.
static std::string TypeString(const char mod, StringRef typestr) {
@@ -631,11 +638,11 @@ static std::string TypeString(const char
PrintFatalError("unhandled type!");
}
- if (mod == '2')
+ if (mod == '2' || mod == 'B')
s += "x2";
- if (mod == '3')
+ if (mod == '3' || mod == 'C')
s += "x3";
- if (mod == '4')
+ if (mod == '4' || mod == 'D')
s += "x4";
// Append _t, finishing the type string typedef type.
@@ -712,7 +719,7 @@ static std::string BuiltinTypeString(con
// returning structs of 2, 3, or 4 vectors which are returned in a sret-like
// fashion, storing them to a pointer arg.
if (ret) {
- if (mod >= '2' && mod <= '4')
+ if (IsMultiVecProto(mod))
return "vv*"; // void result with void* first argument
if (mod == 'f' || (ck != ClassB && type == 'f'))
return quad ? "V4f" : "V2f";
@@ -729,11 +736,11 @@ static std::string BuiltinTypeString(con
}
// Non-return array types are passed as individual vectors.
- if (mod == '2')
+ if (mod == '2' || mod == 'B')
return quad ? "V16ScV16Sc" : "V8ScV8Sc";
- if (mod == '3')
+ if (mod == '3' || mod == 'C')
return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
- if (mod == '4')
+ if (mod == '4' || mod == 'D')
return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
if (mod == 'f' || (ck != ClassB && type == 'f'))
@@ -1996,7 +2003,7 @@ static std::string GenBuiltin(const std:
// If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
// sret-like argument.
- bool sret = (proto[0] >= '2' && proto[0] <= '4');
+ bool sret = IsMultiVecProto(proto[0]);
bool define = UseMacro(proto);
@@ -2056,12 +2063,19 @@ static std::string GenBuiltin(const std:
// Handle multiple-vector values specially, emitting each subvector as an
// argument to the __builtin.
+ unsigned NumOfVec = 0;
if (proto[i] >= '2' && proto[i] <= '4') {
+ NumOfVec = proto[i] - '0';
+ } else if (proto[i] >= 'B' && proto[i] <= 'D') {
+ NumOfVec = proto[i] - 'A' + 1;
+ }
+
+ if (NumOfVec > 0) {
// Check if an explicit cast is needed.
if (argType != 'c' || argPoly || argUsgn)
args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
- for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
+ for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
s += args + ".val[" + utostr(vi) + "]";
if ((vi + 1) < ve)
s += ", ";
@@ -2586,7 +2600,7 @@ NeonEmitter::genIntrinsicRangeCheckCode(
// Builtins that return a struct of multiple vectors have an extra
// leading arg for the struct return.
- if (Proto[0] >= '2' && Proto[0] <= '4')
+ if (IsMultiVecProto(Proto[0]))
++immidx;
// Add one to the index for each argument until we reach the immediate
@@ -2597,12 +2611,15 @@ NeonEmitter::genIntrinsicRangeCheckCode(
immidx += 1;
break;
case '2':
+ case 'B':
immidx += 2;
break;
case '3':
+ case 'C':
immidx += 3;
break;
case '4':
+ case 'D':
immidx += 4;
break;
case 'i':
@@ -2710,7 +2727,7 @@ NeonEmitter::genOverloadTypeCheckCode(ra
}
}
// For sret builtins, adjust the pointer argument index.
- if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
+ if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
PtrArgNum += 1;
// Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
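To summarize the NeonEmitter change: 'B', 'C' and 'D' behave exactly like
'2', '3' and '4' (arrays of two to four vectors) except that they force the
Q size modifier, and IsMultiVecProto centralizes the array-of-vectors test.
A C restatement of the two helpers (names follow the patch; this is just a
paraphrase):

#include <stdbool.h>

/* Digits and letters both encode an array of 2..4 vectors; the letters
   additionally force the 'Q' (128-bit) size. */
static bool is_multi_vec_proto(char p) {
  return (p >= '2' && p <= '4') || (p >= 'B' && p <= 'D');
}

static unsigned num_of_vec(char p) {
  if (p >= '2' && p <= '4')
    return (unsigned)(p - '0');
  if (p >= 'B' && p <= 'D')
    return (unsigned)(p - 'A' + 1);
  return 0;
}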