r201660 - ARM NEON: use more flexible TableGen field for defs.

Tim Northover tnorthover at apple.com
Wed Feb 19 02:37:09 PST 2014


Author: tnorthover
Date: Wed Feb 19 04:37:09 2014
New Revision: 201660

URL: http://llvm.org/viewvc/llvm-project?rev=201660&view=rev
Log:
ARM NEON: use more flexible TableGen field for defs.

We used to have special handling for isCrypto and isA64 bits in the
NeonEmitter.cpp file (it knew the former was predicated on __ARM_FEATURE_CRYPTO
and the latter on __aarch64__), and went through various contortions to make
sure the correct intrinsics were emitted under the correct guard.

This is ugly and has obvious scalability problems (e.g. the vcvtX intrinsics
are needed, and they are ARMv8-only yet available on both targets — yet another
category). This
patch moves the #if predicate into the arm_neon.td file directly and makes
NeonEmitter.cpp agnostic about what goes in there.

It also deduplicates arm_neon.td so that each desired intrinsic is mentioned in
just one place (necessary because of the new mechanism for creating
arm_neon.h).

rdar://problem/16035743

Modified:
    cfe/trunk/include/clang/Basic/arm_neon.td
    cfe/trunk/utils/TableGen/NeonEmitter.cpp

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=201660&r1=201659&r2=201660&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Wed Feb 19 04:37:09 2014
@@ -141,13 +141,13 @@ class Inst <string n, string p, string t
   string Name = n;
   string Prototype = p;
   string Types = t;
+  string ArchGuard = "";
+
   Op Operand = o;
   bit isShift = 0;
   bit isScalarShift = 0;
   bit isScalarNarrowShift = 0;
   bit isVCVT_N = 0;
-  bit isA64 = 0;
-  bit isCrypto = 0;
   // For immediate checks: the immediate will be assumed to specify the lane of
   // a Q register. Only used for intrinsics which end up calling polymorphic
   // builtins.
@@ -546,90 +546,66 @@ def VFMA : SInst<"vfma", "dddd", "fQf">;
 ////////////////////////////////////////////////////////////////////////////////
 // AArch64 Intrinsics
 
-let isA64 = 1 in {
+let ArchGuard = "defined(__aarch64__)" in {
 
 ////////////////////////////////////////////////////////////////////////////////
 // Load/Store
-// With additional QUl, Ql, d, Qd, Pl, QPl type.
-def LD1 : WInst<"vld1", "dc",
-                "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
-def LD2 : WInst<"vld2", "2c",
-                "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
-def LD3 : WInst<"vld3", "3c",
-                "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
-def LD4 : WInst<"vld4", "4c",
-                "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
-def ST1 : WInst<"vst1", "vpd",
-                "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
-def ST2 : WInst<"vst2", "vp2",
-                "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
-def ST3 : WInst<"vst3", "vp3",
-                "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
-def ST4 : WInst<"vst4", "vp4",
-                "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
+def LD1 : WInst<"vld1", "dc", "dQdPlQPl">;
+def LD2 : WInst<"vld2", "2c", "QUlQldQdPlQPl">;
+def LD3 : WInst<"vld3", "3c", "QUlQldQdPlQPl">;
+def LD4 : WInst<"vld4", "4c", "QUlQldQdPlQPl">;
+def ST1 : WInst<"vst1", "vpd", "dQdPlQPl">;
+def ST2 : WInst<"vst2", "vp2", "QUlQldQdPlQPl">;
+def ST3 : WInst<"vst3", "vp3", "QUlQldQdPlQPl">;
+def ST4 : WInst<"vst4", "vp4", "QUlQldQdPlQPl">;
 
 def LD1_X2 : WInst<"vld1_x2", "2c",
-                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+                   "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">;
 def LD3_x3 : WInst<"vld1_x3", "3c",
-                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+                   "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">;
 def LD4_x4 : WInst<"vld1_x4", "4c",
-                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+                   "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">;
 
 def ST1_X2 : WInst<"vst1_x2", "vp2",
-                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+                   "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">;
 def ST1_X3 : WInst<"vst1_x3", "vp3",
-                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+                   "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">;
 def ST1_X4 : WInst<"vst1_x4", "vp4",
-                   "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+                   "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">;
 
-// With additional QUl, Ql, d, Qd, Pl, QPl type.
-def LD1_LANE : WInst<"vld1_lane", "dcdi",
-                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
-def LD2_LANE : WInst<"vld2_lane", "2c2i",
-                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
-def LD3_LANE : WInst<"vld3_lane", "3c3i",
-                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
-def LD4_LANE : WInst<"vld4_lane", "4c4i",
-                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
-def ST1_LANE : WInst<"vst1_lane", "vpdi",
-                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
-def ST2_LANE : WInst<"vst2_lane", "vp2i",
-                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
-def ST3_LANE : WInst<"vst3_lane", "vp3i",
-                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
-def ST4_LANE : WInst<"vst4_lane", "vp4i",
-                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def LD1_LANE : WInst<"vld1_lane", "dcdi", "dQdPlQPl">;
+def LD2_LANE : WInst<"vld2_lane", "2c2i", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def LD3_LANE : WInst<"vld3_lane", "3c3i", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def LD4_LANE : WInst<"vld4_lane", "4c4i", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def ST1_LANE : WInst<"vst1_lane", "vpdi", "dQdPlQPl">;
+def ST2_LANE : WInst<"vst2_lane", "vp2i", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def ST3_LANE : WInst<"vst3_lane", "vp3i", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def ST4_LANE : WInst<"vst4_lane", "vp4i", "lUlQcQUcQPcQlQUldQdPlQPl">;
 
-def LD1_DUP  : WInst<"vld1_dup", "dc",
-                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+def LD1_DUP  : WInst<"vld1_dup", "dc", "dQdPlQPl">;
 def LD2_DUP  : WInst<"vld2_dup", "2c",
-                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+                     "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPldPl">;
 def LD3_DUP  : WInst<"vld3_dup", "3c",
-                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+                     "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPldPl">;
 def LD4_DUP  : WInst<"vld4_dup", "4c",
-                    "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
+                     "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPldPl">;
 
 def VLDRQ : WInst<"vldrq", "sc", "Pk">;
 def VSTRQ : WInst<"vstrq", "vps", "Pk">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Addition
-// With additional d, Qd type.
-def ADD : IOpInst<"vadd", "ddd", "csilfdUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd",
-                  OP_ADD>;
+def ADD : IOpInst<"vadd", "ddd", "dQd", OP_ADD>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Subtraction
-// With additional Qd type.
-def SUB : IOpInst<"vsub", "ddd", "csildfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd",
-                  OP_SUB>;
+def SUB : IOpInst<"vsub", "ddd", "dQd", OP_SUB>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Multiplication
-// With additional Qd type.
-def MUL     : IOpInst<"vmul", "ddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MUL>;
-def MLA     : IOpInst<"vmla", "dddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLA>;
-def MLS     : IOpInst<"vmls", "dddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLS>;
+def MUL     : IOpInst<"vmul", "ddd", "dQd", OP_MUL>;
+def MLA     : IOpInst<"vmla", "dddd", "dQd", OP_MLA>;
+def MLS     : IOpInst<"vmls", "dddd", "dQd", OP_MLS>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Multiplication Extended
@@ -641,8 +617,7 @@ def FDIV : IOpInst<"vdiv", "ddd",  "fdQf
 
 ////////////////////////////////////////////////////////////////////////////////
 // Vector fused multiply-add operations
-// With additional d, Qd type.
-def FMLA : SInst<"vfma", "dddd", "fdQfQd">;
+def FMLA : SInst<"vfma", "dddd", "dQd">;
 def FMLS : SInst<"vfms", "dddd", "fdQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -653,22 +628,18 @@ def FMLS_N : SOpInst<"vfms_n", "ddds", "
 
 ////////////////////////////////////////////////////////////////////////////////
 // Logical operations
-// With additional Qd, Ql, QPl type.
-def BSL : SInst<"vbsl", "dudd",
-                "csilUcUsUiUlfdPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQdPlQPl">;
+def BSL : SInst<"vbsl", "dudd", "dPlQdQPl">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Absolute Difference
-// With additional Qd type.
-def ABD  : SInst<"vabd", "ddd",  "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">;
+def ABD  : SInst<"vabd", "ddd",  "dQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // saturating absolute/negate
-// With additional Qd/Ql type.
-def ABS    : SInst<"vabs", "dd", "csilfdQcQsQiQfQlQd">;
-def QABS   : SInst<"vqabs", "dd", "csilQcQsQiQl">;
-def NEG    : SOpInst<"vneg", "dd", "csilfdQcQsQiQfQdQl", OP_NEG>;
-def QNEG   : SInst<"vqneg", "dd", "csilQcQsQiQl">;
+def ABS    : SInst<"vabs", "dd", "dQdlQl">;
+def QABS   : SInst<"vqabs", "dd", "lQl">;
+def NEG    : SOpInst<"vneg", "dd", "dlQdQl", OP_NEG>;
+def QNEG   : SInst<"vqneg", "dd", "lQl">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Signed Saturating Accumulated of Unsigned Value
@@ -680,9 +651,8 @@ def USQADD : SInst<"vsqadd", "ddd", "UcU
 
 ////////////////////////////////////////////////////////////////////////////////
 // Reciprocal/Sqrt
-// With additional d, Qd type.
-def FRECPS  : IInst<"vrecps", "ddd", "fdQfQd">;
-def FRSQRTS : IInst<"vrsqrts", "ddd", "fdQfQd">;
+def FRECPS  : IInst<"vrecps", "ddd", "dQd">;
+def FRSQRTS : IInst<"vrsqrts", "ddd", "dQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // bitwise reverse
@@ -736,31 +706,22 @@ def FCVTAS_S32 : SInst<"vcvta_s32", "xd"
 def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "dQd">;
 def FCVTAU_S32 : SInst<"vcvta_u32", "ud", "fQf">;
 def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "dQd">;
-def FRECPE  : SInst<"vrecpe", "dd", "fdUiQfQUiQd">;
-def FRSQRTE : SInst<"vrsqrte", "dd", "fdUiQfQUiQd">;
+def FRECPE  : SInst<"vrecpe", "dd", "dQd">;
+def FRSQRTE : SInst<"vrsqrte", "dd", "dQd">;
 def FSQRT   : SInst<"vsqrt", "dd", "fdQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Comparison
-// With additional Qd, Ql, QPl type.
-def FCAGE : IInst<"vcage", "udd", "fdQfQd">;
-def FCAGT : IInst<"vcagt", "udd", "fdQfQd">;
-def FCALE : IInst<"vcale", "udd", "fdQfQd">;
-def FCALT : IInst<"vcalt", "udd", "fdQfQd">;
-// With additional Ql, QUl, Qd types.
-def CMTST  : WInst<"vtst", "udd",
-                   "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPslUlQlQUlPlQPl">;
-// With additional l, Ul,d, Qd, Ql, QUl, Qd types.
-def CFMEQ  : SOpInst<"vceq", "udd",
-                     "csilfUcUsUiUlPcQcdQdQsQiQfQUcQUsQUiQUlQlQPcPlQPl", OP_EQ>;
-def CFMGE  : SOpInst<"vcge", "udd",
-                     "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_GE>;
-def CFMLE  : SOpInst<"vcle", "udd",
-                     "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_LE>;
-def CFMGT  : SOpInst<"vcgt", "udd",
-                     "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_GT>;
-def CFMLT  : SOpInst<"vclt", "udd",
-                     "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_LT>;
+def FCAGE : IInst<"vcage", "udd", "dQd">;
+def FCAGT : IInst<"vcagt", "udd", "dQd">;
+def FCALE : IInst<"vcale", "udd", "dQd">;
+def FCALT : IInst<"vcalt", "udd", "dQd">;
+def CMTST  : WInst<"vtst", "udd", "lUlPlQlQUlQPl">;
+def CFMEQ  : SOpInst<"vceq", "udd", "lUldQdQlQUlPlQPl", OP_EQ>;
+def CFMGE  : SOpInst<"vcge", "udd", "lUldQdQlQUl", OP_GE>;
+def CFMLE  : SOpInst<"vcle", "udd", "lUldQdQlQUl", OP_LE>;
+def CFMGT  : SOpInst<"vcgt", "udd", "lUldQdQlQUl", OP_GT>;
+def CFMLT  : SOpInst<"vclt", "udd", "lUldQdQlQUl", OP_LT>;
 
 def CMEQ  : SInst<"vceqz", "ud",
                   "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">;
@@ -771,9 +732,8 @@ def CMLT  : SInst<"vcltz", "ud", "csilfd
 
 ////////////////////////////////////////////////////////////////////////////////
 // Max/Min Integer
-// With additional Qd type.
-def MAX : SInst<"vmax", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">;
-def MIN : SInst<"vmin", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">;
+def MAX : SInst<"vmax", "ddd", "dQd">;
+def MIN : SInst<"vmin", "ddd", "dQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // MaxNum/MinNum Floating Point
@@ -782,9 +742,8 @@ def FMINNM : SInst<"vminnm", "ddd", "fdQ
 
 ////////////////////////////////////////////////////////////////////////////////
 // Pairwise Max/Min
-// With additional Qc Qs Qi QUc QUs QUi Qf Qd types.
-def MAXP : SInst<"vpmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
-def MINP : SInst<"vpmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+def MAXP : SInst<"vpmax", "ddd", "QcQsQiQUcQUsQUiQfQd">;
+def MINP : SInst<"vpmin", "ddd", "QcQsQiQUcQUsQUiQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Pairwise MaxNum/MinNum Floating Point
@@ -793,8 +752,7 @@ def FMINNMP : SInst<"vpminnm", "ddd", "f
 
 ////////////////////////////////////////////////////////////////////////////////
 // Pairwise Addition
-// With additional Qc Qs Qi QUc QUs QUi Qf Qd types.
-def ADDP  : IInst<"vpadd", "ddd", "csiUcUsUifQcQsQiQlQUcQUsQUiQUlQfQd">;
+def ADDP  : IInst<"vpadd", "ddd", "QcQsQiQlQUcQUsQUiQUlQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Shifts by constant
@@ -804,11 +762,8 @@ def SHLL_HIGH_N    : SOpInst<"vshll_high
                              OP_LONG_HI>;
 
 ////////////////////////////////////////////////////////////////////////////////
-// Shifts with insert, with additional Ql, QPl type.
-def SRI_N : WInst<"vsri_n", "dddi",
-                  "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPsPlQPl">;
-def SLI_N : WInst<"vsli_n", "dddi",
-                  "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPsPlQPl">;
+def SRI_N : WInst<"vsri_n", "dddi", "PlQPl">;
+def SLI_N : WInst<"vsli_n", "dddi", "PlQPl">;
 
 // Right shift narrow high
 def SHRN_HIGH_N    : IOpInst<"vshrn_high_n", "hmdi",
@@ -869,12 +824,10 @@ def VMULL_HIGH_P64 : SOpInst<"vmull_high
 
 ////////////////////////////////////////////////////////////////////////////////
 // Extract or insert element from vector
-def GET_LANE : IInst<"vget_lane", "sdi",
-                     "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">;
-def SET_LANE : IInst<"vset_lane", "dsdi",
-                     "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">;
+def GET_LANE : IInst<"vget_lane", "sdi", "dQdPlQPl">;
+def SET_LANE : IInst<"vset_lane", "dsdi", "dQdPlQPl">;
 def COPY_LANE : IOpInst<"vcopy_lane", "ddidi",
-                        "csilPcPsUcUsUiUlPcPsPlfd", OP_COPY_LN>;
+                        "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>;
 def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi",
                         "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPYQ_LN>;
 def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki",
@@ -884,26 +837,19 @@ def COPYQ_LANEQ : IOpInst<"vcopy_laneq",
 
 ////////////////////////////////////////////////////////////////////////////////
 // Set all lanes to same value
-def VDUP_LANE1: WOpInst<"vdup_lane", "dgi",
-                  "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
-                        OP_DUP_LN>;
+def VDUP_LANE1: WOpInst<"vdup_lane", "dgi", "hdQhQdPlQPl", OP_DUP_LN>;
 def VDUP_LANE2: WOpInst<"vdup_laneq", "dki",
-                  "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
+                  "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
                         OP_DUP_LN>;
-def DUP_N   : WOpInst<"vdup_n", "ds",
-                       "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQdPlQPl",
-                       OP_DUP>;
-def MOV_N   : WOpInst<"vmov_n", "ds",
-                       "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd",
-                       OP_DUP>;
+def DUP_N   : WOpInst<"vdup_n", "ds", "dQdPlQPl", OP_DUP>;
+def MOV_N   : WOpInst<"vmov_n", "ds", "dQd", OP_DUP>;
 
 ////////////////////////////////////////////////////////////////////////////////
-// Combining vectors, with additional Pl
-def COMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfdUcUsUiUlPcPsPl", OP_CONC>;
+def COMBINE : NoTestOpInst<"vcombine", "kdd", "dPl", OP_CONC>;
 
 ////////////////////////////////////////////////////////////////////////////////
-//Initialize a vector from bit pattern, with additional Pl
-def CREATE : NoTestOpInst<"vcreate", "dl", "csihfdUcUsUiUlPcPslPl", OP_CAST>;
+//Initialize a vector from bit pattern
+def CREATE : NoTestOpInst<"vcreate", "dl", "dPl", OP_CAST>;
 
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -947,7 +893,7 @@ def VMUL_LANE_A64 : IOpInst<"vmul_lane",
 
 // Note: d type is handled by SCALAR_VMUL_LANEQ
 def VMUL_LANEQ   : IOpInst<"vmul_laneq", "ddji",
-                           "sifUsUiQsQiQfQUsQUiQfQd", OP_MUL_LN>;
+                           "sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN>;
 def VMULL_LANEQ  : SOpInst<"vmull_laneq", "wdki", "siUsUi", OP_MULL_LN>;
 def VMULL_HIGH_LANE   : SOpInst<"vmull_high_lane", "wkdi", "siUsUi",
                                 OP_MULLHi_LN>;
@@ -979,12 +925,11 @@ def FMINNMV : SInst<"vminnmv", "sd", "fQ
  
 ////////////////////////////////////////////////////////////////////////////////
 // Newly added Vector Extract for f64
-def VEXT_A64 : WInst<"vext", "dddi",
-                     "cUcPcsUsPsiUilUlfdQcQUcQPcQsQUsQPsQiQUiQlQUlQfQdPlQPl">;
+def VEXT_A64 : WInst<"vext", "dddi", "dQdPlQPl">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Crypto
-let isCrypto = 1 in {
+let ArchGuard = "__ARM_FEATURE_CRYPTO" in {
 def AESE : SInst<"vaese", "ddd", "QUc">;
 def AESD : SInst<"vaesd", "ddd", "QUc">;
 def AESMC : SInst<"vaesmc", "dd", "QUc">;
@@ -1035,10 +980,17 @@ def VQTBX4_A64 : WInst<"vqtbx4", "ddDt",
 
 ////////////////////////////////////////////////////////////////////////////////
 // Vector reinterpret cast operations
-// With additional d, Qd, pl, Qpl types
-def REINTERPRET
+
+// NeonEmitter implicitly takes the cartesian product of the type string with
+// itself during generation so, unlike all other intrinsics, this one should
+// include *all* types, not just additional ones.
+//
+// We also rely on NeonEmitter handling the 32-bit vreinterpret before the
+// 64-bit one so that the common casts don't get guarded as AArch64-only
+// (FIXME).
+def VVREINTERPRET
   : NoTestOpInst<"vreinterpret", "dd",
-         "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT>;
+       "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT>;
 
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1056,10 +1008,8 @@ def SCALAR_SUB : SInst<"vsub", "sss",  "
 def SCALAR_QSUB   : SInst<"vqsub", "sss", "ScSsSiSlSUcSUsSUiSUl">;
 
 let InstName = "vmov" in {
-def VGET_HIGH_A64 : NoTestOpInst<"vget_high", "dk", "csilhfdUcUsUiUlPcPsPl",
-                                 OP_HI>;
-def VGET_LOW_A64  : NoTestOpInst<"vget_low", "dk", "csilhfdUcUsUiUlPcPsPl",
-                                 OP_LO>;
+def VGET_HIGH_A64 : NoTestOpInst<"vget_high", "dk", "dPl", OP_HI>;
+def VGET_LOW_A64  : NoTestOpInst<"vget_low", "dk", "dPl", OP_LO>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////

Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=201660&r1=201659&r2=201660&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Wed Feb 19 04:37:09 2014
@@ -372,13 +372,15 @@ public:
   void runTests(raw_ostream &o);
 
 private:
+  void emitGuardedIntrinsic(raw_ostream &OS, Record *R,
+                            std::string &CurrentGuard, bool &InGuard,
+                            StringMap<ClassKind> &EmittedMap);
   void emitIntrinsic(raw_ostream &OS, Record *R,
                      StringMap<ClassKind> &EmittedMap);
   void genBuiltinsDef(raw_ostream &OS);
   void genOverloadTypeCheckCode(raw_ostream &OS);
   void genIntrinsicRangeCheckCode(raw_ostream &OS);
-  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
-                     bool isA64TestGen);
+  void genTargetTest(raw_ostream &OS);
 };
 } // end anonymous namespace
 
@@ -2731,78 +2733,57 @@ void NeonEmitter::run(raw_ostream &OS) {
   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
 
   StringMap<ClassKind> EmittedMap;
+  std::string CurrentGuard = "";
+  bool InGuard = false;
 
-  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
-  // intrinsics.  (Some of the saturating multiply instructions are also
-  // used to implement the corresponding "_lane" variants, but tablegen
-  // sorts the records into alphabetical order so that the "_lane" variants
-  // come after the intrinsics they use.)
-  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
-  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
-  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
-  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
-
-  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
-  // common intrinsics appear only once in the output stream.
-  // The check for uniquiness is done in emitIntrinsic.
-  // Emit ARM intrinsics.
-  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
-    Record *R = RV[i];
-
-    // Skip AArch64 intrinsics; they will be emitted at the end.
-    bool isA64 = R->getValueAsBit("isA64");
-    if (isA64)
-      continue;
-
-    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
-        R->getName() != "VABD")
-      emitIntrinsic(OS, R, EmittedMap);
+  // Some intrinsics are used to express others. These need to be emitted near
+  // the beginning so that the declarations are present when needed. This is
+  // rather an ugly, arbitrary list, but probably simpler than actually tracking
+  // dependency info.
+  static const char *EarlyDefsArr[] =
+      { "VFMA",      "VQMOVN",    "VQMOVUN",  "VABD",    "VMOVL",
+        "VABDL",     "VGET_HIGH", "VCOMBINE", "VSHLL_N", "VMOVL_HIGH",
+        "VMULL",     "VMLAL_N",   "VMLSL_N",  "VMULL_N", "VMULL_P64",
+        "VQDMLAL_N", "VQDMLSL_N", "VQDMULL_N" };
+  ArrayRef<const char *> EarlyDefs(EarlyDefsArr);
+
+  for (unsigned i = 0; i < EarlyDefs.size(); ++i) {
+    Record *R = Records.getDef(EarlyDefs[i]);
+    emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap);
   }
 
-  // Emit AArch64-specific intrinsics.
-  OS << "#ifdef __aarch64__\n";
-
-  emitIntrinsic(OS, Records.getDef("VMULL_P64"), EmittedMap);
-  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
-  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
-  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
-
   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
     Record *R = RV[i];
-
-    // Skip ARM intrinsics already included above.
-    bool isA64 = R->getValueAsBit("isA64");
-    if (!isA64)
+    if (std::find(EarlyDefs.begin(), EarlyDefs.end(), R->getName()) !=
+        EarlyDefs.end())
       continue;
 
-    // Skip crypto temporarily, and will emit them all together at the end.
-    bool isCrypto = R->getValueAsBit("isCrypto");
-    if (isCrypto)
-      continue;
-
-    emitIntrinsic(OS, R, EmittedMap);
+    emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap);
   }
 
-  OS << "#endif\n\n";
-
-  // Now emit all the crypto intrinsics together
-  OS << "#ifdef __ARM_FEATURE_CRYPTO\n";
+  if (InGuard)
+    OS << "#endif\n\n";
 
-  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
-    Record *R = RV[i];
+  OS << "#undef __ai\n\n";
+  OS << "#endif /* __ARM_NEON_H */\n";
+}
 
-    bool isCrypto = R->getValueAsBit("isCrypto");
-    if (!isCrypto)
-      continue;
+void NeonEmitter::emitGuardedIntrinsic(raw_ostream &OS, Record *R,
+                                       std::string &CurrentGuard, bool &InGuard,
+                                       StringMap<ClassKind> &EmittedMap) {
+
+  std::string NewGuard = R->getValueAsString("ArchGuard");
+  if (NewGuard != CurrentGuard) {
+    if (InGuard)
+      OS << "#endif\n\n";
+    if (NewGuard.size())
+      OS << "#if " << NewGuard << '\n';
 
-    emitIntrinsic(OS, R, EmittedMap);
+    CurrentGuard = NewGuard;
+    InGuard = NewGuard.size() != 0;
   }
 
-
-  OS << "#endif\n\n";
-
-  OS << "#undef __ai\n\n";
-  OS << "#endif /* __ARM_NEON_H */\n";
+  emitIntrinsic(OS, R, EmittedMap);
 }
 
 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
@@ -2845,8 +2826,11 @@ void NeonEmitter::emitIntrinsic(raw_ostr
     } else {
       std::string s =
           GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
-      if (EmittedMap.count(s))
+      if (EmittedMap.count(s)) {
+        errs() << "warning: duplicate definition: " << name
+               << " (type: " << TypeString('d', TypeVec[ti]) << ")\n";
         continue;
+      }
       EmittedMap[s] = classKind;
       OS << s;
     }
@@ -3329,10 +3313,10 @@ static std::string GenTest(const std::st
 
 /// Write out all intrinsic tests for the specified target, checking
 /// for intrinsic test uniqueness.
-void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
-                                bool isA64GenTest) {
-  if (isA64GenTest)
-    OS << "#ifdef __aarch64__\n";
+void NeonEmitter::genTargetTest(raw_ostream &OS) {
+  StringMap<OpKind> EmittedMap;
+  std::string CurrentGuard = "";
+  bool InGuard = false;
 
   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
@@ -3343,12 +3327,17 @@ void NeonEmitter::genTargetTest(raw_ostr
     bool isShift = R->getValueAsBit("isShift");
     std::string InstName = R->getValueAsString("InstName");
     bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
-    bool isA64 = R->getValueAsBit("isA64");
 
-    // do not include AArch64 intrinsic test if not generating
-    // code for AArch64
-    if (!isA64GenTest && isA64)
-      continue;
+    std::string NewGuard = R->getValueAsString("ArchGuard");
+    if (NewGuard != CurrentGuard) {
+      if (InGuard)
+        OS << "#endif\n\n";
+      if (NewGuard.size())
+        OS << "#if " << NewGuard << '\n';
+
+      CurrentGuard = NewGuard;
+      InGuard = NewGuard.size() != 0;
+    }
 
     SmallVector<StringRef, 16> TypeVec;
     ParseTypes(R, Types, TypeVec);
@@ -3370,8 +3359,8 @@ void NeonEmitter::genTargetTest(raw_ostr
             continue;
           std::string testFuncProto;
           std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
-                                  isShift, isHiddenLOp, ck, InstName, isA64,
-                                  testFuncProto);
+                                  isShift, isHiddenLOp, ck, InstName,
+                                  CurrentGuard.size(), testFuncProto);
           if (EmittedMap.count(testFuncProto))
             continue;
           EmittedMap[testFuncProto] = kind;
@@ -3379,17 +3368,15 @@ void NeonEmitter::genTargetTest(raw_ostr
         }
       } else {
         std::string testFuncProto;
-        std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
-                                isHiddenLOp, ck, InstName, isA64, testFuncProto);
-        if (EmittedMap.count(testFuncProto))
-          continue;
-        EmittedMap[testFuncProto] = kind;
+        std::string s =
+            GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, isHiddenLOp,
+                    ck, InstName, CurrentGuard.size(), testFuncProto);
         OS << s << "\n";
       }
     }
   }
 
-  if (isA64GenTest)
+  if (InGuard)
     OS << "#endif\n";
 }
 /// runTests - Write out a complete set of tests for all of the Neon
@@ -3409,15 +3396,7 @@ void NeonEmitter::runTests(raw_ostream &
         "#include <arm_neon.h>\n"
         "\n";
 
-  // ARM tests must be emitted before AArch64 tests to ensure
-  // tests for intrinsics that are common to ARM and AArch64
-  // appear only once in the output stream.
-  // The check for uniqueness is done in genTargetTest.
-  StringMap<OpKind> EmittedMap;
-
-  genTargetTest(OS, EmittedMap, false);
-
-  genTargetTest(OS, EmittedMap, true);
+  genTargetTest(OS);
 }
 
 namespace clang {





More information about the cfe-commits mailing list