[llvm] r336075 - [X86] Remove FMA3Info DenseMap. Break into sorted tables that we can binary search.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 1 23:23:39 PDT 2018


Author: ctopper
Date: Sun Jul  1 23:23:39 2018
New Revision: 336075

URL: http://llvm.org/viewvc/llvm-project?rev=336075&view=rev
Log:
[X86] Remove FMA3Info DenseMap. Break into sorted tables that we can binary search.

I separated out the rounding and broadcast groups into their own tables because it made the ordering in the main table easier.

Further splitting of the tables might make it possible to directly index using bits from the TSFlags, but it's probably not worth it right now.

Modified:
    llvm/trunk/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
    llvm/trunk/lib/Target/X86/X86InstrFMA3Info.cpp
    llvm/trunk/lib/Target/X86/X86InstrFMA3Info.h
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp

Modified: llvm/trunk/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/MCTargetDesc/X86BaseInfo.h?rev=336075&r1=336074&r2=336075&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/MCTargetDesc/X86BaseInfo.h (original)
+++ llvm/trunk/lib/Target/X86/MCTargetDesc/X86BaseInfo.h Sun Jul  1 23:23:39 2018
@@ -580,7 +580,7 @@ namespace X86II {
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
   // specified machine instruction.
   //
-  inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
+  inline uint8_t getBaseOpcodeFor(uint64_t TSFlags) {
     return TSFlags >> X86II::OpcodeShift;
   }
 

Modified: llvm/trunk/lib/Target/X86/X86InstrFMA3Info.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFMA3Info.cpp?rev=336075&r1=336074&r2=336075&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFMA3Info.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFMA3Info.cpp Sun Jul  1 23:23:39 2018
@@ -21,239 +21,148 @@
 
 using namespace llvm;
 
-#define FMA3BASE(X132, X213, X231, Attrs)                                      \
-  { { X132, X213, X231 }, Attrs },
+#define FMA3GROUP(Name, Suf, Attrs) \
+  { { X86::Name##132##Suf, X86::Name##213##Suf, X86::Name##231##Suf }, Attrs },
 
-#define FMA3RA(R132, R213, R231, Attrs)                                        \
-  FMA3BASE(X86::R132, X86::R213, X86::R231, Attrs)
-
-#define FMA3R(R132, R213, R231)                                                \
-  FMA3RA(R132, R213, R231, 0)
-
-#define FMA3MA(M132, M213, M231, Attrs)                                        \
-  FMA3BASE(X86::M132, X86::M213, X86::M231, Attrs)
-
-#define FMA3M(M132, M213, M231)                                                \
-  FMA3MA(M132, M213, M231, 0)
-
-#define FMA3RMA(R132, R213, R231, M132, M213, M231, Attrs)                     \
-  FMA3RA(R132, R213, R231, Attrs)                                              \
-  FMA3MA(M132, M213, M231, Attrs)
-
-#define FMA3RM(R132, R213, R231, M132, M213, M231)                             \
-  FMA3RMA(R132, R213, R231, M132, M213, M231, 0)
-
-#define FMA3_AVX2_VECTOR_GROUP(Name)                                           \
-  FMA3RM(Name##132PSr, Name##213PSr, Name##231PSr,                             \
-         Name##132PSm, Name##213PSm, Name##231PSm)                             \
-  FMA3RM(Name##132PDr, Name##213PDr, Name##231PDr,                             \
-         Name##132PDm, Name##213PDm, Name##231PDm)                             \
-  FMA3RM(Name##132PSYr, Name##213PSYr, Name##231PSYr,                          \
-         Name##132PSYm, Name##213PSYm, Name##231PSYm)                          \
-  FMA3RM(Name##132PDYr, Name##213PDYr, Name##231PDYr,                          \
-         Name##132PDYm, Name##213PDYm, Name##231PDYm)
-
-#define FMA3_AVX2_SCALAR_GROUP(Name)                                           \
-  FMA3RM(Name##132SSr, Name##213SSr, Name##231SSr,                             \
-         Name##132SSm, Name##213SSm, Name##231SSm)                             \
-  FMA3RM(Name##132SDr, Name##213SDr, Name##231SDr,                             \
-         Name##132SDm, Name##213SDm, Name##231SDm)                             \
-  FMA3RMA(Name##132SSr_Int, Name##213SSr_Int, Name##231SSr_Int,                \
-          Name##132SSm_Int, Name##213SSm_Int, Name##231SSm_Int,                \
-          X86InstrFMA3Group::X86FMA3Intrinsic)                                 \
-  FMA3RMA(Name##132SDr_Int, Name##213SDr_Int, Name##231SDr_Int,                \
-          Name##132SDm_Int, Name##213SDm_Int, Name##231SDm_Int,                \
-          X86InstrFMA3Group::X86FMA3Intrinsic)
-
-#define FMA3_AVX2_FULL_GROUP(Name)                                             \
-  FMA3_AVX2_VECTOR_GROUP(Name)                                                 \
-  FMA3_AVX2_SCALAR_GROUP(Name)
-
-#define FMA3_AVX512_VECTOR_GROUP(Name)                                         \
-  FMA3RM(Name##132PSZ128r, Name##213PSZ128r, Name##231PSZ128r,                 \
-         Name##132PSZ128m, Name##213PSZ128m, Name##231PSZ128m)                 \
-  FMA3RM(Name##132PDZ128r, Name##213PDZ128r, Name##231PDZ128r,                 \
-         Name##132PDZ128m, Name##213PDZ128m, Name##231PDZ128m)                 \
-  FMA3RM(Name##132PSZ256r, Name##213PSZ256r, Name##231PSZ256r,                 \
-         Name##132PSZ256m, Name##213PSZ256m, Name##231PSZ256m)                 \
-  FMA3RM(Name##132PDZ256r, Name##213PDZ256r, Name##231PDZ256r,                 \
-         Name##132PDZ256m, Name##213PDZ256m, Name##231PDZ256m)                 \
-  FMA3RM(Name##132PSZr,    Name##213PSZr,    Name##231PSZr,                    \
-         Name##132PSZm,    Name##213PSZm,    Name##231PSZm)                    \
-  FMA3RM(Name##132PDZr,    Name##213PDZr,    Name##231PDZr,                    \
-         Name##132PDZm,    Name##213PDZm,    Name##231PDZm)                    \
-  FMA3RMA(Name##132PSZ128rk, Name##213PSZ128rk, Name##231PSZ128rk,             \
-          Name##132PSZ128mk, Name##213PSZ128mk, Name##231PSZ128mk,             \
-          X86InstrFMA3Group::X86FMA3KMergeMasked)                              \
-  FMA3RMA(Name##132PDZ128rk, Name##213PDZ128rk, Name##231PDZ128rk,             \
-          Name##132PDZ128mk, Name##213PDZ128mk, Name##231PDZ128mk,             \
-          X86InstrFMA3Group::X86FMA3KMergeMasked)                              \
-  FMA3RMA(Name##132PSZ256rk, Name##213PSZ256rk, Name##231PSZ256rk,             \
-          Name##132PSZ256mk, Name##213PSZ256mk, Name##231PSZ256mk,             \
-          X86InstrFMA3Group::X86FMA3KMergeMasked)                              \
-  FMA3RMA(Name##132PDZ256rk, Name##213PDZ256rk, Name##231PDZ256rk,             \
-          Name##132PDZ256mk, Name##213PDZ256mk, Name##231PDZ256mk,             \
-          X86InstrFMA3Group::X86FMA3KMergeMasked)                              \
-  FMA3RMA(Name##132PSZrk,    Name##213PSZrk,    Name##231PSZrk,                \
-          Name##132PSZmk,    Name##213PSZmk,    Name##231PSZmk,                \
-          X86InstrFMA3Group::X86FMA3KMergeMasked)                              \
-  FMA3RMA(Name##132PDZrk,    Name##213PDZrk,    Name##231PDZrk,                \
-          Name##132PDZmk,    Name##213PDZmk,    Name##231PDZmk,                \
-          X86InstrFMA3Group::X86FMA3KMergeMasked)                              \
-  FMA3RMA(Name##132PSZ128rkz, Name##213PSZ128rkz, Name##231PSZ128rkz,          \
-          Name##132PSZ128mkz, Name##213PSZ128mkz, Name##231PSZ128mkz,          \
-          X86InstrFMA3Group::X86FMA3KZeroMasked)                               \
-  FMA3RMA(Name##132PDZ128rkz, Name##213PDZ128rkz, Name##231PDZ128rkz,          \
-          Name##132PDZ128mkz, Name##213PDZ128mkz, Name##231PDZ128mkz,          \
-          X86InstrFMA3Group::X86FMA3KZeroMasked)                               \
-  FMA3RMA(Name##132PSZ256rkz, Name##213PSZ256rkz, Name##231PSZ256rkz,          \
-          Name##132PSZ256mkz, Name##213PSZ256mkz, Name##231PSZ256mkz,          \
-          X86InstrFMA3Group::X86FMA3KZeroMasked)                               \
-  FMA3RMA(Name##132PDZ256rkz, Name##213PDZ256rkz, Name##231PDZ256rkz,          \
-          Name##132PDZ256mkz, Name##213PDZ256mkz, Name##231PDZ256mkz,          \
-          X86InstrFMA3Group::X86FMA3KZeroMasked)                               \
-  FMA3RMA(Name##132PSZrkz,    Name##213PSZrkz,    Name##231PSZrkz,             \
-          Name##132PSZmkz,    Name##213PSZmkz,    Name##231PSZmkz,             \
-          X86InstrFMA3Group::X86FMA3KZeroMasked)                               \
-  FMA3RMA(Name##132PDZrkz,    Name##213PDZrkz,    Name##231PDZrkz,             \
-          Name##132PDZmkz,    Name##213PDZmkz,    Name##231PDZmkz,             \
-          X86InstrFMA3Group::X86FMA3KZeroMasked)                               \
-  FMA3R(Name##132PSZrb, Name##213PSZrb, Name##231PSZrb)                        \
-  FMA3R(Name##132PDZrb, Name##213PDZrb, Name##231PDZrb)                        \
-  FMA3RA(Name##132PSZrbk, Name##213PSZrbk, Name##231PSZrbk,                    \
-         X86InstrFMA3Group::X86FMA3KMergeMasked)                               \
-  FMA3RA(Name##132PDZrbk, Name##213PDZrbk, Name##231PDZrbk,                    \
-         X86InstrFMA3Group::X86FMA3KMergeMasked)                               \
-  FMA3RA(Name##132PSZrbkz, Name##213PSZrbkz, Name##231PSZrbkz,                 \
-         X86InstrFMA3Group::X86FMA3KZeroMasked)                                \
-  FMA3RA(Name##132PDZrbkz, Name##213PDZrbkz, Name##231PDZrbkz,                 \
-         X86InstrFMA3Group::X86FMA3KZeroMasked)                                \
-  FMA3M(Name##132PSZ128mb, Name##213PSZ128mb, Name##231PSZ128mb)               \
-  FMA3M(Name##132PDZ128mb, Name##213PDZ128mb, Name##231PDZ128mb)               \
-  FMA3M(Name##132PSZ256mb, Name##213PSZ256mb, Name##231PSZ256mb)               \
-  FMA3M(Name##132PDZ256mb, Name##213PDZ256mb, Name##231PDZ256mb)               \
-  FMA3M(Name##132PSZmb, Name##213PSZmb, Name##231PSZmb)                        \
-  FMA3M(Name##132PDZmb, Name##213PDZmb, Name##231PDZmb)                        \
-  FMA3MA(Name##132PSZ128mbk, Name##213PSZ128mbk, Name##231PSZ128mbk,           \
-         X86InstrFMA3Group::X86FMA3KMergeMasked)                               \
-  FMA3MA(Name##132PDZ128mbk, Name##213PDZ128mbk, Name##231PDZ128mbk,           \
-         X86InstrFMA3Group::X86FMA3KMergeMasked)                               \
-  FMA3MA(Name##132PSZ256mbk, Name##213PSZ256mbk, Name##231PSZ256mbk,           \
-         X86InstrFMA3Group::X86FMA3KMergeMasked)                               \
-  FMA3MA(Name##132PDZ256mbk, Name##213PDZ256mbk, Name##231PDZ256mbk,           \
-         X86InstrFMA3Group::X86FMA3KMergeMasked)                               \
-  FMA3MA(Name##132PSZmbk,    Name##213PSZmbk,    Name##231PSZmbk,              \
-         X86InstrFMA3Group::X86FMA3KMergeMasked)                               \
-  FMA3MA(Name##132PDZmbk,    Name##213PDZmbk,    Name##231PDZmbk,              \
-         X86InstrFMA3Group::X86FMA3KMergeMasked)                               \
-  FMA3MA(Name##132PSZ128mbkz, Name##213PSZ128mbkz, Name##231PSZ128mbkz,        \
-         X86InstrFMA3Group::X86FMA3KZeroMasked)                                \
-  FMA3MA(Name##132PDZ128mbkz, Name##213PDZ128mbkz, Name##231PDZ128mbkz,        \
-         X86InstrFMA3Group::X86FMA3KZeroMasked)                                \
-  FMA3MA(Name##132PSZ256mbkz, Name##213PSZ256mbkz, Name##231PSZ256mbkz,        \
-         X86InstrFMA3Group::X86FMA3KZeroMasked)                                \
-  FMA3MA(Name##132PDZ256mbkz, Name##213PDZ256mbkz, Name##231PDZ256mbkz,        \
-         X86InstrFMA3Group::X86FMA3KZeroMasked)                                \
-  FMA3MA(Name##132PSZmbkz, Name##213PSZmbkz, Name##231PSZmbkz,                 \
-         X86InstrFMA3Group::X86FMA3KZeroMasked)                                \
-  FMA3MA(Name##132PDZmbkz, Name##213PDZmbkz, Name##231PDZmbkz,                 \
-         X86InstrFMA3Group::X86FMA3KZeroMasked)
-
-#define FMA3_AVX512_SCALAR_GROUP(Name)                                         \
-  FMA3RM(Name##132SSZr,      Name##213SSZr,     Name##231SSZr,                 \
-         Name##132SSZm,      Name##213SSZm,     Name##231SSZm)                 \
-  FMA3RM(Name##132SDZr,      Name##213SDZr,     Name##231SDZr,                 \
-         Name##132SDZm,      Name##213SDZm,     Name##231SDZm)                 \
-  FMA3RMA(Name##132SSZr_Int, Name##213SSZr_Int, Name##231SSZr_Int,             \
-          Name##132SSZm_Int, Name##213SSZm_Int, Name##231SSZm_Int,             \
-          X86InstrFMA3Group::X86FMA3Intrinsic)                                 \
-  FMA3RMA(Name##132SDZr_Int, Name##213SDZr_Int, Name##231SDZr_Int,             \
-          Name##132SDZm_Int, Name##213SDZm_Int, Name##231SDZm_Int,             \
-          X86InstrFMA3Group::X86FMA3Intrinsic)                                 \
-  FMA3RMA(Name##132SSZr_Intk, Name##213SSZr_Intk, Name##231SSZr_Intk,          \
-          Name##132SSZm_Intk, Name##213SSZm_Intk, Name##231SSZm_Intk,          \
-          X86InstrFMA3Group::X86FMA3Intrinsic |                                \
-              X86InstrFMA3Group::X86FMA3KMergeMasked)                          \
-  FMA3RMA(Name##132SDZr_Intk, Name##213SDZr_Intk, Name##231SDZr_Intk,          \
-          Name##132SDZm_Intk, Name##213SDZm_Intk, Name##231SDZm_Intk,          \
-          X86InstrFMA3Group::X86FMA3Intrinsic |                                \
-              X86InstrFMA3Group::X86FMA3KMergeMasked)                          \
-  FMA3RMA(Name##132SSZr_Intkz, Name##213SSZr_Intkz, Name##231SSZr_Intkz,       \
-          Name##132SSZm_Intkz, Name##213SSZm_Intkz, Name##231SSZm_Intkz,       \
-          X86InstrFMA3Group::X86FMA3Intrinsic |                                \
-              X86InstrFMA3Group::X86FMA3KZeroMasked)                           \
-  FMA3RMA(Name##132SDZr_Intkz, Name##213SDZr_Intkz, Name##231SDZr_Intkz,       \
-          Name##132SDZm_Intkz, Name##213SDZm_Intkz, Name##231SDZm_Intkz,       \
-          X86InstrFMA3Group::X86FMA3Intrinsic |                                \
-              X86InstrFMA3Group::X86FMA3KZeroMasked)                           \
-  FMA3RA(Name##132SSZrb_Int, Name##213SSZrb_Int, Name##231SSZrb_Int,           \
-         X86InstrFMA3Group::X86FMA3Intrinsic)                                  \
-  FMA3RA(Name##132SDZrb_Int, Name##213SDZrb_Int, Name##231SDZrb_Int,           \
-         X86InstrFMA3Group::X86FMA3Intrinsic)                                  \
-  FMA3RA(Name##132SSZrb_Intk, Name##213SSZrb_Intk, Name##231SSZrb_Intk,        \
-         X86InstrFMA3Group::X86FMA3Intrinsic |                                 \
-             X86InstrFMA3Group::X86FMA3KMergeMasked)                           \
-  FMA3RA(Name##132SDZrb_Intk, Name##213SDZrb_Intk, Name##231SDZrb_Intk,        \
-         X86InstrFMA3Group::X86FMA3Intrinsic |                                 \
-             X86InstrFMA3Group::X86FMA3KMergeMasked)                           \
-  FMA3RA(Name##132SSZrb_Intkz, Name##213SSZrb_Intkz, Name##231SSZrb_Intkz,     \
-         X86InstrFMA3Group::X86FMA3Intrinsic |                                 \
-             X86InstrFMA3Group::X86FMA3KZeroMasked)                            \
-  FMA3RA(Name##132SDZrb_Intkz, Name##213SDZrb_Intkz, Name##231SDZrb_Intkz,     \
-         X86InstrFMA3Group::X86FMA3Intrinsic |                                 \
-             X86InstrFMA3Group::X86FMA3KZeroMasked)
-
-#define FMA3_AVX512_FULL_GROUP(Name)                                           \
-  FMA3_AVX512_VECTOR_GROUP(Name)                                               \
-  FMA3_AVX512_SCALAR_GROUP(Name)
+#define FMA3GROUP_MASKED(Name, Suf, Attrs) \
+  FMA3GROUP(Name, Suf, Attrs) \
+  FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \
+  FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)
+
+#define FMA3GROUP_PACKED_WIDTHS(Name, Suf, Attrs) \
+  FMA3GROUP(Name, Suf##Ym, Attrs) \
+  FMA3GROUP(Name, Suf##Yr, Attrs) \
+  FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \
+  FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \
+  FMA3GROUP_MASKED(Name, Suf##Z256m, Attrs) \
+  FMA3GROUP_MASKED(Name, Suf##Z256r, Attrs) \
+  FMA3GROUP_MASKED(Name, Suf##Zm, Attrs) \
+  FMA3GROUP_MASKED(Name, Suf##Zr, Attrs) \
+  FMA3GROUP(Name, Suf##m, Attrs) \
+  FMA3GROUP(Name, Suf##r, Attrs)
+
+#define FMA3GROUP_PACKED(Name, Attrs) \
+  FMA3GROUP_PACKED_WIDTHS(Name, PD, Attrs) \
+  FMA3GROUP_PACKED_WIDTHS(Name, PS, Attrs)
+
+#define FMA3GROUP_SCALAR_WIDTHS(Name, Suf, Attrs) \
+  FMA3GROUP(Name, Suf##Zm, Attrs) \
+  FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
+  FMA3GROUP(Name, Suf##Zr, Attrs) \
+  FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
+  FMA3GROUP(Name, Suf##m, Attrs) \
+  FMA3GROUP(Name, Suf##m_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
+  FMA3GROUP(Name, Suf##r, Attrs) \
+  FMA3GROUP(Name, Suf##r_Int, Attrs | X86InstrFMA3Group::Intrinsic)
+
+#define FMA3GROUP_SCALAR(Name, Attrs) \
+  FMA3GROUP_SCALAR_WIDTHS(Name, SD, Attrs) \
+  FMA3GROUP_SCALAR_WIDTHS(Name, SS, Attrs) \
+
+#define FMA3GROUP_FULL(Name, Attrs) \
+  FMA3GROUP_PACKED(Name, Attrs) \
+  FMA3GROUP_SCALAR(Name, Attrs)
 
 static const X86InstrFMA3Group Groups[] = {
-  FMA3_AVX2_FULL_GROUP(VFMADD)
-  FMA3_AVX2_FULL_GROUP(VFMSUB)
-  FMA3_AVX2_FULL_GROUP(VFNMADD)
-  FMA3_AVX2_FULL_GROUP(VFNMSUB)
-
-  FMA3_AVX2_VECTOR_GROUP(VFMADDSUB)
-  FMA3_AVX2_VECTOR_GROUP(VFMSUBADD)
-
-  FMA3_AVX512_FULL_GROUP(VFMADD)
-  FMA3_AVX512_FULL_GROUP(VFMSUB)
-  FMA3_AVX512_FULL_GROUP(VFNMADD)
-  FMA3_AVX512_FULL_GROUP(VFNMSUB)
-
-  FMA3_AVX512_VECTOR_GROUP(VFMADDSUB)
-  FMA3_AVX512_VECTOR_GROUP(VFMSUBADD)
+  FMA3GROUP_FULL(VFMADD, 0)
+  FMA3GROUP_PACKED(VFMADDSUB, 0)
+  FMA3GROUP_FULL(VFMSUB, 0)
+  FMA3GROUP_PACKED(VFMSUBADD, 0)
+  FMA3GROUP_FULL(VFNMADD, 0)
+  FMA3GROUP_FULL(VFNMSUB, 0)
 };
 
-namespace {
+#define FMA3GROUP_PACKED_AVX512_WIDTHS(Name, Type, Suf, Attrs) \
+  FMA3GROUP_MASKED(Name, Type##Z128##Suf, Attrs) \
+  FMA3GROUP_MASKED(Name, Type##Z256##Suf, Attrs) \
+  FMA3GROUP_MASKED(Name, Type##Z##Suf, Attrs)
+
+#define FMA3GROUP_PACKED_AVX512(Name, Suf, Attrs) \
+  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
+  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
+
+#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs) \
+  FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs) \
+  FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)
+
+#define FMA3GROUP_SCALAR_AVX512(Name, Suf, Attrs) \
+  FMA3GROUP_MASKED(Name, SDZ##Suf, Attrs) \
+  FMA3GROUP_MASKED(Name, SSZ##Suf, Attrs)
+
+static const X86InstrFMA3Group BroadcastGroups[] = {
+  FMA3GROUP_PACKED_AVX512(VFMADD, mb, 0)
+  FMA3GROUP_PACKED_AVX512(VFMADDSUB, mb, 0)
+  FMA3GROUP_PACKED_AVX512(VFMSUB, mb, 0)
+  FMA3GROUP_PACKED_AVX512(VFMSUBADD, mb, 0)
+  FMA3GROUP_PACKED_AVX512(VFNMADD, mb, 0)
+  FMA3GROUP_PACKED_AVX512(VFNMSUB, mb, 0)
+};
 
-struct X86InstrFMA3Info {
-  /// A map that is used to find the group of FMA opcodes using any FMA opcode
-  /// from the group.
-  DenseMap<unsigned, const X86InstrFMA3Group *> OpcodeToGroup;
-
-  /// Constructor. Just creates an object of the class.
-  X86InstrFMA3Info() {
-    for (const X86InstrFMA3Group &G : Groups) {
-      OpcodeToGroup[G.Opcodes[0]] = &G;
-      OpcodeToGroup[G.Opcodes[1]] = &G;
-      OpcodeToGroup[G.Opcodes[2]] = &G;
-    }
-  }
+static const X86InstrFMA3Group RoundGroups[] = {
+  FMA3GROUP_PACKED_AVX512_ROUND(VFMADD, rb, 0)
+  FMA3GROUP_SCALAR_AVX512(VFMADD, rb_Int, X86InstrFMA3Group::Intrinsic)
+  FMA3GROUP_PACKED_AVX512_ROUND(VFMADDSUB, rb, 0)
+  FMA3GROUP_PACKED_AVX512_ROUND(VFMSUB, rb, 0)
+  FMA3GROUP_SCALAR_AVX512(VFMSUB, rb_Int, X86InstrFMA3Group::Intrinsic)
+  FMA3GROUP_PACKED_AVX512_ROUND(VFMSUBADD, rb, 0)
+  FMA3GROUP_PACKED_AVX512_ROUND(VFNMADD, rb, 0)
+  FMA3GROUP_SCALAR_AVX512(VFNMADD, rb_Int, X86InstrFMA3Group::Intrinsic)
+  FMA3GROUP_PACKED_AVX512_ROUND(VFNMSUB, rb, 0)
+  FMA3GROUP_SCALAR_AVX512(VFNMSUB, rb_Int, X86InstrFMA3Group::Intrinsic)
 };
 
+static void verifyTables() {
+#ifndef NDEBUG
+  static std::atomic<bool> TableChecked(false);
+  if (!TableChecked.load(std::memory_order_relaxed)) {
+    assert(std::is_sorted(std::begin(Groups), std::end(Groups)) &&
+           std::is_sorted(std::begin(RoundGroups), std::end(RoundGroups)) &&
+           std::is_sorted(std::begin(BroadcastGroups),
+                          std::end(BroadcastGroups)) &&
+           "FMA3 tables not sorted!");
+    TableChecked.store(true, std::memory_order_relaxed);
+  }
+#endif
 }
 
-static ManagedStatic<X86InstrFMA3Info> X86FMA3InfoObj;
-
 /// Returns a reference to a group of FMA3 opcodes to where the given
 /// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
 /// and not included into any FMA3 group, then nullptr is returned.
-const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode) {
-  auto &Map = X86FMA3InfoObj->OpcodeToGroup;
-  auto I = Map.find(Opcode);
-  if (I != Map.end())
-    return I->second;
+const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
 
-  return nullptr;
+  // FMA3 instructions have a well defined encoding pattern we can exploit.
+  uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+  bool IsFMA3 = ((TSFlags & X86II::EncodingMask) == X86II::VEX ||
+                 (TSFlags & X86II::EncodingMask) == X86II::EVEX) &&
+                (TSFlags & X86II::OpMapMask) == X86II::T8 &&
+                (TSFlags & X86II::OpPrefixMask) == X86II::PD &&
+                ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
+                 (BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
+                 (BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
+  if (!IsFMA3)
+    return nullptr;
+
+  verifyTables();
+
+  ArrayRef<X86InstrFMA3Group> Table;
+  if (TSFlags & X86II::EVEX_RC)
+    Table = makeArrayRef(RoundGroups);
+  else if (TSFlags & X86II::EVEX_B)
+    Table = makeArrayRef(BroadcastGroups);
+  else
+    Table = makeArrayRef(Groups);
+
+  // FMA 132 instructions have an opcode of 0x96-0x9F
+  // FMA 213 instructions have an opcode of 0xA6-0xAF
+  // FMA 231 instructions have an opcode of 0xB6-0xBF
+  unsigned FormIndex = ((BaseOpcode - 0x90) >> 4) & 0x3;
+
+  auto I = std::lower_bound(Table.begin(), Table.end(), Opcode,
+                            [FormIndex](const X86InstrFMA3Group &Group,
+                                        unsigned Opcode) {
+                              return Group.Opcodes[FormIndex] < Opcode;
+                            });
+  assert(I != Table.end() && I->Opcodes[FormIndex] == Opcode &&
+         "Couldn't find FMA3 opcode!");
+  return I;
 }

Modified: llvm/trunk/lib/Target/X86/X86InstrFMA3Info.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFMA3Info.h?rev=336075&r1=336074&r2=336075&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFMA3Info.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFMA3Info.h Sun Jul  1 23:23:39 2018
@@ -43,17 +43,17 @@ struct X86InstrFMA3Group {
   enum : uint16_t {
     /// This bit must be set in the 'Attributes' field of FMA group if such
     /// group of FMA opcodes consists of FMA intrinsic opcodes.
-    X86FMA3Intrinsic = 0x1,
+    Intrinsic = 0x1,
 
     /// This bit must be set in the 'Attributes' field of FMA group if such
     /// group of FMA opcodes consists of AVX512 opcodes accepting a k-mask and
     /// passing the elements from the 1st operand to the result of the operation
     /// when the correpondings bits in the k-mask are unset.
-    X86FMA3KMergeMasked = 0x2,
+    KMergeMasked = 0x2,
 
     /// This bit must be set in the 'Attributes' field of FMA group if such
     /// group of FMA opcodes consists of AVX512 opcodes accepting a k-zeromask.
-    X86FMA3KZeroMasked = 0x4,
+    KZeroMasked = 0x4,
   };
 
   /// Returns the 132 form of FMA opcode.
@@ -72,26 +72,30 @@ struct X86InstrFMA3Group {
   }
 
   /// Returns true iff the group of FMA opcodes holds intrinsic opcodes.
-  bool isIntrinsic() const { return (Attributes & X86FMA3Intrinsic) != 0; }
+  bool isIntrinsic() const { return (Attributes & Intrinsic) != 0; }
 
   /// Returns true iff the group of FMA opcodes holds k-merge-masked opcodes.
   bool isKMergeMasked() const {
-    return (Attributes & X86FMA3KMergeMasked) != 0;
+    return (Attributes & KMergeMasked) != 0;
   }
 
   /// Returns true iff the group of FMA opcodes holds k-zero-masked opcodes.
-  bool isKZeroMasked() const { return (Attributes & X86FMA3KZeroMasked) != 0; }
+  bool isKZeroMasked() const { return (Attributes &KZeroMasked) != 0; }
 
   /// Returns true iff the group of FMA opcodes holds any of k-masked opcodes.
   bool isKMasked() const {
-    return (Attributes & (X86FMA3KMergeMasked | X86FMA3KZeroMasked)) != 0;
+    return (Attributes & (KMergeMasked | KZeroMasked)) != 0;
+  }
+
+  bool operator<(const X86InstrFMA3Group &RHS) const {
+    return Opcodes[0] < RHS.Opcodes[0];
   }
 };
 
 /// Returns a reference to a group of FMA3 opcodes to where the given
 /// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
 /// and not included into any FMA3 group, then nullptr is returned.
-const X86InstrFMA3Group *getFMA3Group(unsigned Opcode);
+const X86InstrFMA3Group *getFMA3Group(unsigned Opcode, uint64_t TSFlags);
 
 } // end namespace llvm
 

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=336075&r1=336074&r2=336075&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Jul  1 23:23:39 2018
@@ -1803,7 +1803,8 @@ MachineInstr *X86InstrInfo::commuteInstr
                                                      OpIdx1, OpIdx2);
     }
 
-    const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode());
+    const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
+                                                      MI.getDesc().TSFlags);
     if (FMA3Group) {
       unsigned Opc =
         getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
@@ -2039,7 +2040,8 @@ bool X86InstrInfo::findCommutedOpIndices
   }
 
   default:
-    const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode());
+    const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
+                                                      MI.getDesc().TSFlags);
     if (FMA3Group)
       return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
                                            FMA3Group->isIntrinsic());




More information about the llvm-commits mailing list