[llvm] [TableGen] Extend direct lookup to instruction values in generic tables. (PR #80486)

Jason Eckhardt via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 6 07:18:53 PST 2024


https://github.com/nvjle updated https://github.com/llvm/llvm-project/pull/80486

>From b4ca7d31fe122bb2f49f9549fde07195c7dc7642 Mon Sep 17 00:00:00 2001
From: Jason Eckhardt <jeckhardt at nvidia.com>
Date: Fri, 2 Feb 2024 13:21:37 -0600
Subject: [PATCH 1/4] [TableGen] Extend direct lookup to instruction values in
 generic tables.

Currently, for some tables involving a single primary key field which is
integral and densely numbered, a direct lookup is generated rather than
a binary search. This patch extends the direct lookup function generation
to instructions, where the integral value corresponds to the instruction's
enum value.

While this isn't as common as for other tables, it does occur in at least
one downstream backend and one in-tree backend.

Added a unit test and minimally updated the documentation.
---
 llvm/docs/TableGen/BackEnds.rst               |  4 +-
 .../TableGen/generic-tables-instruction.td    | 61 ++++++++++++++++++-
 .../utils/TableGen/SearchableTableEmitter.cpp | 53 +++++++++++++---
 3 files changed, 106 insertions(+), 12 deletions(-)

diff --git a/llvm/docs/TableGen/BackEnds.rst b/llvm/docs/TableGen/BackEnds.rst
index 742fea51bcf32b..901cb989a5edba 100644
--- a/llvm/docs/TableGen/BackEnds.rst
+++ b/llvm/docs/TableGen/BackEnds.rst
@@ -817,7 +817,9 @@ The table entries in ``ATable`` are sorted in order by ``Val1``, and within
 each of those values, by ``Val2``. This allows a binary search of the table,
 which is performed in the lookup function by ``std::lower_bound``. The
 lookup function returns a reference to the found table entry, or the null
-pointer if no entry is found.
+pointer if no entry is found. If the table has a single primary key field
+which is integral and densely numbered, a direct lookup is generated rather
+than a binary search.
 
 This example includes a field whose type TableGen cannot deduce. The ``Kind``
 field uses the enumerated type ``CEnum`` defined above. To inform TableGen
diff --git a/llvm/test/TableGen/generic-tables-instruction.td b/llvm/test/TableGen/generic-tables-instruction.td
index a3bed650890bd3..3be2462c9ab69e 100644
--- a/llvm/test/TableGen/generic-tables-instruction.td
+++ b/llvm/test/TableGen/generic-tables-instruction.td
@@ -2,6 +2,10 @@
 // XFAIL: vg_leak
 
 include "llvm/TableGen/SearchableTable.td"
+include "llvm/Target/Target.td"
+
+def ArchInstrInfo : InstrInfo { }
+def Arch : Target { let InstructionSet = ArchInstrInfo; }
 
 // CHECK-LABEL: GET_InstrTable_IMPL
 // CHECK: constexpr MyInstr InstrTable[] = {
@@ -11,11 +15,20 @@ include "llvm/TableGen/SearchableTable.td"
 // CHECK:   { D, 0x8 },
 // CHECK: };
 
-class Instruction {
-  bit isPseudo = 0;
-}
+// A contiguous primary (Instruction) key should get a direct lookup instead of
+// binary search.
+// CHECK: const MyInstr *getCustomEncodingHelper(unsigned Opcode) {
+// CHECK:   if ((Opcode < B) ||
+// CHECK:       (Opcode > D))
+// CHECK:     return nullptr;
+// CHECK:   auto Table = ArrayRef(InstrTable);
+// CHECK:   size_t Idx = Opcode - B;
+// CHECK:   return &Table[Idx];
+
 
 class MyInstr<int op> : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins);
   Instruction Opcode = !cast<Instruction>(NAME);
   bits<16> CustomEncoding = op;
 }
@@ -34,3 +47,45 @@ def InstrTable : GenericTable {
   let PrimaryKey = ["Opcode"];
   let PrimaryKeyName = "getCustomEncodingHelper";
 }
+
+
+// Non-contiguous instructions should get a binary search instead of direct
+// lookup.
+// CHECK: const MyInfoEntry *getTable2ByOpcode(unsigned Opcode) {
+// CHECK:   auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,
+//
+// Verify contiguous check for SearchIndex.
+// const MyInfoEntry *getTable2ByValue(uint8_t Value) {
+// CHECK:   if ((Value < 0xB) ||
+// CHECK:      (Value > 0xD))
+// CHECK:    return nullptr;
+// CHECK:  auto Table = ArrayRef(Index);
+// CHECK:  size_t Idx = Value - 0xB;
+// CHECK:  return &InstrTable2[Table[Idx]._index];
+
+
+class MyInfoEntry<int V, string S> {
+  Instruction Opcode = !cast<Instruction>(NAME);
+  bits<4> Value = V;
+  string Name = S;
+}
+
+let OutOperandList = (outs), InOperandList = (ins) in {
+def W : Instruction, MyInfoEntry<12, "IW">;
+def X : Instruction;
+def Y : Instruction, MyInfoEntry<13, "IY">;
+def Z : Instruction, MyInfoEntry<11, "IZ">;
+}
+
+def InstrTable2 : GenericTable {
+  let FilterClass = "MyInfoEntry";
+  let Fields = ["Opcode", "Value", "Name"];
+
+  let PrimaryKey = ["Opcode"];
+  let PrimaryKeyName = "getTable2ByOpcode";
+}
+
+def getTable2ByValue : SearchIndex {
+  let Table = InstrTable2;
+  let Key = ["Value"];
+}
diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp
index 9987d1ec73d9f4..0953ee977b3ed7 100644
--- a/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -7,12 +7,15 @@
 //===----------------------------------------------------------------------===//
 //
 // This tablegen backend emits a generic array initialized by specified fields,
-// together with companion index tables and lookup functions (binary search,
-// currently).
+// together with companion index tables and lookup functions. The lookup
+// function generated is either a direct lookup (when a single primary key field
+// is integral and densely numbered) or a binary search otherwise.
 //
 //===----------------------------------------------------------------------===//
 
+#include "CodeGenInstruction.h"
 #include "CodeGenIntrinsics.h"
+#include "CodeGenTarget.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
@@ -31,6 +34,8 @@ using namespace llvm;
 
 namespace {
 
+using InstrEnumMapT = DenseMap<Record *, unsigned>;
+
 int64_t getAsInt(Init *B) {
   return cast<IntInit>(
              B->convertInitializerTo(IntRecTy::get(B->getRecordKeeper())))
@@ -94,6 +99,7 @@ class SearchableTableEmitter {
   std::vector<std::unique_ptr<GenericEnum>> Enums;
   DenseMap<Record *, GenericEnum *> EnumMap;
   std::set<std::string> PreprocessorGuards;
+  InstrEnumMapT InstrEnumValueMap;
 
 public:
   SearchableTableEmitter(RecordKeeper &R) : Records(R) {}
@@ -207,12 +213,17 @@ class SearchableTableEmitter {
 
 // For search indices that consists of a single field whose numeric value is
 // known, return that numeric value.
-static int64_t getNumericKey(const SearchIndex &Index, Record *Rec) {
+static int64_t getNumericKey(const SearchIndex &Index, Record *Rec,
+                             InstrEnumMapT &InstrEnumMap) {
   assert(Index.Fields.size() == 1);
 
   if (Index.Fields[0].Enum) {
     Record *EnumEntry = Rec->getValueAsDef(Index.Fields[0].Name);
     return Index.Fields[0].Enum->EntryMap[EnumEntry]->second;
+  } else if (Index.Fields[0].IsInstruction) {
+    Record *TheDef = Rec->getValueAsDef(Index.Fields[0].Name);
+    assert(!InstrEnumMap.empty());
+    return InstrEnumMap[TheDef];
   }
 
   return getInt(Rec, Index.Fields[0].Name);
@@ -368,12 +379,16 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
   }
 
   bool IsContiguous = false;
+  int64_t FirstKeyVal = 0;
 
   if (Index.Fields.size() == 1 &&
-      (Index.Fields[0].Enum || isa<BitsRecTy>(Index.Fields[0].RecType))) {
+      (Index.Fields[0].Enum || isa<BitsRecTy>(Index.Fields[0].RecType) ||
+       Index.Fields[0].IsInstruction)) {
+    FirstKeyVal = getNumericKey(Index, IndexRows[0], InstrEnumValueMap);
     IsContiguous = true;
     for (unsigned i = 0; i < IndexRows.size(); ++i) {
-      if (getNumericKey(Index, IndexRows[i]) != i) {
+      if (getNumericKey(Index, IndexRows[i], InstrEnumValueMap) !=
+          (FirstKeyVal + i)) {
         IsContiguous = false;
         break;
       }
@@ -381,9 +396,18 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
   }
 
   if (IsContiguous) {
+    const GenericField &Field = Index.Fields[0];
+    std::string FirstRepr = primaryRepresentation(
+        Index.Loc, Field, IndexRows[0]->getValueInit(Field.Name));
+    std::string LastRepr = primaryRepresentation(
+        Index.Loc, Field, IndexRows.back()->getValueInit(Field.Name));
+    OS << "  if ((" << Field.Name << " < " << FirstRepr << ") ||\n";
+    OS << "      (" << Field.Name << " > " << LastRepr << "))\n";
+    OS << "    return nullptr;\n";
     OS << "  auto Table = ArrayRef(" << IndexName << ");\n";
-    OS << "  size_t Idx = " << Index.Fields[0].Name << ";\n";
-    OS << "  return Idx >= Table.size() ? nullptr : ";
+    OS << "  size_t Idx = " << Index.Fields[0].Name << " - " << FirstRepr
+       << ";\n";
+    OS << "  return ";
     if (IsPrimary)
       OS << "&Table[Idx]";
     else
@@ -638,6 +662,7 @@ void SearchableTableEmitter::collectTableEntries(
 
   Record *IntrinsicClass = Records.getClass("Intrinsic");
   Record *InstructionClass = Records.getClass("Instruction");
+  bool SawInstructionField = false;
   for (auto &Field : Table.Fields) {
     if (!Field.RecType)
       PrintFatalError(Twine("Cannot determine type of field '") + Field.Name +
@@ -646,11 +671,23 @@ void SearchableTableEmitter::collectTableEntries(
     if (auto RecordTy = dyn_cast<RecordRecTy>(Field.RecType)) {
       if (IntrinsicClass && RecordTy->isSubClassOf(IntrinsicClass))
         Field.IsIntrinsic = true;
-      else if (InstructionClass && RecordTy->isSubClassOf(InstructionClass))
+      else if (InstructionClass && RecordTy->isSubClassOf(InstructionClass)) {
         Field.IsInstruction = true;
+        SawInstructionField = true;
+      }
     }
   }
 
+  // Build instruction-to-int map to check for contiguous instruction values.
+  // These are the same values emitted by InstrInfoEmitter. Do this on demand
+  // only after it is known that there are definitely instruction fields.
+  if (SawInstructionField && InstrEnumValueMap.empty()) {
+    CodeGenTarget Target(Records);
+    unsigned Num = 0;
+    for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue())
+      InstrEnumValueMap[Inst->TheDef] = Num++;
+  }
+
   SearchIndex Idx;
   std::copy(Table.Fields.begin(), Table.Fields.end(),
             std::back_inserter(Idx.Fields));

>From c16b70e80d0a5832cf38cb718c2d58e870e07879 Mon Sep 17 00:00:00 2001
From: Jason Eckhardt <jeckhardt at nvidia.com>
Date: Sun, 4 Feb 2024 15:34:24 -0600
Subject: [PATCH 2/4] Address review comments.

Move the instruction-to-int map into CodeGenTarget::InstrToIntMap and
add CodeGenTarget::getInstrIntValue query method.

Populate the map in CodeGenTarget::ComputeInstrsByEnum just below
where the order is created in the first place.
---
 llvm/utils/TableGen/CodeGenTarget.cpp         |  7 ++++
 llvm/utils/TableGen/CodeGenTarget.h           | 10 ++++++
 .../utils/TableGen/SearchableTableEmitter.cpp | 36 ++++++-------------
 3 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index 37fa30349eea9f..cf0049e6d33e3c 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -538,6 +538,13 @@ void CodeGenTarget::ComputeInstrsByEnum() const {
         return std::make_tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) <
                std::make_tuple(!D2.getValueAsBit("isPseudo"), D2.getName());
       });
+
+  // Build the instruction-to-int map using the same values emitted by
+  // InstrInfoEmitter::emitEnums.
+  assert(InstrToIntMap.empty());
+  unsigned Num = 0;
+  for (const CodeGenInstruction *Inst : InstrsByEnum)
+    InstrToIntMap[Inst->TheDef] = Num++;
 }
 
 
diff --git a/llvm/utils/TableGen/CodeGenTarget.h b/llvm/utils/TableGen/CodeGenTarget.h
index 64c3f4470d5e08..0bb8e45a56064e 100644
--- a/llvm/utils/TableGen/CodeGenTarget.h
+++ b/llvm/utils/TableGen/CodeGenTarget.h
@@ -74,6 +74,7 @@ class CodeGenTarget {
 
   mutable StringRef InstNamespace;
   mutable std::vector<const CodeGenInstruction*> InstrsByEnum;
+  mutable DenseMap<const Record *, unsigned> InstrToIntMap;
   mutable unsigned NumPseudoInstructions = 0;
 public:
   CodeGenTarget(RecordKeeper &Records);
@@ -192,6 +193,15 @@ class CodeGenTarget {
     return InstrsByEnum;
   }
 
+  /// Return the integer enum value corresponding to this instruction record.
+  unsigned getInstrIntValue(const Record *R) const {
+    if (InstrsByEnum.empty())
+      ComputeInstrsByEnum();
+    auto V = InstrToIntMap.find(R);
+    assert(V != InstrToIntMap.end() && "instr not in InstrToIntMap");
+    return V->second;
+  }
+
   typedef ArrayRef<const CodeGenInstruction *>::const_iterator inst_iterator;
   inst_iterator inst_begin() const{return getInstructionsByEnumValue().begin();}
   inst_iterator inst_end() const { return getInstructionsByEnumValue().end(); }
diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp
index 0953ee977b3ed7..d4929f6044cb45 100644
--- a/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -13,7 +13,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "CodeGenInstruction.h"
 #include "CodeGenIntrinsics.h"
 #include "CodeGenTarget.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -34,8 +33,6 @@ using namespace llvm;
 
 namespace {
 
-using InstrEnumMapT = DenseMap<Record *, unsigned>;
-
 int64_t getAsInt(Init *B) {
   return cast<IntInit>(
              B->convertInitializerTo(IntRecTy::get(B->getRecordKeeper())))
@@ -95,11 +92,11 @@ struct GenericTable {
 
 class SearchableTableEmitter {
   RecordKeeper &Records;
+  std::unique_ptr<CodeGenTarget> Target;
   DenseMap<Init *, std::unique_ptr<CodeGenIntrinsic>> Intrinsics;
   std::vector<std::unique_ptr<GenericEnum>> Enums;
   DenseMap<Record *, GenericEnum *> EnumMap;
   std::set<std::string> PreprocessorGuards;
-  InstrEnumMapT InstrEnumValueMap;
 
 public:
   SearchableTableEmitter(RecordKeeper &R) : Records(R) {}
@@ -207,14 +204,15 @@ class SearchableTableEmitter {
                           const std::vector<Record *> &Items);
   void collectTableEntries(GenericTable &Table,
                            const std::vector<Record *> &Items);
+  int64_t getNumericKey(const SearchIndex &Index, Record *Rec);
 };
 
 } // End anonymous namespace.
 
 // For search indices that consists of a single field whose numeric value is
 // known, return that numeric value.
-static int64_t getNumericKey(const SearchIndex &Index, Record *Rec,
-                             InstrEnumMapT &InstrEnumMap) {
+int64_t SearchableTableEmitter::getNumericKey(const SearchIndex &Index,
+                                              Record *Rec) {
   assert(Index.Fields.size() == 1);
 
   if (Index.Fields[0].Enum) {
@@ -222,8 +220,7 @@ static int64_t getNumericKey(const SearchIndex &Index, Record *Rec,
     return Index.Fields[0].Enum->EntryMap[EnumEntry]->second;
   } else if (Index.Fields[0].IsInstruction) {
     Record *TheDef = Rec->getValueAsDef(Index.Fields[0].Name);
-    assert(!InstrEnumMap.empty());
-    return InstrEnumMap[TheDef];
+    return Target->getInstrIntValue(TheDef);
   }
 
   return getInt(Rec, Index.Fields[0].Name);
@@ -379,16 +376,14 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
   }
 
   bool IsContiguous = false;
-  int64_t FirstKeyVal = 0;
 
   if (Index.Fields.size() == 1 &&
       (Index.Fields[0].Enum || isa<BitsRecTy>(Index.Fields[0].RecType) ||
        Index.Fields[0].IsInstruction)) {
-    FirstKeyVal = getNumericKey(Index, IndexRows[0], InstrEnumValueMap);
+    int64_t FirstKeyVal = getNumericKey(Index, IndexRows[0]);
     IsContiguous = true;
     for (unsigned i = 0; i < IndexRows.size(); ++i) {
-      if (getNumericKey(Index, IndexRows[i], InstrEnumValueMap) !=
-          (FirstKeyVal + i)) {
+      if (getNumericKey(Index, IndexRows[i]) != (FirstKeyVal + i)) {
         IsContiguous = false;
         break;
       }
@@ -662,7 +657,6 @@ void SearchableTableEmitter::collectTableEntries(
 
   Record *IntrinsicClass = Records.getClass("Intrinsic");
   Record *InstructionClass = Records.getClass("Instruction");
-  bool SawInstructionField = false;
   for (auto &Field : Table.Fields) {
     if (!Field.RecType)
       PrintFatalError(Twine("Cannot determine type of field '") + Field.Name +
@@ -671,23 +665,11 @@ void SearchableTableEmitter::collectTableEntries(
     if (auto RecordTy = dyn_cast<RecordRecTy>(Field.RecType)) {
       if (IntrinsicClass && RecordTy->isSubClassOf(IntrinsicClass))
         Field.IsIntrinsic = true;
-      else if (InstructionClass && RecordTy->isSubClassOf(InstructionClass)) {
+      else if (InstructionClass && RecordTy->isSubClassOf(InstructionClass))
         Field.IsInstruction = true;
-        SawInstructionField = true;
-      }
     }
   }
 
-  // Build instruction-to-int map to check for contiguous instruction values.
-  // These are the same values emitted by InstrInfoEmitter. Do this on demand
-  // only after it is known that there are definitely instruction fields.
-  if (SawInstructionField && InstrEnumValueMap.empty()) {
-    CodeGenTarget Target(Records);
-    unsigned Num = 0;
-    for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue())
-      InstrEnumValueMap[Inst->TheDef] = Num++;
-  }
-
   SearchIndex Idx;
   std::copy(Table.Fields.begin(), Table.Fields.end(),
             std::back_inserter(Idx.Fields));
@@ -700,6 +682,8 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
   // Emit tables in a deterministic order to avoid needless rebuilds.
   SmallVector<std::unique_ptr<GenericTable>, 4> Tables;
   DenseMap<Record *, GenericTable *> TableMap;
+  if (Records.getClass("Instruction"))
+    Target = std::make_unique<CodeGenTarget>(Records);
 
   // Collect all definitions first.
   for (auto *EnumRec : Records.getAllDerivedDefinitions("GenericEnum")) {

>From 70396ee1f11326ab5a93768510d98dcb5b57a8f5 Mon Sep 17 00:00:00 2001
From: Jason Eckhardt <jeckhardt at nvidia.com>
Date: Mon, 5 Feb 2024 07:38:31 -0600
Subject: [PATCH 3/4] Address review nit. Add derived definitions guard.

---
 llvm/utils/TableGen/SearchableTableEmitter.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp
index d4929f6044cb45..cdb1a2da4a3dc1 100644
--- a/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -682,7 +682,8 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
   // Emit tables in a deterministic order to avoid needless rebuilds.
   SmallVector<std::unique_ptr<GenericTable>, 4> Tables;
   DenseMap<Record *, GenericTable *> TableMap;
-  if (Records.getClass("Instruction"))
+  if (Records.getClass("Instruction") &&
+      !Records.getAllDerivedDefinitions("Instruction").empty())
     Target = std::make_unique<CodeGenTarget>(Records);
 
   // Collect all definitions first.

>From 1fae47081b75a6539fd4f7bb174154e2dd17de9f Mon Sep 17 00:00:00 2001
From: Jason Eckhardt <jeckhardt at nvidia.com>
Date: Tue, 6 Feb 2024 08:45:46 -0600
Subject: [PATCH 4/4] Address review comments.

Use getAllDerivedDefinitionsIfDefined.
---
 llvm/utils/TableGen/SearchableTableEmitter.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp
index cdb1a2da4a3dc1..d75a9e95a7eb05 100644
--- a/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -682,8 +682,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
   // Emit tables in a deterministic order to avoid needless rebuilds.
   SmallVector<std::unique_ptr<GenericTable>, 4> Tables;
   DenseMap<Record *, GenericTable *> TableMap;
-  if (Records.getClass("Instruction") &&
-      !Records.getAllDerivedDefinitions("Instruction").empty())
+  if (!Records.getAllDerivedDefinitionsIfDefined("Instruction").empty())
     Target = std::make_unique<CodeGenTarget>(Records);
 
   // Collect all definitions first.



More information about the llvm-commits mailing list