[llvm] [TableGen] Extend direct lookup to instruction values in generic tables. (PR #80486)

Jason Eckhardt via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 2 12:09:40 PST 2024


https://github.com/nvjle created https://github.com/llvm/llvm-project/pull/80486

Currently, for some tables involving a single primary key field which is integral and densely numbered, a direct lookup is generated rather than a binary search. This patch extends the direct lookup function generation to instructions, where the integral value corresponds to the instruction's enum value.

While this isn't as common as for other tables, it does occur in at least one downstream backend and one in-tree backend.

Added a unit test and minimally updated the documentation.

From b4ca7d31fe122bb2f49f9549fde07195c7dc7642 Mon Sep 17 00:00:00 2001
From: Jason Eckhardt <jeckhardt at nvidia.com>
Date: Fri, 2 Feb 2024 13:21:37 -0600
Subject: [PATCH] [TableGen] Extend direct lookup to instruction values in
 generic tables.

Currently, for some tables involving a single primary key field which is
integral and densely numbered, a direct lookup is generated rather than
a binary search. This patch extends the direct lookup function generation
to instructions, where the integral value corresponds to the instruction's
enum value.

While this isn't as common as for other tables, it does occur in at least
one downstream backend and one in-tree backend.

Added a unit test and minimally updated the documentation.
---
 llvm/docs/TableGen/BackEnds.rst               |  4 +-
 .../TableGen/generic-tables-instruction.td    | 61 ++++++++++++++++++-
 .../utils/TableGen/SearchableTableEmitter.cpp | 53 +++++++++++++---
 3 files changed, 106 insertions(+), 12 deletions(-)

diff --git a/llvm/docs/TableGen/BackEnds.rst b/llvm/docs/TableGen/BackEnds.rst
index 742fea51bcf32..901cb989a5edb 100644
--- a/llvm/docs/TableGen/BackEnds.rst
+++ b/llvm/docs/TableGen/BackEnds.rst
@@ -817,7 +817,9 @@ The table entries in ``ATable`` are sorted in order by ``Val1``, and within
 each of those values, by ``Val2``. This allows a binary search of the table,
 which is performed in the lookup function by ``std::lower_bound``. The
 lookup function returns a reference to the found table entry, or the null
-pointer if no entry is found.
+pointer if no entry is found. If the table has a single primary key field
+which is integral and densely numbered, a direct lookup is generated rather
+than a binary search.
 
 This example includes a field whose type TableGen cannot deduce. The ``Kind``
 field uses the enumerated type ``CEnum`` defined above. To inform TableGen
diff --git a/llvm/test/TableGen/generic-tables-instruction.td b/llvm/test/TableGen/generic-tables-instruction.td
index a3bed650890bd..3be2462c9ab69 100644
--- a/llvm/test/TableGen/generic-tables-instruction.td
+++ b/llvm/test/TableGen/generic-tables-instruction.td
@@ -2,6 +2,10 @@
 // XFAIL: vg_leak
 
 include "llvm/TableGen/SearchableTable.td"
+include "llvm/Target/Target.td"
+
+def ArchInstrInfo : InstrInfo { }
+def Arch : Target { let InstructionSet = ArchInstrInfo; }
 
 // CHECK-LABEL: GET_InstrTable_IMPL
 // CHECK: constexpr MyInstr InstrTable[] = {
@@ -11,11 +15,20 @@ include "llvm/TableGen/SearchableTable.td"
 // CHECK:   { D, 0x8 },
 // CHECK: };
 
-class Instruction {
-  bit isPseudo = 0;
-}
+// A contiguous primary (Instruction) key should get a direct lookup instead of
+// binary search.
+// CHECK: const MyInstr *getCustomEncodingHelper(unsigned Opcode) {
+// CHECK:   if ((Opcode < B) ||
+// CHECK:       (Opcode > D))
+// CHECK:     return nullptr;
+// CHECK:   auto Table = ArrayRef(InstrTable);
+// CHECK:   size_t Idx = Opcode - B;
+// CHECK:   return &Table[Idx];
+
 
 class MyInstr<int op> : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins);
   Instruction Opcode = !cast<Instruction>(NAME);
   bits<16> CustomEncoding = op;
 }
@@ -34,3 +47,45 @@ def InstrTable : GenericTable {
   let PrimaryKey = ["Opcode"];
   let PrimaryKeyName = "getCustomEncodingHelper";
 }
+
+
+// Non-contiguous instructions should get a binary search instead of direct
+// lookup.
+// CHECK: const MyInfoEntry *getTable2ByOpcode(unsigned Opcode) {
+// CHECK:   auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,
+//
+// Verify contiguous check for SearchIndex.
+// CHECK: const MyInfoEntry *getTable2ByValue(uint8_t Value) {
+// CHECK:   if ((Value < 0xB) ||
+// CHECK:      (Value > 0xD))
+// CHECK:    return nullptr;
+// CHECK:  auto Table = ArrayRef(Index);
+// CHECK:  size_t Idx = Value - 0xB;
+// CHECK:  return &InstrTable2[Table[Idx]._index];
+
+
+class MyInfoEntry<int V, string S> {
+  Instruction Opcode = !cast<Instruction>(NAME);
+  bits<4> Value = V;
+  string Name = S;
+}
+
+let OutOperandList = (outs), InOperandList = (ins) in {
+def W : Instruction, MyInfoEntry<12, "IW">;
+def X : Instruction;
+def Y : Instruction, MyInfoEntry<13, "IY">;
+def Z : Instruction, MyInfoEntry<11, "IZ">;
+}
+
+def InstrTable2 : GenericTable {
+  let FilterClass = "MyInfoEntry";
+  let Fields = ["Opcode", "Value", "Name"];
+
+  let PrimaryKey = ["Opcode"];
+  let PrimaryKeyName = "getTable2ByOpcode";
+}
+
+def getTable2ByValue : SearchIndex {
+  let Table = InstrTable2;
+  let Key = ["Value"];
+}
diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp
index 9987d1ec73d9f..0953ee977b3ed 100644
--- a/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -7,12 +7,15 @@
 //===----------------------------------------------------------------------===//
 //
 // This tablegen backend emits a generic array initialized by specified fields,
-// together with companion index tables and lookup functions (binary search,
-// currently).
+// together with companion index tables and lookup functions. The lookup
+// function generated is either a direct lookup (when a single primary key field
+// is integral and densely numbered) or a binary search otherwise.
 //
 //===----------------------------------------------------------------------===//
 
+#include "CodeGenInstruction.h"
 #include "CodeGenIntrinsics.h"
+#include "CodeGenTarget.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
@@ -31,6 +34,8 @@ using namespace llvm;
 
 namespace {
 
+using InstrEnumMapT = DenseMap<Record *, unsigned>;
+
 int64_t getAsInt(Init *B) {
   return cast<IntInit>(
              B->convertInitializerTo(IntRecTy::get(B->getRecordKeeper())))
@@ -94,6 +99,7 @@ class SearchableTableEmitter {
   std::vector<std::unique_ptr<GenericEnum>> Enums;
   DenseMap<Record *, GenericEnum *> EnumMap;
   std::set<std::string> PreprocessorGuards;
+  InstrEnumMapT InstrEnumValueMap;
 
 public:
   SearchableTableEmitter(RecordKeeper &R) : Records(R) {}
@@ -207,12 +213,17 @@ class SearchableTableEmitter {
 
 // For search indices that consists of a single field whose numeric value is
 // known, return that numeric value.
-static int64_t getNumericKey(const SearchIndex &Index, Record *Rec) {
+static int64_t getNumericKey(const SearchIndex &Index, Record *Rec,
+                             InstrEnumMapT &InstrEnumMap) {
   assert(Index.Fields.size() == 1);
 
   if (Index.Fields[0].Enum) {
     Record *EnumEntry = Rec->getValueAsDef(Index.Fields[0].Name);
     return Index.Fields[0].Enum->EntryMap[EnumEntry]->second;
+  } else if (Index.Fields[0].IsInstruction) {
+    Record *TheDef = Rec->getValueAsDef(Index.Fields[0].Name);
+    assert(!InstrEnumMap.empty());
+    return InstrEnumMap[TheDef];
   }
 
   return getInt(Rec, Index.Fields[0].Name);
@@ -368,12 +379,16 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
   }
 
   bool IsContiguous = false;
+  int64_t FirstKeyVal = 0;
 
   if (Index.Fields.size() == 1 &&
-      (Index.Fields[0].Enum || isa<BitsRecTy>(Index.Fields[0].RecType))) {
+      (Index.Fields[0].Enum || isa<BitsRecTy>(Index.Fields[0].RecType) ||
+       Index.Fields[0].IsInstruction)) {
+    FirstKeyVal = getNumericKey(Index, IndexRows[0], InstrEnumValueMap);
     IsContiguous = true;
     for (unsigned i = 0; i < IndexRows.size(); ++i) {
-      if (getNumericKey(Index, IndexRows[i]) != i) {
+      if (getNumericKey(Index, IndexRows[i], InstrEnumValueMap) !=
+          (FirstKeyVal + i)) {
         IsContiguous = false;
         break;
       }
@@ -381,9 +396,18 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
   }
 
   if (IsContiguous) {
+    const GenericField &Field = Index.Fields[0];
+    std::string FirstRepr = primaryRepresentation(
+        Index.Loc, Field, IndexRows[0]->getValueInit(Field.Name));
+    std::string LastRepr = primaryRepresentation(
+        Index.Loc, Field, IndexRows.back()->getValueInit(Field.Name));
+    OS << "  if ((" << Field.Name << " < " << FirstRepr << ") ||\n";
+    OS << "      (" << Field.Name << " > " << LastRepr << "))\n";
+    OS << "    return nullptr;\n";
     OS << "  auto Table = ArrayRef(" << IndexName << ");\n";
-    OS << "  size_t Idx = " << Index.Fields[0].Name << ";\n";
-    OS << "  return Idx >= Table.size() ? nullptr : ";
+    OS << "  size_t Idx = " << Index.Fields[0].Name << " - " << FirstRepr
+       << ";\n";
+    OS << "  return ";
     if (IsPrimary)
       OS << "&Table[Idx]";
     else
@@ -638,6 +662,7 @@ void SearchableTableEmitter::collectTableEntries(
 
   Record *IntrinsicClass = Records.getClass("Intrinsic");
   Record *InstructionClass = Records.getClass("Instruction");
+  bool SawInstructionField = false;
   for (auto &Field : Table.Fields) {
     if (!Field.RecType)
       PrintFatalError(Twine("Cannot determine type of field '") + Field.Name +
@@ -646,11 +671,23 @@ void SearchableTableEmitter::collectTableEntries(
     if (auto RecordTy = dyn_cast<RecordRecTy>(Field.RecType)) {
       if (IntrinsicClass && RecordTy->isSubClassOf(IntrinsicClass))
         Field.IsIntrinsic = true;
-      else if (InstructionClass && RecordTy->isSubClassOf(InstructionClass))
+      else if (InstructionClass && RecordTy->isSubClassOf(InstructionClass)) {
         Field.IsInstruction = true;
+        SawInstructionField = true;
+      }
     }
   }
 
+  // Build instruction-to-int map to check for contiguous instruction values.
+  // These are the same values emitted by InstrInfoEmitter. Do this on demand
+  // only after it is known that there are definitely instruction fields.
+  if (SawInstructionField && InstrEnumValueMap.empty()) {
+    CodeGenTarget Target(Records);
+    unsigned Num = 0;
+    for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue())
+      InstrEnumValueMap[Inst->TheDef] = Num++;
+  }
+
   SearchIndex Idx;
   std::copy(Table.Fields.begin(), Table.Fields.end(),
             std::back_inserter(Idx.Fields));



More information about the llvm-commits mailing list