[llvm] [TableGen] Improvements to Named operands in InstrInfoEmitter (PR #124960)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 29 10:30:15 PST 2025


https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/124960

>From 114f547dec98ae23294f6ba5c8e7ba556fea3654 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Wed, 29 Jan 2025 09:40:47 -0800
Subject: [PATCH] [TableGenn] Improvements to Named operands in
 InstrInfoEmitter

- Assign OpName enum values in the same alphabetical order in which
  they are emitted (and get of OPERAND_LAST which is not used anywhere).
- Inline `initOperandMapData` into `emitOperandNameMappings` to help
  see clearly how various maps are computed.
- Emit the static `OperandMap` table as int8_t when possible. This
  should help reduce the static size by 50% in the common case.
---
 llvm/utils/TableGen/InstrInfoEmitter.cpp | 120 +++++++++++------------
 1 file changed, 55 insertions(+), 65 deletions(-)

diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index 12401a2f246a144..63178e7c20cb0ce 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -40,7 +40,7 @@
 
 using namespace llvm;
 
-cl::OptionCategory InstrInfoEmitterCat("Options for -gen-instr-info");
+static cl::OptionCategory InstrInfoEmitterCat("Options for -gen-instr-info");
 static cl::opt<bool> ExpandMIOperandInfo(
     "instr-info-expand-mi-operand-info",
     cl::desc("Expand operand's MIOperandInfo DAG into suboperands"),
@@ -67,13 +67,6 @@ class InstrInfoEmitter {
   typedef std::vector<OperandInfoTy> OperandInfoListTy;
   typedef std::map<OperandInfoTy, unsigned> OperandInfoMapTy;
 
-  /// The keys of this map are maps which have OpName enum values as their keys
-  /// and instruction operand indices as their values.  The values of this map
-  /// are lists of instruction names.
-  typedef std::map<std::map<unsigned, unsigned>, std::vector<std::string>>
-      OpNameMapTy;
-  typedef std::map<std::string, unsigned>::iterator StrUintMapIter;
-
   /// Generate member functions in the target-specific GenInstrInfo class.
   ///
   /// This method is used to custom expand TIIPredicate definitions.
@@ -95,15 +88,9 @@ class InstrInfoEmitter {
   void emitOperandTypeMappings(
       raw_ostream &OS, const CodeGenTarget &Target,
       ArrayRef<const CodeGenInstruction *> NumberedInstructions);
-  void
-  initOperandMapData(ArrayRef<const CodeGenInstruction *> NumberedInstructions,
-                     StringRef Namespace,
-                     std::map<std::string, unsigned> &Operands,
-                     OpNameMapTy &OperandMap);
   void emitOperandNameMappings(
       raw_ostream &OS, const CodeGenTarget &Target,
       ArrayRef<const CodeGenInstruction *> NumberedInstructions);
-
   void emitLogicalOperandSizeMappings(
       raw_ostream &OS, StringRef Namespace,
       ArrayRef<const CodeGenInstruction *> NumberedInstructions);
@@ -239,35 +226,6 @@ void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS,
   }
 }
 
-/// Initialize data structures for generating operand name mappings.
-///
-/// \param Operands [out] A map used to generate the OpName enum with operand
-///        names as its keys and operand enum values as its values.
-/// \param OperandMap [out] A map for representing the operand name mappings for
-///        each instructions.  This is used to generate the OperandMap table as
-///        well as the getNamedOperandIdx() function.
-void InstrInfoEmitter::initOperandMapData(
-    ArrayRef<const CodeGenInstruction *> NumberedInstructions,
-    StringRef Namespace, std::map<std::string, unsigned> &Operands,
-    OpNameMapTy &OperandMap) {
-  unsigned NumOperands = 0;
-  for (const CodeGenInstruction *Inst : NumberedInstructions) {
-    if (!Inst->TheDef->getValueAsBit("UseNamedOperandTable"))
-      continue;
-    std::map<unsigned, unsigned> OpList;
-    for (const auto &Info : Inst->Operands) {
-      StrUintMapIter I = Operands.find(Info.Name);
-
-      if (I == Operands.end()) {
-        I = Operands.insert(Operands.begin(), {Info.Name, NumOperands++});
-      }
-      OpList[I->second] = Info.MIOperandNo;
-    }
-    OperandMap[OpList].push_back(Namespace.str() +
-                                 "::" + Inst->TheDef->getName().str());
-  }
-}
-
 /// Generate a table and function for looking up the indices of operands by
 /// name.
 ///
@@ -283,22 +241,52 @@ void InstrInfoEmitter::emitOperandNameMappings(
     raw_ostream &OS, const CodeGenTarget &Target,
     ArrayRef<const CodeGenInstruction *> NumberedInstructions) {
   StringRef Namespace = Target.getInstNamespace();
-  // Map of operand names to their enumeration value.  This will be used to
-  // generate the OpName enum.
-  std::map<std::string, unsigned> Operands;
-  OpNameMapTy OperandMap;
 
-  initOperandMapData(NumberedInstructions, Namespace, Operands, OperandMap);
+  /// To facilitate assigning OpName enum values in the sorted alphabetical
+  /// order, we go through an indirection from OpName -> ID, and Enum -> ID.
+  /// This allows us to build the OpList and assign IDs to OpNames in a single
+  /// scan of the instructions below.
+
+  // Map of operand names to their ID.
+  std::map<std::string, unsigned> OperandNameToID;
+  // Map from operand name enum value -> ID.
+  std::vector<unsigned> OperandEnumToID;
+
+  /// The keys of this map is a map which have OpName ID values as their keys
+  /// and instruction operand indices as their values. The values of this map
+  /// are lists of instruction names. This map helps to unique entries among
+  /// instructions that have identical OpName -> Operand index mapping.
+  std::map<std::map<unsigned, unsigned>, std::vector<std::string>> OperandMap;
+
+  // Max operand index seen.
+  unsigned MaxOperandNo = 0;
+  unsigned NumOperandNames = 0;
+
+  for (const CodeGenInstruction *Inst : NumberedInstructions) {
+    if (!Inst->TheDef->getValueAsBit("UseNamedOperandTable"))
+      continue;
+    std::map<unsigned, unsigned> OpList;
+    for (const auto &Info : Inst->Operands) {
+      auto [I, Inserted] = OperandNameToID.try_emplace(Info.Name, 0);
+      if (Inserted)
+        I->second = NumOperandNames++;
+      OpList[I->second] = Info.MIOperandNo;
+      MaxOperandNo = std::max(MaxOperandNo, Info.MIOperandNo);
+    }
+    OperandMap[OpList].push_back(Inst->TheDef->getName().str());
+  }
+
+  OperandEnumToID.reserve(NumOperandNames);
+  for (const auto &Op : OperandNameToID)
+    OperandEnumToID.push_back(Op.second);
 
   OS << "#ifdef GET_INSTRINFO_OPERAND_ENUM\n";
   OS << "#undef GET_INSTRINFO_OPERAND_ENUM\n";
   OS << "namespace llvm::" << Namespace << "::OpName {\n";
   OS << "enum {\n";
-  for (const auto &Op : Operands)
-    OS << "  " << Op.first << " = " << Op.second << ",\n";
-
-  OS << "  OPERAND_LAST";
-  OS << "\n};\n";
+  for (const auto &[I, Op] : enumerate(OperandNameToID))
+    OS << "  " << Op.first << " = " << I << ",\n";
+  OS << "};\n";
   OS << "} // end namespace llvm::" << Namespace << "::OpName\n";
   OS << "#endif //GET_INSTRINFO_OPERAND_ENUM\n\n";
 
@@ -307,28 +295,30 @@ void InstrInfoEmitter::emitOperandNameMappings(
   OS << "namespace llvm::" << Namespace << " {\n";
   OS << "LLVM_READONLY\n";
   OS << "int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx) {\n";
-  if (!Operands.empty()) {
-    OS << "  static const int16_t OperandMap [][" << Operands.size()
+  if (NumOperandNames != 0) {
+    assert(MaxOperandNo <= INT16_MAX &&
+           "Too many operands for the operand name -> index table");
+    StringRef Type = MaxOperandNo <= INT8_MAX ? "int8_t" : "int16_t";
+    OS << "  static constexpr " << Type << " OperandMap[][" << NumOperandNames
        << "] = {\n";
     for (const auto &Entry : OperandMap) {
       const std::map<unsigned, unsigned> &OpList = Entry.first;
-      OS << "{";
-
-      // Emit a row of the OperandMap table
-      for (unsigned i = 0, e = Operands.size(); i != e; ++i)
-        OS << (OpList.count(i) == 0 ? -1 : (int)OpList.find(i)->second) << ", ";
 
+      // Emit a row of the OperandMap table.
+      OS << "  {";
+      for (int i = 0; i < static_cast<int>(NumOperandNames); ++i) {
+        unsigned ID = OperandEnumToID[i];
+        OS << (OpList.count(ID) ? (int)OpList.find(ID)->second : -1) << ", ";
+      }
       OS << "},\n";
     }
     OS << "};\n";
 
     OS << "  switch(Opcode) {\n";
-    unsigned TableIndex = 0;
-    for (const auto &Entry : OperandMap) {
+    for (const auto &[TableIndex, Entry] : enumerate(OperandMap)) {
       for (const std::string &Name : Entry.second)
-        OS << "  case " << Name << ":\n";
-
-      OS << "    return OperandMap[" << TableIndex++ << "][NamedIdx];\n";
+        OS << "  case " << Namespace << "::" << Name << ":\n";
+      OS << "    return OperandMap[" << TableIndex << "][NamedIdx];\n";
     }
     OS << "  default: return -1;\n";
     OS << "  }\n";



More information about the llvm-commits mailing list