[llvm] [TableGen][InstrInfo] Cull mappings that have not been enabled/are not needed (PR #126137)

Thu Feb 6 18:32:10 PST 2025

llvmbot wrote:




@llvm/pr-subscribers-tablegen

Author: Rahul Joshi (jurahul)

<details>
<summary>Changes</summary>

Detect, in an earlier pass over the instructions, whether logical operand mappings or named operand mappings have been enabled, and execute the relevant emission code only if those mappings are enabled.

For these mappings, skip the fixed set of predefined instructions as they won't have these mappings enabled.

Emit operand type mappings only for the X86 target, as they are used only by X86 and look for the X86-specific `X86MemOperand`.

Clean up the `emitOperandTypeMappings` code: remove the code that handles an empty instruction list, and use range-based for loops.

---
Full diff: https://github.com/llvm/llvm-project/pull/126137.diff


3 Files Affected:

- (modified) llvm/test/TableGen/get-operand-type-no-expand.td (+2-2) 
- (modified) llvm/test/TableGen/get-operand-type.td (+7-7) 
- (modified) llvm/utils/TableGen/InstrInfoEmitter.cpp (+102-77) 


``````````diff

diff --git a/llvm/test/TableGen/get-operand-type-no-expand.td b/llvm/test/TableGen/get-operand-type-no-expand.td
index 9dfcbfaec76af1e..a0a8fa957f9b6d2 100644
--- a/llvm/test/TableGen/get-operand-type-no-expand.td
+++ b/llvm/test/TableGen/get-operand-type-no-expand.td
@@ -5,7 +5,7 @@ include "llvm/Target/Target.td"
 
 def archInstrInfo : InstrInfo { }
 
-def arch : Target {
+def X86 : Target {
   let InstructionSet = archInstrInfo;
 }
 
@@ -26,7 +26,7 @@ def InstA : Instruction {
   let InOperandList = (ins i8complex:$b, i32imm:$c);
   field bits<8> Inst;
   field bits<8> SoftFail = 0;
-  let Namespace = "MyNamespace";
+  let Namespace = "X86";
 }
 
 // RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s \
diff --git a/llvm/test/TableGen/get-operand-type.td b/llvm/test/TableGen/get-operand-type.td
index 6ebda5cffe8af0a..b2f63cafd6a89a2 100644
--- a/llvm/test/TableGen/get-operand-type.td
+++ b/llvm/test/TableGen/get-operand-type.td
@@ -1,12 +1,12 @@
 // RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s | FileCheck %s
 
-// Check that getOperandType has the expected info in it
+// Check that getOperandType has the expected info in it.
 
 include "llvm/Target/Target.td"
 
 def archInstrInfo : InstrInfo { }
 
-def arch : Target {
+def X86 : Target {
   let InstructionSet = archInstrInfo;
 }
 
@@ -24,7 +24,7 @@ def InstA : Instruction {
   let InOperandList = (ins OpB:$b, i32imm:$c);
   field bits<8> Inst;
   field bits<8> SoftFail = 0;
-  let Namespace = "MyNamespace";
+  let Namespace = "X86";
 }
 
 def InstB : Instruction {
@@ -33,7 +33,7 @@ def InstB : Instruction {
   let InOperandList = (ins unknown:$x);
   field bits<8> Inst;
   field bits<8> SoftFail = 0;
-  let Namespace = "MyNamespace";
+  let Namespace = "X86";
 }
 
 def InstC : Instruction {
@@ -42,12 +42,12 @@ def InstC : Instruction {
   let InOperandList = (ins RegOp:$x);
   field bits<8> Inst;
   field bits<8> SoftFail = 0;
-  let Namespace = "MyNamespace";
+  let Namespace = "X86";
 }
 
 // CHECK: #ifdef GET_INSTRINFO_OPERAND_TYPE
-// CHECK: static const uint{{.*}}_t Offsets[] = {
-// CHECK: static const {{.*}} OpcodeOperandTypes[] = {
+// CHECK: static constexpr uint{{.*}}_t Offsets[] = {
+// CHECK: static constexpr {{.*}} OpcodeOperandTypes[] = {
 // CHECK:        /* InstA */
 // CHECK-NEXT:   OpA, OpB, i32imm,
 // CHECK-NEXT:   /* InstB */
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index 7c46890a49c81c3..6d06811c79c2a5d 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -92,7 +92,7 @@ class InstrInfoEmitter {
       raw_ostream &OS, const CodeGenTarget &Target,
       ArrayRef<const CodeGenInstruction *> NumberedInstructions);
   void emitLogicalOperandSizeMappings(
-      raw_ostream &OS, StringRef Namespace,
+      raw_ostream &OS, const CodeGenTarget &Target,
       ArrayRef<const CodeGenInstruction *> NumberedInstructions);
   void emitLogicalOperandTypeMappings(
       raw_ostream &OS, StringRef Namespace,
@@ -261,7 +261,11 @@ void InstrInfoEmitter::emitOperandNameMappings(
   // Max operand index seen.
   unsigned MaxOperandNo = 0;
 
-  for (const CodeGenInstruction *Inst : NumberedInstructions) {
+  // Fixed/Predefined instructions do not have UseNamedOperandTable enabled, so
+  // we can just skip them.
+  const unsigned NumFixedInsts = Target.getNumFixedInstructions();
+  for (const CodeGenInstruction *Inst :
+       NumberedInstructions.drop_front(NumFixedInsts)) {
     if (!Inst->TheDef->getValueAsBit("UseNamedOperandTable"))
       continue;
     std::map<unsigned, unsigned> OpList;
@@ -335,11 +339,18 @@ void InstrInfoEmitter::emitOperandNameMappings(
 /// Generate an enum for all the operand types for this target, under the
 /// llvm::TargetNamespace::OpTypes namespace.
 /// Operand types are all definitions derived of the Operand Target.td class.
+///
 void InstrInfoEmitter::emitOperandTypeMappings(
     raw_ostream &OS, const CodeGenTarget &Target,
     ArrayRef<const CodeGenInstruction *> NumberedInstructions) {
-
   StringRef Namespace = Target.getInstNamespace();
+
+  // These generated functions are used only by the X86 target
+  // (in bolt/lib/Target/X86/X86MCPlusBuilder.cpp). So emit them only
+  // for X86.
+  if (Namespace != "X86")
+    return;
+
   ArrayRef<const Record *> Operands =
       Records.getAllDerivedDefinitions("Operand");
   ArrayRef<const Record *> RegisterOperands =
@@ -376,73 +387,66 @@ void InstrInfoEmitter::emitOperandTypeMappings(
     return NumberedInstructions[I]->TheDef->getName();
   };
   // TODO: Factor out duplicate operand lists to compress the tables.
-  if (!NumberedInstructions.empty()) {
-    std::vector<int> OperandOffsets;
-    std::vector<const Record *> OperandRecords;
-    int CurrentOffset = 0;
-    for (const CodeGenInstruction *Inst : NumberedInstructions) {
-      OperandOffsets.push_back(CurrentOffset);
-      for (const auto &Op : Inst->Operands) {
-        const DagInit *MIOI = Op.MIOperandInfo;
-        if (!ExpandMIOperandInfo || !MIOI || MIOI->getNumArgs() == 0) {
-          // Single, anonymous, operand.
-          OperandRecords.push_back(Op.Rec);
+  std::vector<size_t> OperandOffsets;
+  std::vector<const Record *> OperandRecords;
+  size_t CurrentOffset = 0;
+  for (const CodeGenInstruction *Inst : NumberedInstructions) {
+    OperandOffsets.push_back(CurrentOffset);
+    for (const auto &Op : Inst->Operands) {
+      const DagInit *MIOI = Op.MIOperandInfo;
+      if (!ExpandMIOperandInfo || !MIOI || MIOI->getNumArgs() == 0) {
+        // Single, anonymous, operand.
+        OperandRecords.push_back(Op.Rec);
+        ++CurrentOffset;
+      } else {
+        for (const Init *Arg : MIOI->getArgs()) {
+          OperandRecords.push_back(cast<DefInit>(Arg)->getDef());
           ++CurrentOffset;
-        } else {
-          for (const Init *Arg : MIOI->getArgs()) {
-            OperandRecords.push_back(cast<DefInit>(Arg)->getDef());
-            ++CurrentOffset;
-          }
         }
       }
     }
+  }
 
-    // Emit the table of offsets (indexes) into the operand type table.
-    // Size the unsigned integer offset to save space.
-    assert(OperandRecords.size() <= UINT32_MAX &&
-           "Too many operands for offset table");
-    OS << "  static const " << getMinimalTypeForRange(OperandRecords.size());
-    OS << " Offsets[] = {\n";
-    for (int I = 0, E = OperandOffsets.size(); I != E; ++I) {
-      OS << "    /* " << getInstrName(I) << " */\n";
-      OS << "    " << OperandOffsets[I] << ",\n";
-    }
-    OS << "  };\n";
+  // Emit the table of offsets (indexes) into the operand type table.
+  // Size the unsigned integer offset to save space.
+  assert(OperandRecords.size() <= UINT32_MAX &&
+         "Too many operands for offset table");
+  OS << "  static constexpr " << getMinimalTypeForRange(OperandRecords.size());
+  OS << " Offsets[] = {\n";
+  for (const auto &[Idx, Offset] : enumerate(OperandOffsets))
+    OS << "    " << Offset << ", // " << getInstrName(Idx) << '\n';
+  OS << "  };\n";
 
-    // Add an entry for the end so that we don't need to special case it below.
-    OperandOffsets.push_back(OperandRecords.size());
-
-    // Emit the actual operand types in a flat table.
-    // Size the signed integer operand type to save space.
-    assert(EnumVal <= INT16_MAX &&
-           "Too many operand types for operand types table");
-    OS << "\n  using namespace OpTypes;\n";
-    OS << "  static";
-    OS << ((EnumVal <= INT8_MAX) ? " const int8_t" : " const int16_t");
-    OS << " OpcodeOperandTypes[] = {\n    ";
-    for (int I = 0, E = OperandRecords.size(), CurOffset = 0; I != E; ++I) {
-      // We print each Opcode's operands in its own row.
-      if (I == OperandOffsets[CurOffset]) {
-        OS << "\n    /* " << getInstrName(CurOffset) << " */\n    ";
-        while (OperandOffsets[++CurOffset] == I)
-          OS << "/* " << getInstrName(CurOffset) << " */\n    ";
-      }
-      const Record *OpR = OperandRecords[I];
-      if ((OpR->isSubClassOf("Operand") ||
-           OpR->isSubClassOf("RegisterOperand") ||
-           OpR->isSubClassOf("RegisterClass")) &&
-          !OpR->isAnonymous())
-        OS << OpR->getName();
-      else
-        OS << -1;
-      OS << ", ";
+  // Add an entry for the end so that we don't need to special case it below.
+  OperandOffsets.push_back(OperandRecords.size());
+
+  // Emit the actual operand types in a flat table.
+  // Size the signed integer operand type to save space.
+  assert(EnumVal <= INT16_MAX &&
+         "Too many operand types for operand types table");
+  OS << "\n  using namespace OpTypes;\n";
+  OS << "  static";
+  OS << (EnumVal <= INT8_MAX ? " constexpr int8_t" : " constexpr int16_t");
+  OS << " OpcodeOperandTypes[] = {";
+  size_t CurOffset = 0;
+  for (auto [Idx, OpR] : enumerate(OperandRecords)) {
+    // We print each Opcode's operands in its own row.
+    if (Idx == OperandOffsets[CurOffset]) {
+      OS << "\n    /* " << getInstrName(CurOffset) << " */\n    ";
+      while (OperandOffsets[++CurOffset] == Idx)
+        OS << "/* " << getInstrName(CurOffset) << " */\n    ";
     }
-    OS << "\n  };\n";
-
-    OS << "  return OpcodeOperandTypes[Offsets[Opcode] + OpIdx];\n";
-  } else {
-    OS << "  llvm_unreachable(\"No instructions defined\");\n";
+    if ((OpR->isSubClassOf("Operand") || OpR->isSubClassOf("RegisterOperand") ||
+         OpR->isSubClassOf("RegisterClass")) &&
+        !OpR->isAnonymous())
+      OS << OpR->getName();
+    else
+      OS << -1;
+    OS << ", ";
   }
+  OS << "\n  };\n";
+
+  OS << "  return OpcodeOperandTypes[Offsets[Opcode] + OpIdx];\n";
   OS << "}\n";
   OS << "} // end namespace llvm::" << Namespace << "\n";
   OS << "#endif // GET_INSTRINFO_OPERAND_TYPE\n\n";
@@ -461,10 +465,10 @@ void InstrInfoEmitter::emitOperandTypeMappings(
       SizeToOperandName[Size].push_back(Op->getName());
   }
   OS << "  default: return 0;\n";
-  for (const auto &KV : SizeToOperandName) {
-    for (const StringRef &OperandName : KV.second)
+  for (const auto &[Size, OperandNames] : SizeToOperandName) {
+    for (const StringRef &OperandName : OperandNames)
       OS << "  case OpTypes::" << OperandName << ":\n";
-    OS << "    return " << KV.first << ";\n\n";
+    OS << "    return " << Size << ";\n\n";
   }
   OS << "  }\n}\n";
   OS << "} // end namespace llvm::" << Namespace << "\n";
@@ -472,15 +476,20 @@ void InstrInfoEmitter::emitOperandTypeMappings(
 }
 
 void InstrInfoEmitter::emitLogicalOperandSizeMappings(
-    raw_ostream &OS, StringRef Namespace,
+    raw_ostream &OS, const CodeGenTarget &Target,
     ArrayRef<const CodeGenInstruction *> NumberedInstructions) {
-  std::map<std::vector<unsigned>, unsigned> LogicalOpSizeMap;
+  StringRef Namespace = Target.getInstNamespace();
 
+  std::map<std::vector<unsigned>, unsigned> LogicalOpSizeMap;
   std::map<unsigned, std::vector<std::string>> InstMap;
 
   size_t LogicalOpListSize = 0U;
   std::vector<unsigned> LogicalOpList;
-  for (const auto *Inst : NumberedInstructions) {
+
+  // Fixed/Predefined instructions do not have UseLogicalOperandMappings
+  // enabled, so we can just skip them.
+  const unsigned NumFixedInsts = Target.getNumFixedInstructions();
+  for (const auto *Inst : NumberedInstructions.drop_front(NumFixedInsts)) {
     if (!Inst->TheDef->getValueAsBit("UseLogicalOperandMappings"))
       continue;
 
@@ -907,22 +916,34 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
   unsigned OperandInfoSize =
       CollectOperandInfo(OperandInfoList, OperandInfoMap);
 
+  ArrayRef<const CodeGenInstruction *> NumberedInstructions =
+      Target.getInstructionsByEnumValue();
+
   // Collect all of the instruction's implicit uses and defs.
+  // Also collect which features are enabled by instructions to control
+  // emission of various mappings.
+
+  bool HasUseLogicalOperandMappings = false;
+  bool HasUseNamedOperandTable = false;
+
   Timer.startTimer("Collect uses/defs");
   std::map<std::vector<const Record *>, unsigned> EmittedLists;
   std::vector<std::vector<const Record *>> ImplicitLists;
   unsigned ImplicitListSize = 0;
-  for (const CodeGenInstruction *II : Target.getInstructionsByEnumValue()) {
-    std::vector<const Record *> ImplicitOps = II->ImplicitUses;
-    llvm::append_range(ImplicitOps, II->ImplicitDefs);
+  for (const CodeGenInstruction *Inst : NumberedInstructions) {
+    HasUseLogicalOperandMappings |=
+        Inst->TheDef->getValueAsBit("UseLogicalOperandMappings");
+    HasUseNamedOperandTable |=
+        Inst->TheDef->getValueAsBit("UseNamedOperandTable");
+
+    std::vector<const Record *> ImplicitOps = Inst->ImplicitUses;
+    llvm::append_range(ImplicitOps, Inst->ImplicitDefs);
     if (EmittedLists.insert({ImplicitOps, ImplicitListSize}).second) {
       ImplicitLists.push_back(ImplicitOps);
       ImplicitListSize += ImplicitOps.size();
     }
   }
 
-  ArrayRef<const CodeGenInstruction *> NumberedInstructions =
-      Target.getInstructionsByEnumValue();
   OS << "#if defined(GET_INSTRINFO_MC_DESC) || "
         "defined(GET_INSTRINFO_CTOR_DTOR)\n";
   OS << "namespace llvm {\n\n";
@@ -1123,14 +1144,18 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
 
   OS << "#endif // GET_INSTRINFO_CTOR_DTOR\n\n";
 
-  Timer.startTimer("Emit operand name mappings");
-  emitOperandNameMappings(OS, Target, NumberedInstructions);
+  if (HasUseNamedOperandTable) {
+    Timer.startTimer("Emit operand name mappings");
+    emitOperandNameMappings(OS, Target, NumberedInstructions);
+  }
 
   Timer.startTimer("Emit operand type mappings");
   emitOperandTypeMappings(OS, Target, NumberedInstructions);
 
-  Timer.startTimer("Emit logical operand size mappings");
-  emitLogicalOperandSizeMappings(OS, TargetName, NumberedInstructions);
+  if (HasUseLogicalOperandMappings) {
+    Timer.startTimer("Emit logical operand size mappings");
+    emitLogicalOperandSizeMappings(OS, Target, NumberedInstructions);
+  }
 
   Timer.startTimer("Emit logical operand type mappings");
   emitLogicalOperandTypeMappings(OS, TargetName, NumberedInstructions);

``````````

</details>


https://github.com/llvm/llvm-project/pull/126137