[llvm] [TableGen][DecoderEmitter] Analyze encodings once (PR #154309)

Tue Aug 19 04:15:44 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-tablegen

Author: Sergei Barannikov (s-barannikov)

<details>
<summary>Changes</summary>

Follow-up to #154288.

With HwModes involved, we used to analyze the same encoding multiple times (unless `-suppress-per-hwmode-duplicates=O2` is specified). This affected the build time and made the statistics inaccurate.

From the point of view of the generated code, this is an NFC.

---
Full diff: https://github.com/llvm/llvm-project/pull/154309.diff


1 File Affected:

- (modified) llvm/utils/TableGen/DecoderEmitter.cpp (+87-34) 


``````````diff

diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index abf29f9d7beb9..ab67fc03ee057 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -1472,7 +1472,6 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                                                       ? MCD::OPC_TryDecodeOrFail
                                                       : MCD::OPC_TryDecode);
   TableInfo.Table.push_back(DecoderOp);
-  NumEncodingsSupported++;
   const Record *InstDef = Encodings[EncodingID].Inst->TheDef;
   TableInfo.Table.insertULEB128(Emitter->getTarget().getInstrIntValue(InstDef));
   TableInfo.Table.insertULEB128(DIdx);
@@ -2457,6 +2456,41 @@ void DecoderEmitter::handleHwModesUnrelatedEncodings(
   }
 }
 
+/// Checks if the given target-specific non-pseudo instruction
+/// is a candidate for decoding.
+static bool isDecodableInstruction(const Record *InstDef) {
+  return !InstDef->getValueAsBit("isAsmParserOnly") &&
+         !InstDef->getValueAsBit("isCodeGenOnly");
+}
+
+/// Checks if the given encoding is valid.
+static bool isValidEncoding(const Record *EncodingDef) {
+  const RecordVal *InstField = EncodingDef->getValue("Inst");
+  if (!InstField)
+    return false;
+
+  if (const auto *InstInit = dyn_cast<BitsInit>(InstField->getValue())) {
+    // Fixed-length encoding. Size must be non-zero.
+    if (!EncodingDef->getValueAsInt("Size"))
+      return false;
+
+    // At least one of the encoding bits must be complete (not '?').
+    return !InstInit->allInComplete();
+  }
+
+  if (const auto *InstInit = dyn_cast<DagInit>(InstField->getValue())) {
+    // Variable-length encoding.
+    // At least one of the encoding bits must be complete (not '?').
+    VarLenInst VLI(InstInit, InstField);
+    return !all_of(VLI, [](const EncodingSegment &S) {
+      return isa<UnsetInit>(S.Value);
+    });
+  }
+
+  // Inst field is neither BitsInit nor DagInit. This is something unsupported.
+  return false;
+}
+
 /// Parses all InstructionEncoding instances and fills internal data structures.
 void DecoderEmitter::parseInstructionEncodings() {
   // First, collect all encoding-related HwModes referenced by the target.
@@ -2468,19 +2502,34 @@ void DecoderEmitter::parseInstructionEncodings() {
   if (HwModeIDs.empty())
     HwModeIDs.push_back(DefaultMode);
 
-  ArrayRef<const CodeGenInstruction *> Instructions = Target.getInstructions();
+  ArrayRef<const CodeGenInstruction *> Instructions =
+      Target.getTargetNonPseudoInstructions();
   Encodings.reserve(Instructions.size());
-  NumInstructions = Instructions.size();
 
   for (const CodeGenInstruction *Inst : Instructions) {
     const Record *InstDef = Inst->TheDef;
+    if (!isDecodableInstruction(InstDef)) {
+      ++NumEncodingsLackingDisasm;
+      continue;
+    }
+
     if (const Record *RV = InstDef->getValueAsOptionalDef("EncodingInfos")) {
       EncodingInfoByHwMode EBM(RV, CGH);
       for (auto [HwModeID, EncodingDef] : EBM) {
+        if (!isValidEncoding(EncodingDef)) {
+          // TODO: Should probably give a warning.
+          ++NumEncodingsOmitted;
+          continue;
+        }
         unsigned EncodingID = Encodings.size();
         Encodings.emplace_back(EncodingDef, Inst);
         EncodingIDsByHwMode[HwModeID].push_back(EncodingID);
       }
+      continue; // Ignore encoding specified by Instruction itself.
+    }
+
+    if (!isValidEncoding(InstDef)) {
+      ++NumEncodingsOmitted;
       continue;
     }
 
@@ -2496,8 +2545,24 @@ void DecoderEmitter::parseInstructionEncodings() {
   for (const Record *EncodingDef :
        RK.getAllDerivedDefinitions("AdditionalEncoding")) {
     const Record *InstDef = EncodingDef->getValueAsDef("AliasOf");
+    // TODO: Should probably give a warning in these cases.
+    //   What's the point of specifying an additional encoding
+    //   if it is invalid or if the instruction is not decodable?
+    if (!isDecodableInstruction(InstDef)) {
+      ++NumEncodingsLackingDisasm;
+      continue;
+    }
+    if (!isValidEncoding(EncodingDef)) {
+      ++NumEncodingsOmitted;
+      continue;
+    }
     Encodings.emplace_back(EncodingDef, &Target.getInstruction(InstDef));
   }
+
+  // Do some statistics.
+  NumInstructions = Instructions.size();
+  NumEncodingsSupported = Encodings.size();
+  NumEncodings = NumEncodingsSupported + NumEncodingsOmitted;
 }
 
 DecoderEmitter::DecoderEmitter(const RecordKeeper &RK,
@@ -2528,9 +2593,6 @@ namespace {
   emitInsertBits(OS);
   emitCheck(OS);
 
-  // Map of (namespace, hwmode, size) tuple to encoding IDs.
-  std::map<std::tuple<StringRef, unsigned, unsigned>, std::vector<unsigned>>
-      EncMap;
   std::map<unsigned, std::vector<OperandInfo>> Operands;
   std::vector<unsigned> InstrLen;
   bool IsVarLenInst = Target.hasVariableLengthEncodings();
@@ -2538,39 +2600,30 @@ namespace {
     InstrLen.resize(Target.getInstructions().size(), 0);
   unsigned MaxInstLen = 0;
 
+  for (auto [EncodingID, Encoding] : enumerate(Encodings)) {
+    const Record *EncodingDef = Encoding.EncodingDef;
+    const CodeGenInstruction *Inst = Encoding.Inst;
+    unsigned BitWidth =
+        populateInstruction(Target, *EncodingDef, *Inst, EncodingID,
+                            Operands, IsVarLenInst);
+    assert(BitWidth && "Invalid encodings should have been filtered out");
+    if (IsVarLenInst) {
+      MaxInstLen = std::max(MaxInstLen, BitWidth);
+      InstrLen[Target.getInstrIntValue(Inst->TheDef)] = BitWidth;
+    }
+  }
+
+  // Map of (namespace, hwmode, size) tuple to encoding IDs.
+  std::map<std::tuple<StringRef, unsigned, unsigned>, std::vector<unsigned>>
+      EncMap;
   for (const auto &[HwModeID, EncodingIDs] : EncodingIDsByHwMode) {
     for (unsigned EncodingID : EncodingIDs) {
       const EncodingAndInst &Encoding = Encodings[EncodingID];
       const Record *EncodingDef = Encoding.EncodingDef;
-      const CodeGenInstruction *Inst = Encoding.Inst;
-      const Record *Def = Inst->TheDef;
       unsigned Size = EncodingDef->getValueAsInt("Size");
-      if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
-          Def->getValueAsBit("isPseudo") ||
-          Def->getValueAsBit("isAsmParserOnly") ||
-          Def->getValueAsBit("isCodeGenOnly")) {
-        NumEncodingsLackingDisasm++;
-        continue;
-      }
-
-      NumEncodings++;
-
-      if (!Size && !IsVarLenInst)
-        continue;
-
-      if (unsigned Len =
-              populateInstruction(Target, *EncodingDef, *Inst, EncodingID,
-                                  Operands, IsVarLenInst)) {
-        if (IsVarLenInst) {
-          MaxInstLen = std::max(MaxInstLen, Len);
-          InstrLen[EncodingID] = Len;
-        }
-        StringRef DecoderNamespace =
-            EncodingDef->getValueAsString("DecoderNamespace");
-        EncMap[{DecoderNamespace, HwModeID, Size}].push_back(EncodingID);
-      } else {
-        NumEncodingsOmitted++;
-      }
+      StringRef DecoderNamespace =
+          EncodingDef->getValueAsString("DecoderNamespace");
+      EncMap[{DecoderNamespace, HwModeID, Size}].push_back(EncodingID);
     }
   }
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/154309