[llvm] [TableGen][DecoderEmitter] Stop duplicating encodings (NFC) (PR #154288)

Sergei Barannikov via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 19 01:34:51 PDT 2025


https://github.com/s-barannikov created https://github.com/llvm/llvm-project/pull/154288

When HwModes are involved, we may duplicate an instruction encoding with the default HwMode multiple times. We can do better by mapping HwMode to a list of encoding IDs it contains.

The encodings that were duplicated are still processed multiple times (e.g., we call an expensive populateInstruction() on each instance). This is going to be fixed in subsequent patches.

>From 66db833af73ffd57bc3689ef503b8266be635429 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Tue, 19 Aug 2025 11:29:56 +0300
Subject: [PATCH] [TableGen][DecoderEmitter] Stop duplicating encodings (NFC)

When HwModes are involved, we may duplicate an instruction encoding
with the default HwMode multiple times. We can do better by mapping
HwMode to a list of encoding IDs it contains.

The encodings that were duplicated are still processed multiple times
(e.g., we call an expensive populateInstruction() on each instance).
This is going to be fixed in subsequent patches.
---
 llvm/utils/TableGen/DecoderEmitter.cpp | 96 +++++++++++++++-----------
 1 file changed, 55 insertions(+), 41 deletions(-)

diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 0634fac72bd18..abf29f9d7beb9 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -208,11 +208,9 @@ struct DecoderTableInfo {
 struct EncodingAndInst {
   const Record *EncodingDef;
   const CodeGenInstruction *Inst;
-  unsigned HwModeID;
 
-  EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst,
-                  unsigned HwModeID = DefaultMode)
-      : EncodingDef(EncodingDef), Inst(Inst), HwModeID(HwModeID) {}
+  EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst)
+      : EncodingDef(EncodingDef), Inst(Inst) {}
 };
 
 using NamespacesHwModesMap = std::map<std::string, std::set<unsigned>>;
@@ -221,8 +219,13 @@ class DecoderEmitter {
   const RecordKeeper &RK;
   CodeGenTarget Target;
   const CodeGenHwModes &CGH;
+
+  /// All parsed encodings.
   std::vector<EncodingAndInst> Encodings;
 
+  /// Encoding IDs for each HwMode. An ID is an index into Encodings.
+  SmallDenseMap<unsigned, std::vector<unsigned>> EncodingIDsByHwMode;
+
 public:
   DecoderEmitter(const RecordKeeper &RK, StringRef PredicateNamespace);
 
@@ -249,7 +252,7 @@ class DecoderEmitter {
       NamespacesHwModesMap &NamespacesWithHwModes) const;
 
   void
-  handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr,
+  handleHwModesUnrelatedEncodings(unsigned EncodingID,
                                   ArrayRef<unsigned> HwModeIDs,
                                   NamespacesHwModesMap &NamespacesWithHwModes);
 
@@ -2425,32 +2428,31 @@ void DecoderEmitter::collectHwModesReferencedForEncodings(
 }
 
 void DecoderEmitter::handleHwModesUnrelatedEncodings(
-    const CodeGenInstruction *Instr, ArrayRef<unsigned> HwModeIDs,
+    unsigned EncodingID, ArrayRef<unsigned> HwModeIDs,
     NamespacesHwModesMap &NamespacesWithHwModes) {
-  const Record *InstDef = Instr->TheDef;
-
   switch (DecoderEmitterSuppressDuplicates) {
   case SUPPRESSION_DISABLE: {
     for (unsigned HwModeID : HwModeIDs)
-      Encodings.emplace_back(InstDef, Instr, HwModeID);
+      EncodingIDsByHwMode[HwModeID].push_back(EncodingID);
     break;
   }
   case SUPPRESSION_LEVEL1: {
+    const Record *InstDef = Encodings[EncodingID].Inst->TheDef;
     std::string DecoderNamespace =
         InstDef->getValueAsString("DecoderNamespace").str();
     auto It = NamespacesWithHwModes.find(DecoderNamespace);
     if (It != NamespacesWithHwModes.end()) {
       for (unsigned HwModeID : It->second)
-        Encodings.emplace_back(InstDef, Instr, HwModeID);
+        EncodingIDsByHwMode[HwModeID].push_back(EncodingID);
     } else {
       // Only emit the encoding once, as it's DecoderNamespace doesn't
       // contain any HwModes.
-      Encodings.emplace_back(InstDef, Instr, DefaultMode);
+      EncodingIDsByHwMode[DefaultMode].push_back(EncodingID);
     }
     break;
   }
   case SUPPRESSION_LEVEL2:
-    Encodings.emplace_back(InstDef, Instr, DefaultMode);
+    EncodingIDsByHwMode[DefaultMode].push_back(EncodingID);
     break;
   }
 }
@@ -2474,13 +2476,21 @@ void DecoderEmitter::parseInstructionEncodings() {
     const Record *InstDef = Inst->TheDef;
     if (const Record *RV = InstDef->getValueAsOptionalDef("EncodingInfos")) {
       EncodingInfoByHwMode EBM(RV, CGH);
-      for (auto [HwModeID, EncodingDef] : EBM)
-        Encodings.emplace_back(EncodingDef, Inst, HwModeID);
+      for (auto [HwModeID, EncodingDef] : EBM) {
+        unsigned EncodingID = Encodings.size();
+        Encodings.emplace_back(EncodingDef, Inst);
+        EncodingIDsByHwMode[HwModeID].push_back(EncodingID);
+      }
       continue;
     }
+
+    unsigned EncodingID = Encodings.size();
+    Encodings.emplace_back(InstDef, Inst);
+
     // This instruction is encoded the same on all HwModes.
-    // According to user needs, provide varying degrees of suppression.
-    handleHwModesUnrelatedEncodings(Inst, HwModeIDs, NamespacesWithHwModes);
+    // According to user needs, add it to all, some, or only the default HwMode.
+    handleHwModesUnrelatedEncodings(EncodingID, HwModeIDs,
+                                    NamespacesWithHwModes);
   }
 
   for (const Record *EncodingDef :
@@ -2528,35 +2538,39 @@ namespace {
     InstrLen.resize(Target.getInstructions().size(), 0);
   unsigned MaxInstLen = 0;
 
-  for (const auto &[EncodingID, Encoding] : enumerate(Encodings)) {
-    const Record *EncodingDef = Encoding.EncodingDef;
-    const CodeGenInstruction *Inst = Encoding.Inst;
-    const Record *Def = Inst->TheDef;
-    unsigned Size = EncodingDef->getValueAsInt("Size");
-    if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
-        Def->getValueAsBit("isPseudo") ||
-        Def->getValueAsBit("isAsmParserOnly") ||
-        Def->getValueAsBit("isCodeGenOnly")) {
-      NumEncodingsLackingDisasm++;
-      continue;
-    }
+  for (const auto &[HwModeID, EncodingIDs] : EncodingIDsByHwMode) {
+    for (unsigned EncodingID : EncodingIDs) {
+      const EncodingAndInst &Encoding = Encodings[EncodingID];
+      const Record *EncodingDef = Encoding.EncodingDef;
+      const CodeGenInstruction *Inst = Encoding.Inst;
+      const Record *Def = Inst->TheDef;
+      unsigned Size = EncodingDef->getValueAsInt("Size");
+      if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
+          Def->getValueAsBit("isPseudo") ||
+          Def->getValueAsBit("isAsmParserOnly") ||
+          Def->getValueAsBit("isCodeGenOnly")) {
+        NumEncodingsLackingDisasm++;
+        continue;
+      }
 
-    NumEncodings++;
+      NumEncodings++;
 
-    if (!Size && !IsVarLenInst)
-      continue;
+      if (!Size && !IsVarLenInst)
+        continue;
 
-    if (unsigned Len = populateInstruction(
-            Target, *EncodingDef, *Inst, EncodingID, Operands, IsVarLenInst)) {
-      if (IsVarLenInst) {
-        MaxInstLen = std::max(MaxInstLen, Len);
-        InstrLen[EncodingID] = Len;
+      if (unsigned Len =
+              populateInstruction(Target, *EncodingDef, *Inst, EncodingID,
+                                  Operands, IsVarLenInst)) {
+        if (IsVarLenInst) {
+          MaxInstLen = std::max(MaxInstLen, Len);
+          InstrLen[EncodingID] = Len;
+        }
+        StringRef DecoderNamespace =
+            EncodingDef->getValueAsString("DecoderNamespace");
+        EncMap[{DecoderNamespace, HwModeID, Size}].push_back(EncodingID);
+      } else {
+        NumEncodingsOmitted++;
       }
-      StringRef DecoderNamespace =
-          EncodingDef->getValueAsString("DecoderNamespace");
-      EncMap[{DecoderNamespace, Encoding.HwModeID, Size}].push_back(EncodingID);
-    } else {
-      NumEncodingsOmitted++;
     }
   }
 



More information about the llvm-commits mailing list