[llvm] [TableGen][DecoderEmitter] Extract encoding parsing into a method (NFC) (PR #154271)

Sergei Barannikov via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 18 22:59:10 PDT 2025


https://github.com/s-barannikov created https://github.com/llvm/llvm-project/pull/154271

Call the new encoding-parsing method from the constructor so that the `run`
method can be made `const`. Turn a couple of related functions into methods
as well.


From ef713883c817aa876dfdbf459cc8c26f64cbc65a Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Tue, 19 Aug 2025 08:58:14 +0300
Subject: [PATCH] [TableGen][DecoderEmitter] Extract encoding parsing into a
 method (NFC)

Call the new encoding-parsing method from the constructor so that the `run`
method can be made `const`. Turn a couple of related functions into methods
as well.
---
 llvm/utils/TableGen/DecoderEmitter.cpp | 110 ++++++++++++++-----------
 1 file changed, 61 insertions(+), 49 deletions(-)

diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 109b955db6d79..8ac8a363e5fe2 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -219,11 +219,12 @@ using NamespacesHwModesMap = std::map<std::string, std::set<unsigned>>;
 
 class DecoderEmitter {
   const RecordKeeper &RK;
+  CodeGenTarget Target;
+  const CodeGenHwModes &CGH;
   std::vector<EncodingAndInst> Encodings;
 
 public:
-  DecoderEmitter(const RecordKeeper &R, StringRef PredicateNamespace)
-      : RK(R), Target(R), PredicateNamespace(PredicateNamespace) {}
+  DecoderEmitter(const RecordKeeper &RK, StringRef PredicateNamespace);
 
   const CodeGenTarget &getTarget() const { return Target; }
 
@@ -240,10 +241,18 @@ class DecoderEmitter {
                            DecoderSet &Decoders) const;
 
   // run - Output the code emitter
-  void run(raw_ostream &o);
+  void run(raw_ostream &o) const;
 
 private:
-  CodeGenTarget Target;
+  void collectHwModesReferencedForEncodings(
+      std::vector<unsigned> &HwModeIDs,
+      NamespacesHwModesMap &NamespacesWithHwModes) const;
+
+  void handleHwModesUnrelatedEncodings(
+      const CodeGenInstruction *Instr, ArrayRef<unsigned> HwModeIDs,
+      NamespacesHwModesMap &NamespacesWithHwModes);
+
+  void parseInstructionEncodings();
 
 public:
   StringRef PredicateNamespace;
@@ -2393,12 +2402,12 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
 )";
 }
 
-// Collect all HwModes referenced by the target for encoding purposes.
-static void collectHwModesReferencedForEncodings(
-    const CodeGenHwModes &HWM, std::vector<unsigned> &HwModeIDs,
-    NamespacesHwModesMap &NamespacesWithHwModes) {
-  SmallBitVector BV(HWM.getNumModeIds());
-  for (const auto &MS : HWM.getHwModeSelects()) {
+/// Collects all HwModes referenced by the target for encoding purposes.
+void DecoderEmitter::collectHwModesReferencedForEncodings(
+    std::vector<unsigned> &HwModeIDs,
+    NamespacesHwModesMap &NamespacesWithHwModes) const {
+  SmallBitVector BV(CGH.getNumModeIds());
+  for (const auto &MS : CGH.getHwModeSelects()) {
     for (auto [HwModeID, EncodingDef] : MS.second.Items) {
       if (EncodingDef->isSubClassOf("InstructionEncoding")) {
         std::string DecoderNamespace =
@@ -2414,17 +2423,15 @@ static void collectHwModesReferencedForEncodings(
   llvm::copy(BV.set_bits(), std::back_inserter(HwModeIDs));
 }
 
-static void
-handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr,
-                                ArrayRef<unsigned> HwModeIDs,
-                                NamespacesHwModesMap &NamespacesWithHwModes,
-                                std::vector<EncodingAndInst> &GlobalEncodings) {
+void DecoderEmitter::handleHwModesUnrelatedEncodings(
+    const CodeGenInstruction *Instr, ArrayRef<unsigned> HwModeIDs,
+    NamespacesHwModesMap &NamespacesWithHwModes) {
   const Record *InstDef = Instr->TheDef;
 
   switch (DecoderEmitterSuppressDuplicates) {
   case SUPPRESSION_DISABLE: {
     for (unsigned HwModeID : HwModeIDs)
-      GlobalEncodings.emplace_back(InstDef, Instr, HwModeID);
+      Encodings.emplace_back(InstDef, Instr, HwModeID);
     break;
   }
   case SUPPRESSION_LEVEL1: {
@@ -2433,51 +2440,28 @@ handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr,
     auto It = NamespacesWithHwModes.find(DecoderNamespace);
     if (It != NamespacesWithHwModes.end()) {
       for (unsigned HwModeID : It->second)
-        GlobalEncodings.emplace_back(InstDef, Instr, HwModeID);
+        Encodings.emplace_back(InstDef, Instr, HwModeID);
     } else {
       // Only emit the encoding once, as it's DecoderNamespace doesn't
       // contain any HwModes.
-      GlobalEncodings.emplace_back(InstDef, Instr, DefaultMode);
+      Encodings.emplace_back(InstDef, Instr, DefaultMode);
     }
     break;
   }
   case SUPPRESSION_LEVEL2:
-    GlobalEncodings.emplace_back(InstDef, Instr, DefaultMode);
+    Encodings.emplace_back(InstDef, Instr, DefaultMode);
     break;
   }
 }
 
-// Emits disassembler code for instruction decoding.
-void DecoderEmitter::run(raw_ostream &o) {
-  formatted_raw_ostream OS(o);
-  OS << R"(
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/LEB128.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/TargetParser/SubtargetFeature.h"
-#include <assert.h>
-
-namespace {
-)";
-
-  emitFieldFromInstruction(OS);
-  emitInsertBits(OS);
-  emitCheck(OS);
-
-  Target.reverseBitsForLittleEndianEncoding();
-
-  // Parameterize the decoders based on namespace and instruction width.
-
+/// Parses all InstructionEncoding instances and fills internal data structures.
+void DecoderEmitter::parseInstructionEncodings() {
   // First, collect all encoding-related HwModes referenced by the target.
   // And establish a mapping table between DecoderNamespace and HwMode.
   // If HwModeNames is empty, add the default mode so we always have one HwMode.
-  const CodeGenHwModes &HWM = Target.getHwModes();
   std::vector<unsigned> HwModeIDs;
   NamespacesHwModesMap NamespacesWithHwModes;
-  collectHwModesReferencedForEncodings(HWM, HwModeIDs, NamespacesWithHwModes);
+  collectHwModesReferencedForEncodings(HwModeIDs, NamespacesWithHwModes);
   if (HwModeIDs.empty())
     HwModeIDs.push_back(DefaultMode);
 
@@ -2488,15 +2472,14 @@ namespace {
   for (const CodeGenInstruction *Inst : Instructions) {
     const Record *InstDef = Inst->TheDef;
     if (const Record *RV = InstDef->getValueAsOptionalDef("EncodingInfos")) {
-      EncodingInfoByHwMode EBM(RV, HWM);
+      EncodingInfoByHwMode EBM(RV, CGH);
       for (auto [HwModeID, EncodingDef] : EBM)
         Encodings.emplace_back(EncodingDef, Inst, HwModeID);
       continue;
     }
     // This instruction is encoded the same on all HwModes.
     // According to user needs, provide varying degrees of suppression.
-    handleHwModesUnrelatedEncodings(Inst, HwModeIDs, NamespacesWithHwModes,
-                                    Encodings);
+    handleHwModesUnrelatedEncodings(Inst, HwModeIDs, NamespacesWithHwModes);
   }
 
   for (const Record *EncodingDef :
@@ -2504,6 +2487,35 @@ namespace {
     const Record *InstDef = EncodingDef->getValueAsDef("AliasOf");
     Encodings.emplace_back(EncodingDef, &Target.getInstruction(InstDef));
   }
+}
+
+DecoderEmitter::DecoderEmitter(const RecordKeeper &RK,
+                               StringRef PredicateNamespace)
+    : RK(RK), Target(RK), CGH(Target.getHwModes()),
+      PredicateNamespace(PredicateNamespace) {
+  Target.reverseBitsForLittleEndianEncoding();
+  parseInstructionEncodings();
+}
+
+// Emits disassembler code for instruction decoding.
+void DecoderEmitter::run(raw_ostream &o) const {
+  formatted_raw_ostream OS(o);
+  OS << R"(
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include <assert.h>
+
+namespace {
+)";
+
+  emitFieldFromInstruction(OS);
+  emitInsertBits(OS);
+  emitCheck(OS);
 
   // Map of (namespace, hwmode, size) tuple to encoding IDs.
   std::map<std::tuple<StringRef, unsigned, unsigned>, std::vector<unsigned>>
@@ -2512,7 +2524,7 @@ namespace {
   std::vector<unsigned> InstrLen;
   bool IsVarLenInst = Target.hasVariableLengthEncodings();
   if (IsVarLenInst)
-    InstrLen.resize(Instructions.size(), 0);
+    InstrLen.resize(Target.getInstructions().size(), 0);
   unsigned MaxInstLen = 0;
 
   for (const auto &[EncodingID, Encoding] : enumerate(Encodings)) {



More information about the llvm-commits mailing list