[llvm] [LLVM][MC] Add support to cull inactive decoders in decoder emitter (PR #154865)

Sergei Barannikov via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 25 19:54:49 PDT 2025


================
@@ -2528,59 +2535,114 @@ namespace {
 )";
 
   // Do extra bookkeeping for variable-length encodings.
-  std::vector<unsigned> InstrLen;
   bool IsVarLenInst = Target.hasVariableLengthEncodings();
   unsigned MaxInstLen = 0;
   if (IsVarLenInst) {
-    InstrLen.resize(Target.getInstructions().size(), 0);
+    std::vector<unsigned> InstrLen(Target.getInstructions().size(), 0);
     for (const InstructionEncoding &Encoding : Encodings) {
       MaxInstLen = std::max(MaxInstLen, Encoding.getBitWidth());
       InstrLen[Target.getInstrIntValue(Encoding.getInstruction()->TheDef)] =
           Encoding.getBitWidth();
     }
+
+    // For variable instruction, we emit a instruction length table to let the
+    // decoder know how long the instructions are. You can see example usage in
+    // M68k's disassembler.
+    emitInstrLenTable(OS, InstrLen);
   }
 
   // Map of (namespace, hwmode, size) tuple to encoding IDs.
-  std::map<std::tuple<StringRef, unsigned, unsigned>, std::vector<unsigned>>
-      EncMap;
+  using EncMapTy = std::map<std::tuple<StringRef, unsigned, unsigned>,
+                            std::vector<unsigned>>;
+  EncMapTy EncMap;
+
+  // The set of valid instruction bitwidths for this target.
+  SmallSet<unsigned, 4> InstrBitwidths;
   for (const auto &[HwModeID, EncodingIDs] : EncodingIDsByHwMode) {
     for (unsigned EncodingID : EncodingIDs) {
       const InstructionEncoding &Encoding = Encodings[EncodingID];
       const Record *EncodingDef = Encoding.getRecord();
       unsigned Size = EncodingDef->getValueAsInt("Size");
       StringRef DecoderNamespace =
           EncodingDef->getValueAsString("DecoderNamespace");
+      const unsigned BitWidth = IsVarLenInst ? MaxInstLen : 8 * Size;
+      InstrBitwidths.insert(BitWidth);
       EncMap[{DecoderNamespace, HwModeID, Size}].push_back(EncodingID);
     }
   }
 
+  const bool SpecializeDecodersPerBitwidth =
+      Target.getInstructionSet()->getValueAsBit(
+          "SpecializeDecodersPerBitwidth");
+
+  // Variable length instructions use the same `APInt` type for all instructions
+  // so we cannot specialize decoders based on instruction bitwidths (which
+  // requires using different `InstType` for differet bitwidths for the correct
+  // template specialization to kick in).
+  if (IsVarLenInst && SpecializeDecodersPerBitwidth)
+    PrintFatalError(
+        "Cannot specialize decoders for variable length instuctions");
+
+  // Bucket entries in the `EncMap` based on the instruction bitwidths if
+  // SpecializeDecodersPerBitwidth is enabled.
+  SmallVector<SmallVector<const EncMapTy::value_type *>> PerBitWidthEncMap;
----------------
s-barannikov wrote:

Another idea is to turn EncMap into a map of maps, where the outer map is keyed by bitwidth and the inner map is keyed by (ns, hwmode). Then guard `TableInfo.Decoders.clear()` and `emitDecodeFunction` by the option.

https://github.com/llvm/llvm-project/pull/154865


More information about the llvm-commits mailing list