[llvm] [LLVM][MC] Add support to cull inactive decoders in decoder emitter (PR #154865)

Mon Aug 25 09:49:18 PDT 2025

https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/154865

>From 7736778b3088d58a3c9440060b21f6e3047827ab Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 21 Aug 2025 16:08:03 -0700
Subject: [PATCH] Cull decoders

---
 llvm/include/llvm/MC/MCDecoder.h              |  25 ++++
 llvm/include/llvm/Target/Target.td            |   7 +
 llvm/lib/Target/AMDGPU/AMDGPU.td              |   1 +
 .../Disassembler/AMDGPUDisassembler.cpp       |  29 ++---
 .../AMDGPU/Disassembler/AMDGPUDisassembler.h  |  38 ------
 .../RISCV/Disassembler/RISCVDisassembler.cpp  |  19 ++-
 llvm/lib/Target/RISCV/RISCV.td                |   1 +
 llvm/utils/TableGen/DecoderEmitter.cpp        | 120 +++++++++++++-----
 8 files changed, 147 insertions(+), 93 deletions(-)

diff --git a/llvm/include/llvm/MC/MCDecoder.h b/llvm/include/llvm/MC/MCDecoder.h
index 70762a4a5ebae..6259ef5a3bd5d 100644
--- a/llvm/include/llvm/MC/MCDecoder.h
+++ b/llvm/include/llvm/MC/MCDecoder.h
@@ -12,6 +12,7 @@
 
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/Support/MathExtras.h"
+#include <bitset>
 #include <cassert>
 
 namespace llvm::MCD {
@@ -48,6 +49,15 @@ fieldFromInstruction(const InsnType &Insn, unsigned StartBit,
   return Insn.extractBitsAsZExtValue(NumBits, StartBit);
 }
 
+template <size_t N>
+uint64_t fieldFromInstruction(const std::bitset<N> &Insn, unsigned StartBit,
+                              unsigned NumBits) {
+  assert(StartBit + NumBits <= N && "Instruction field out of bounds!");
+  assert(NumBits <= 64 && "Cannot support >64-bit extractions!");
+  const std::bitset<N> Mask(maskTrailingOnes<uint64_t>(NumBits));
+  return ((Insn >> StartBit) & Mask).to_ullong();
+}
+
 // Helper function for inserting bits extracted from an encoded instruction into
 // an integer-typed field.
 template <typename IntType>
@@ -62,6 +72,21 @@ insertBits(IntType &field, IntType bits, unsigned startBit, unsigned numBits) {
   field |= bits << startBit;
 }
 
+// InsnBitWidth is essentially a type trait used by the decoder emitter to query
+// the supported bitwidth for a given type. But default, the value is 0, making
+// it an invalid type for use as `InsnType` when instantiating the decoder.
+template <typename T> inline constexpr uint32_t InsnBitWidth = 0;
+
+// Provide specializations for commonly used types.
+// Integer types.
+template <> inline constexpr uint32_t InsnBitWidth<uint8_t> = 8;
+template <> inline constexpr uint32_t InsnBitWidth<uint16_t> = 16;
+template <> inline constexpr uint32_t InsnBitWidth<uint32_t> = 32;
+template <> inline constexpr uint32_t InsnBitWidth<uint64_t> = 64;
+
+// std::bitset<N>.
+template <size_t N> inline constexpr uint32_t InsnBitWidth<std::bitset<N>> = N;
+
 } // namespace llvm::MCD
 
 #endif // LLVM_MC_MCDECODER_H
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 495b59ee916cf..9aa8c91939b52 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1158,6 +1158,13 @@ class InstrInfo {
   //
   // This option is a temporary migration help. It will go away.
   bit guessInstructionProperties = true;
+
+  // Generate decoders that are specialized per bit width in the generated
+  // decoder/disassembler. This requires use of different `InsnType` for
+  // different bitwidths and defining `InsnBitWidth` template specialization for
+  // the `InsnType` types used. Some common specializations are its already
+  // provided in MCDecoder.h.
+  bit SpecializeDecodersPerBitwidth = false;
 }
 
 // Standard Pseudo Instructions.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 8e4b6365dc06b..93d904a8fe991 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2091,6 +2091,7 @@ def FeatureISAVersion12_Generic: FeatureSet<
 
 def AMDGPUInstrInfo : InstrInfo {
   let guessInstructionProperties = 1;
+  let SpecializeDecodersPerBitwidth = true;
 }
 
 def AMDGPUAsmParser : AsmParser {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 6a2beeed41dfd..ac42e51d447e3 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Support/Compiler.h"
 
 using namespace llvm;
+using namespace llvm::MCD;
 
 #define DEBUG_TYPE "amdgpu-disassembler"
 
@@ -498,26 +499,24 @@ template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
   return Res;
 }
 
-static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
+static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
+  using namespace llvm::support::endian;
   assert(Bytes.size() >= 12);
-  uint64_t Lo =
-      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
+  std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
   Bytes = Bytes.slice(8);
-  uint64_t Hi =
-      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
+  std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
   Bytes = Bytes.slice(4);
-  return DecoderUInt128(Lo, Hi);
+  return (Hi << 64) | Lo;
 }
 
-static inline DecoderUInt128 eat16Bytes(ArrayRef<uint8_t> &Bytes) {
+static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
+  using namespace llvm::support::endian;
   assert(Bytes.size() >= 16);
-  uint64_t Lo =
-      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
+  std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
   Bytes = Bytes.slice(8);
-  uint64_t Hi =
-      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
+  std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
   Bytes = Bytes.slice(8);
-  return DecoderUInt128(Lo, Hi);
+  return (Hi << 64) | Lo;
 }
 
 void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
@@ -600,14 +599,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
     // encodings
     if (isGFX1250() && Bytes.size() >= 16) {
-      DecoderUInt128 DecW = eat16Bytes(Bytes);
+      std::bitset<128> DecW = eat16Bytes(Bytes);
       if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
         break;
       Bytes = Bytes_.slice(0, MaxInstBytesNum);
     }
 
     if (isGFX11Plus() && Bytes.size() >= 12) {
-      DecoderUInt128 DecW = eat12Bytes(Bytes);
+      std::bitset<96> DecW = eat12Bytes(Bytes);
 
       if (isGFX11() &&
           tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
@@ -642,7 +641,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
 
     } else if (Bytes.size() >= 16 &&
                STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
-      DecoderUInt128 DecW = eat16Bytes(Bytes);
+      std::bitset<128> DecW = eat16Bytes(Bytes);
       if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
         break;
 
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index f4d164bf10c3c..ded447b6f8d5a 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -32,44 +32,6 @@ class MCOperand;
 class MCSubtargetInfo;
 class Twine;
 
-// Exposes an interface expected by autogenerated code in
-// FixedLenDecoderEmitter
-class DecoderUInt128 {
-private:
-  uint64_t Lo = 0;
-  uint64_t Hi = 0;
-
-public:
-  DecoderUInt128() = default;
-  DecoderUInt128(uint64_t Lo, uint64_t Hi = 0) : Lo(Lo), Hi(Hi) {}
-  operator bool() const { return Lo || Hi; }
-  uint64_t extractBitsAsZExtValue(unsigned NumBits,
-                                  unsigned BitPosition) const {
-    assert(NumBits && NumBits <= 64);
-    assert(BitPosition < 128);
-    uint64_t Val;
-    if (BitPosition < 64)
-      Val = Lo >> BitPosition | Hi << 1 << (63 - BitPosition);
-    else
-      Val = Hi >> (BitPosition - 64);
-    return Val & ((uint64_t(2) << (NumBits - 1)) - 1);
-  }
-  DecoderUInt128 operator&(const DecoderUInt128 &RHS) const {
-    return DecoderUInt128(Lo & RHS.Lo, Hi & RHS.Hi);
-  }
-  DecoderUInt128 operator&(const uint64_t &RHS) const {
-    return *this & DecoderUInt128(RHS);
-  }
-  DecoderUInt128 operator~() const { return DecoderUInt128(~Lo, ~Hi); }
-  bool operator==(const DecoderUInt128 &RHS) {
-    return Lo == RHS.Lo && Hi == RHS.Hi;
-  }
-  bool operator!=(const DecoderUInt128 &RHS) {
-    return Lo != RHS.Lo || Hi != RHS.Hi;
-  }
-  bool operator!=(const int &RHS) { return *this != DecoderUInt128(RHS); }
-};
-
 //===----------------------------------------------------------------------===//
 // AMDGPUDisassembler
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index dbb16fce8390a..494de07843237 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -558,7 +558,8 @@ static DecodeStatus decodeXqccmpRlistS0(MCInst &Inst, uint32_t Imm,
   return decodeZcmpRlist(Inst, Imm, Address, Decoder);
 }
 
-static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
+static DecodeStatus decodeCSSPushPopchk(MCInst &Inst,
+                                        const std::bitset<48> &Insn,
                                         uint64_t Address,
                                         const MCDisassembler *Decoder) {
   uint32_t Rs1 = fieldFromInstruction(Insn, 7, 5);
@@ -568,7 +569,8 @@ static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
   return MCDisassembler::Success;
 }
 
-static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
+static DecodeStatus decodeXTHeadMemPair(MCInst &Inst,
+                                        const std::bitset<48> &Insn,
                                         uint64_t Address,
                                         const MCDisassembler *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
@@ -710,9 +712,7 @@ DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size,
   }
   Size = 4;
 
-  // Use uint64_t to match getInstruction48. decodeInstruction is templated
-  // on the Insn type.
-  uint64_t Insn = support::endian::read32le(Bytes.data());
+  uint32_t Insn = support::endian::read32le(Bytes.data());
 
   for (const DecoderListEntry &Entry : DecoderList32) {
     if (!Entry.haveContainedFeatures(STI.getFeatureBits()))
@@ -758,9 +758,7 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size,
   }
   Size = 2;
 
-  // Use uint64_t to match getInstruction48. decodeInstruction is templated
-  // on the Insn type.
-  uint64_t Insn = support::endian::read16le(Bytes.data());
+  uint16_t Insn = support::endian::read16le(Bytes.data());
 
   for (const DecoderListEntry &Entry : DecoderList16) {
     if (!Entry.haveContainedFeatures(STI.getFeatureBits()))
@@ -794,10 +792,11 @@ DecodeStatus RISCVDisassembler::getInstruction48(MCInst &MI, uint64_t &Size,
   }
   Size = 6;
 
-  uint64_t Insn = 0;
+  uint64_t InsnBits = 0;
   for (size_t i = Size; i-- != 0;)
-    Insn += (static_cast<uint64_t>(Bytes[i]) << 8 * i);
+    InsnBits += (static_cast<uint64_t>(Bytes[i]) << 8 * i);
 
+  std::bitset<48> Insn = InsnBits;
   for (const DecoderListEntry &Entry : DecoderList48) {
     if (!Entry.haveContainedFeatures(STI.getFeatureBits()))
       continue;
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index b24d8637cb27f..bd8bc56f1da3a 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -85,6 +85,7 @@ include "RISCVPfmCounters.td"
 
 def RISCVInstrInfo : InstrInfo {
   let guessInstructionProperties = 0;
+  let SpecializeDecodersPerBitwidth = true;
 }
 
 def RISCVAsmParser : AsmParser {
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index de5119dfbe1d8..39c4d5e9eecf2 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
@@ -354,7 +355,8 @@ class DecoderEmitter {
   void emitPredicateFunction(formatted_raw_ostream &OS,
                              PredicateSet &Predicates) const;
   void emitDecoderFunction(formatted_raw_ostream &OS,
-                           DecoderSet &Decoders) const;
+                           const DecoderTableInfo &TableInfo,
+                           unsigned BucketBitWidth) const;
 
   // run - Output the code emitter
   void run(raw_ostream &o) const;
@@ -936,7 +938,10 @@ void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
 }
 
 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
-                                         DecoderSet &Decoders) const {
+                                         const DecoderTableInfo &TableInfo,
+                                         unsigned BucketBitWidth) const {
+  const DecoderSet &Decoders = TableInfo.Decoders;
+
   // The decoder function is just a big switch statement or a table of function
   // pointers based on the input decoder index.
 
@@ -967,8 +972,13 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
 
   OS << "// Handling " << Decoders.size() << " cases.\n";
   OS << "template <typename InsnType>\n";
-  OS << "static DecodeStatus decodeToMCInst(unsigned Idx, " << DecodeParams
-     << ") {\n";
+  OS << "static ";
+  if (BucketBitWidth != 0)
+    OS << "std::enable_if_t<InsnBitWidth<InsnType> == " << BucketBitWidth
+       << ", DecodeStatus>\n";
+  else
+    OS << "DecodeStatus ";
+  OS << "decodeToMCInst(unsigned Idx, " << DecodeParams << ") {\n";
   OS << "  using namespace llvm::MCD;\n";
   OS << "  DecodeComplete = true;\n";
 
@@ -981,7 +991,6 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
     OS << "  };\n";
     OS << "  if (Idx >= " << Decoders.size() << ")\n";
     OS << "    llvm_unreachable(\"Invalid decoder index!\");\n";
-
     OS << "  return decodeFnTable[Idx](S, insn, MI, Address, Decoder, "
           "DecodeComplete);\n";
   } else {
@@ -1135,6 +1144,7 @@ unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders,
 
   // Make sure the predicate is in the table.
   Decoders.insert(CachedHashString(Decoder));
+
   // Now figure out the index for when we write out the table.
   DecoderSet::const_iterator P = find(Decoders, Decoder.str());
   return std::distance(Decoders.begin(), P);
@@ -2523,9 +2533,19 @@ namespace {
     }
   }
 
+  // For variable instruction, we emit a instruction length table to let the
+  // decoder know how long the instructions are. You can see example usage in
+  // M68k's disassembler.
+  if (IsVarLenInst)
+    emitInstrLenTable(OS, InstrLen);
+
   // Map of (namespace, hwmode, size) tuple to encoding IDs.
-  std::map<std::tuple<StringRef, unsigned, unsigned>, std::vector<unsigned>>
-      EncMap;
+  using EncMapTy = std::map<std::tuple<StringRef, unsigned, unsigned>,
+                            std::vector<unsigned>>;
+  EncMapTy EncMap;
+
+  // The set of valid instruction bitwidths for this target.
+  SmallSet<unsigned, 4> InstrBitwidths;
   for (const auto &[HwModeID, EncodingIDs] : EncodingIDsByHwMode) {
     for (unsigned EncodingID : EncodingIDs) {
       const InstructionEncoding &Encoding = Encodings[EncodingID];
@@ -2533,37 +2553,80 @@ namespace {
       unsigned Size = EncodingDef->getValueAsInt("Size");
       StringRef DecoderNamespace =
           EncodingDef->getValueAsString("DecoderNamespace");
+      const unsigned BitWidth = IsVarLenInst ? MaxInstLen : 8 * Size;
+      InstrBitwidths.insert(BitWidth);
       EncMap[{DecoderNamespace, HwModeID, Size}].push_back(EncodingID);
     }
   }
 
+  const bool SpecializeDecodersPerBitwidth =
+      Target.getInstructionSet()->getValueAsBit(
+          "SpecializeDecodersPerBitwidth");
+
+  // Variable length instructions use the same `APInt` type for all instructions
+  // so we cannot cull decoders based on instruction bitwidths (which requires
+  // using different types for differet bitwidths for the correct template
+  // specialization to kick in).
+  if (IsVarLenInst && SpecializeDecodersPerBitwidth)
+    PrintFatalError(
+        "Cannot specialize decoders for variable length instuctions");
+
+  // Bucket entries in the `EncMap` based on the instruction bitwidths if
+  // SpecializeDecodersPerBitwidth is enabled.
+  using BucketTy = SmallVector<const EncMapTy::value_type *>;
+  SmallVector<BucketTy> PerBitWidthEncMap;
+  if (SpecializeDecodersPerBitwidth) {
+    for (unsigned BW : InstrBitwidths) {
+      auto &Bucket = PerBitWidthEncMap.emplace_back();
+      for (const auto &Entry : EncMap) {
+        unsigned Size = std::get<2>(Entry.first);
+        const unsigned BitWidth = IsVarLenInst ? MaxInstLen : 8 * Size;
+        if (BitWidth != BW)
+          continue;
+        Bucket.push_back(&Entry);
+      }
+    }
+  } else {
+    // If we are not emitting decoders specialized per bit width, create a
+    // single bucket
+    auto &Bucket = PerBitWidthEncMap.emplace_back();
+    for (const auto &Entry : EncMap)
+      Bucket.push_back(&Entry);
+  }
+
   DecoderTableInfo TableInfo;
   unsigned OpcodeMask = 0;
 
-  for (const auto &[Key, EncodingIDs] : EncMap) {
-    auto [DecoderNamespace, HwModeID, Size] = Key;
-    const unsigned BitWidth = IsVarLenInst ? MaxInstLen : 8 * Size;
-    // Emit the decoder for this (namespace, hwmode, width) combination.
-    FilterChooser FC(Encodings, EncodingIDs, BitWidth, Target);
+  for (const auto &Bucket : PerBitWidthEncMap) {
+    // Each BitWidth get's its own decoders and decoder function.
+    TableInfo.Decoders.clear();
+
+    for (const auto &[Key, EncodingIDs] : make_pointee_range(Bucket)) {
+      auto [DecoderNamespace, HwModeID, Size] = Key;
+      const unsigned BitWidth = IsVarLenInst ? MaxInstLen : 8 * Size;
+      // Emit the decoder for this (namespace, hwmode, width) combination.
+      FilterChooser FC(Encodings, EncodingIDs, BitWidth, Target);
+
+      // The decode table is cleared for each top level decoder function. The
+      // predicates and decoders themselves, however, are shared across all
+      // decoders to give more opportunities for uniqueing.
+      TableInfo.Table.clear();
+      FC.emitTableEntries(TableInfo);
+      assert(TableInfo.isOutermostScope() && "fixup stack phasing error!");
+
+      // Print the table to the output stream.
+      OpcodeMask |= emitTable(OS, TableInfo.Table, DecoderNamespace, HwModeID,
+                              BitWidth, EncodingIDs);
+    }
 
-    // The decode table is cleared for each top level decoder function. The
-    // predicates and decoders themselves, however, are shared across all
-    // decoders to give more opportunities for uniqueing.
-    TableInfo.Table.clear();
-    FC.emitTableEntries(TableInfo);
-    assert(TableInfo.isOutermostScope() && "fixup stack phasing error!");
+    unsigned BucketSize = std::get<2>(Bucket.front()->first);
+    const unsigned BucketBitWidth =
+        SpecializeDecodersPerBitwidth ? BucketSize * 8 : 0;
 
-    // Print the table to the output stream.
-    OpcodeMask |= emitTable(OS, TableInfo.Table, DecoderNamespace, HwModeID,
-                            BitWidth, EncodingIDs);
+    // Emit the decoder function for each BitWidth bucket.
+    emitDecoderFunction(OS, TableInfo, BucketBitWidth);
   }
 
-  // For variable instruction, we emit a instruction length table
-  // to let the decoder know how long the instructions are.
-  // You can see example usage in M68k's disassembler.
-  if (IsVarLenInst)
-    emitInstrLenTable(OS, InstrLen);
-
   const bool HasCheckPredicate =
       OpcodeMask &
       ((1 << MCD::OPC_CheckPredicate) | (1 << MCD::OPC_CheckPredicateOrFail));
@@ -2572,9 +2635,6 @@ namespace {
   if (HasCheckPredicate)
     emitPredicateFunction(OS, TableInfo.Predicates);
 
-  // Emit the decoder function.
-  emitDecoderFunction(OS, TableInfo.Decoders);
-
   // Emit the main entry point for the decoder, decodeInstruction().
   emitDecodeInstruction(OS, IsVarLenInst, OpcodeMask);