[llvm] [TableGen][DecoderEmitter] Rework table construction/emission (PR #155889)

Sergei Barannikov via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 15 20:00:01 PDT 2025


https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/155889

>From bd0a8065ddaab91efcfa5359984fd6eaea71364d Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 14 Sep 2025 03:19:24 +0300
Subject: [PATCH] tmp

---
 llvm/include/llvm/MC/MCDecoderOps.h         |   20 +-
 llvm/utils/TableGen/CMakeLists.txt          |    3 +
 llvm/utils/TableGen/DecoderEmitter.cpp      | 1444 +------------------
 llvm/utils/TableGen/DecoderTableEmitter.cpp |  382 +++++
 llvm/utils/TableGen/DecoderTree.cpp         |   11 +
 llvm/utils/TableGen/DecoderTree.h           |  238 +++
 llvm/utils/TableGen/DecoderTreeBuilder.cpp  |  884 ++++++++++++
 7 files changed, 1573 insertions(+), 1409 deletions(-)
 create mode 100644 llvm/utils/TableGen/DecoderTableEmitter.cpp
 create mode 100644 llvm/utils/TableGen/DecoderTree.cpp
 create mode 100644 llvm/utils/TableGen/DecoderTree.h
 create mode 100644 llvm/utils/TableGen/DecoderTreeBuilder.cpp

diff --git a/llvm/include/llvm/MC/MCDecoderOps.h b/llvm/include/llvm/MC/MCDecoderOps.h
index 790ff3eb4f333..4e06deb0eacee 100644
--- a/llvm/include/llvm/MC/MCDecoderOps.h
+++ b/llvm/include/llvm/MC/MCDecoderOps.h
@@ -13,19 +13,15 @@
 namespace llvm::MCD {
 
 // Disassembler state machine opcodes.
-// nts_t is either uint16_t or uint24_t based on whether large decoder table is
-// enabled.
 enum DecoderOps {
-  OPC_Scope = 1,         // OPC_Scope(nts_t NumToSkip)
-  OPC_ExtractField,      // OPC_ExtractField(uleb128 Start, uint8_t Len)
-  OPC_FilterValueOrSkip, // OPC_FilterValueOrSkip(uleb128 Val, nts_t NumToSkip)
-  OPC_FilterValue,       // OPC_FilterValue(uleb128 Val)
-  OPC_CheckField,        // OPC_CheckField(uleb128 Start, uint8_t Len,
-                         //                uleb128 Val)
-  OPC_CheckPredicate,    // OPC_CheckPredicate(uleb128 PIdx)
-  OPC_Decode,            // OPC_Decode(uleb128 Opcode, uleb128 DIdx)
-  OPC_TryDecode,         // OPC_TryDecode(uleb128 Opcode, uleb128 DIdx)
-  OPC_SoftFail,          // OPC_SoftFail(uleb128 PMask, uleb128 NMask)
+  OPC_Scope = 1,      // OPC_Scope(uleb128 Size)
+  OPC_SwitchField,    // OPC_SwitchField(uleb128 Start, uint8_t Len,
+                      //                 [uleb128 Val, uleb128 Size]...)
+  OPC_CheckField,     // OPC_CheckField(uleb128 Start, uint8_t Len, uleb128 Val)
+  OPC_CheckPredicate, // OPC_CheckPredicate(uleb128 PIdx)
+  OPC_Decode,         // OPC_Decode(uleb128 Opcode, uleb128 DIdx)
+  OPC_TryDecode,      // OPC_TryDecode(uleb128 Opcode, uleb128 DIdx)
+  OPC_SoftFail,       // OPC_SoftFail(uleb128 PMask, uleb128 NMask)
 };
 
 } // namespace llvm::MCD
diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt
index 67291214c14e6..1a85553950f69 100644
--- a/llvm/utils/TableGen/CMakeLists.txt
+++ b/llvm/utils/TableGen/CMakeLists.txt
@@ -43,6 +43,9 @@ add_tablegen(llvm-tblgen LLVM
   DAGISelMatcherGen.cpp
   DAGISelMatcherOpt.cpp
   DecoderEmitter.cpp
+  DecoderTableEmitter.cpp
+  DecoderTree.cpp
+  DecoderTreeBuilder.cpp
   DFAEmitter.cpp
   DFAPacketizerEmitter.cpp
   DisassemblerEmitter.cpp
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 97a6b58e1bd99..6600345bad779 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -16,38 +16,25 @@
 #include "Common/CodeGenTarget.h"
 #include "Common/InfoByHwMode.h"
 #include "Common/InstructionEncoding.h"
-#include "Common/SubtargetFeatureInfo.h"
 #include "Common/VarLenCodeEmitterGen.h"
+#include "DecoderTree.h"
 #include "TableGenBackends.h"
-#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCDecoderOps.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/LEB128.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
-#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
 #include <map>
 #include <memory>
 #include <set>
@@ -56,7 +43,6 @@
 #include <vector>
 
 using namespace llvm;
-using namespace llvm::MCD;
 
 #define DEBUG_TYPE "decoder-emitter"
 
@@ -84,13 +70,8 @@ static cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates(
             "significantly reducing Table Duplications")),
     cl::init(SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat));
 
-static cl::opt<bool> LargeTable(
-    "large-decoder-table",
-    cl::desc("Use large decoder table format. This uses 24 bits for offset\n"
-             "in the table instead of the default 16 bits."),
-    cl::init(false), cl::cat(DisassemblerEmitterCat));
-
-static cl::opt<bool> UseFnTableInDecodeToMCInst(
+namespace llvm {
+cl::opt<bool> UseFnTableInDecodeToMCInst(
     "use-fn-table-in-decode-to-mcinst",
     cl::desc(
         "Use a table of function pointers instead of a switch case in the\n"
@@ -102,23 +83,24 @@ static cl::opt<bool> UseFnTableInDecodeToMCInst(
 // bitwidths and defining `InsnBitWidth` template specialization for the
 // `InsnType` types used. Some common specializations are already defined in
 // MCDecoder.h.
-static cl::opt<bool> SpecializeDecodersPerBitwidth(
+cl::opt<bool> SpecializeDecodersPerBitwidth(
     "specialize-decoders-per-bitwidth",
     cl::desc("Specialize the generated `decodeToMCInst` function per bitwidth. "
              "Helps reduce the code size."),
     cl::init(false), cl::cat(DisassemblerEmitterCat));
 
-static cl::opt<bool> IgnoreNonDecodableOperands(
+cl::opt<bool> IgnoreNonDecodableOperands(
     "ignore-non-decodable-operands",
     cl::desc(
         "Do not issue an error if an operand cannot be decoded automatically."),
     cl::init(false), cl::cat(DisassemblerEmitterCat));
 
-static cl::opt<bool> IgnoreFullyDefinedOperands(
+cl::opt<bool> IgnoreFullyDefinedOperands(
     "ignore-fully-defined-operands",
     cl::desc(
         "Do not automatically decode operands with no '?' in their encoding."),
     cl::init(false), cl::cat(DisassemblerEmitterCat));
+} // namespace llvm
 
 STATISTIC(NumEncodings, "Number of encodings considered");
 STATISTIC(NumEncodingsLackingDisasm,
@@ -127,107 +109,8 @@ STATISTIC(NumInstructions, "Number of instructions considered");
 STATISTIC(NumEncodingsSupported, "Number of encodings supported");
 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
 
-static unsigned getNumToSkipInBytes() { return LargeTable ? 3 : 2; }
-
-/// Similar to KnownBits::print(), but allows you to specify a character to use
-/// to print unknown bits.
-static void printKnownBits(raw_ostream &OS, const KnownBits &Bits,
-                           char Unknown) {
-  for (unsigned I = Bits.getBitWidth(); I--;) {
-    if (Bits.Zero[I] && Bits.One[I])
-      OS << '!';
-    else if (Bits.Zero[I])
-      OS << '0';
-    else if (Bits.One[I])
-      OS << '1';
-    else
-      OS << Unknown;
-  }
-}
-
 namespace {
 
-/// Sorting predicate to sort encoding IDs by encoding width.
-class LessEncodingIDByWidth {
-  ArrayRef<InstructionEncoding> Encodings;
-
-public:
-  explicit LessEncodingIDByWidth(ArrayRef<InstructionEncoding> Encodings)
-      : Encodings(Encodings) {}
-
-  bool operator()(unsigned ID1, unsigned ID2) const {
-    return Encodings[ID1].getBitWidth() < Encodings[ID2].getBitWidth();
-  }
-};
-
-typedef SmallSetVector<CachedHashString, 16> PredicateSet;
-typedef SmallSetVector<CachedHashString, 16> DecoderSet;
-
-class DecoderTable {
-public:
-  DecoderTable() { Data.reserve(16384); }
-
-  void clear() { Data.clear(); }
-  size_t size() const { return Data.size(); }
-  const uint8_t *data() const { return Data.data(); }
-
-  using const_iterator = std::vector<uint8_t>::const_iterator;
-  const_iterator begin() const { return Data.begin(); }
-  const_iterator end() const { return Data.end(); }
-
-  /// Inserts a state machine opcode into the table.
-  void insertOpcode(DecoderOps Opcode) { Data.push_back(Opcode); }
-
-  /// Inserts a uint8 encoded value into the table.
-  void insertUInt8(unsigned Value) {
-    assert(isUInt<8>(Value));
-    Data.push_back(Value);
-  }
-
-  /// Inserts a ULEB128 encoded value into the table.
-  void insertULEB128(uint64_t Value) {
-    // Encode and emit the value to filter against.
-    uint8_t Buffer[16];
-    unsigned Len = encodeULEB128(Value, Buffer);
-    Data.insert(Data.end(), Buffer, Buffer + Len);
-  }
-
-  // Insert space for `NumToSkip` and return the position
-  // in the table for patching.
-  size_t insertNumToSkip() {
-    size_t Size = Data.size();
-    Data.insert(Data.end(), getNumToSkipInBytes(), 0);
-    return Size;
-  }
-
-  void patchNumToSkip(size_t FixupIdx, uint32_t DestIdx) {
-    // Calculate the distance from the byte following the fixup entry byte
-    // to the destination. The Target is calculated from after the
-    // `getNumToSkipInBytes()`-byte NumToSkip entry itself, so subtract
-    // `getNumToSkipInBytes()` from the displacement here to account for that.
-    assert(DestIdx >= FixupIdx + getNumToSkipInBytes() &&
-           "Expecting a forward jump in the decoding table");
-    uint32_t Delta = DestIdx - FixupIdx - getNumToSkipInBytes();
-    if (!isUIntN(8 * getNumToSkipInBytes(), Delta))
-      PrintFatalError(
-          "disassembler decoding table too large, try --large-decoder-table");
-
-    Data[FixupIdx] = static_cast<uint8_t>(Delta);
-    Data[FixupIdx + 1] = static_cast<uint8_t>(Delta >> 8);
-    if (getNumToSkipInBytes() == 3)
-      Data[FixupIdx + 2] = static_cast<uint8_t>(Delta >> 16);
-  }
-
-private:
-  std::vector<uint8_t> Data;
-};
-
-struct DecoderTableInfo {
-  DecoderTable Table;
-  PredicateSet Predicates;
-  DecoderSet Decoders;
-};
-
 using NamespacesHwModesMap = std::map<StringRef, std::set<unsigned>>;
 
 class DecoderEmitter {
@@ -246,15 +129,10 @@ class DecoderEmitter {
 
   const CodeGenTarget &getTarget() const { return Target; }
 
-  // Emit the decoder state machine table. Returns a mask of MCD decoder ops
-  // that were emitted.
-  unsigned emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
-                     StringRef Namespace, unsigned HwModeID, unsigned BitWidth,
-                     ArrayRef<unsigned> EncodingIDs) const;
   void emitInstrLenTable(formatted_raw_ostream &OS,
                          ArrayRef<unsigned> InstrLen) const;
   void emitPredicateFunction(formatted_raw_ostream &OS,
-                             PredicateSet &Predicates) const;
+                             const PredicateSet &Predicates) const;
   void emitDecoderFunction(formatted_raw_ostream &OS,
                            const DecoderSet &Decoders,
                            unsigned BucketBitWidth) const;
@@ -275,555 +153,7 @@ class DecoderEmitter {
   void parseInstructionEncodings();
 };
 
-} // end anonymous namespace
-
-namespace {
-
-/// Filter - Filter works with FilterChooser to produce the decoding tree for
-/// the ISA.
-///
-/// It is useful to think of a Filter as governing the switch stmts of the
-/// decoding tree in a certain level.  Each case stmt delegates to an inferior
-/// FilterChooser to decide what further decoding logic to employ, or in another
-/// words, what other remaining bits to look at.  The FilterChooser eventually
-/// chooses a best Filter to do its job.
-///
-/// This recursive scheme ends when the number of Opcodes assigned to the
-/// FilterChooser becomes 1 or if there is a conflict.  A conflict happens when
-/// the Filter/FilterChooser combo does not know how to distinguish among the
-/// Opcodes assigned.
-///
-/// An example of a conflict is
-///
-/// Decoding Conflict:
-///     ................................
-///     1111............................
-///     1111010.........................
-///     1111010...00....................
-///     1111010...00........0001........
-///     111101000.00........0001........
-///     111101000.00........00010000....
-///     111101000_00________00010000____  VST4q8a
-///     111101000_00________00010000____  VST4q8b
-///
-/// The Debug output shows the path that the decoding tree follows to reach the
-/// the conclusion that there is a conflict.  VST4q8a is a vst4 to double-spaced
-/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
-///
-/// The encoding info in the .td files does not specify this meta information,
-/// which could have been used by the decoder to resolve the conflict.  The
-/// decoder could try to decode the even/odd register numbering and assign to
-/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
-/// version and return the Opcode since the two have the same Asm format string.
-struct Filter {
-  unsigned StartBit; // the starting bit position
-  unsigned NumBits;  // number of bits to filter
-
-  // Map of well-known segment value to the set of uid's with that value.
-  std::map<uint64_t, std::vector<unsigned>> FilteredIDs;
-
-  // Set of uid's with non-constant segment values.
-  std::vector<unsigned> VariableIDs;
-
-  Filter(ArrayRef<InstructionEncoding> Encodings,
-         ArrayRef<unsigned> EncodingIDs, unsigned StartBit, unsigned NumBits);
-
-  // Returns the number of fanout produced by the filter.  More fanout implies
-  // the filter distinguishes more categories of instructions.
-  unsigned usefulness() const;
-}; // end class Filter
-
-// These are states of our finite state machines used in FilterChooser's
-// filterProcessor() which produces the filter candidates to use.
-enum bitAttr_t {
-  ATTR_NONE,
-  ATTR_FILTERED,
-  ATTR_ALL_SET,
-  ATTR_ALL_UNSET,
-  ATTR_MIXED
-};
-
-/// FilterChooser - FilterChooser chooses the best filter among a set of Filters
-/// in order to perform the decoding of instructions at the current level.
-///
-/// Decoding proceeds from the top down.  Based on the well-known encoding bits
-/// of instructions available, FilterChooser builds up the possible Filters that
-/// can further the task of decoding by distinguishing among the remaining
-/// candidate instructions.
-///
-/// Once a filter has been chosen, it is called upon to divide the decoding task
-/// into sub-tasks and delegates them to its inferior FilterChoosers for further
-/// processings.
-///
-/// It is useful to think of a Filter as governing the switch stmts of the
-/// decoding tree.  And each case is delegated to an inferior FilterChooser to
-/// decide what further remaining bits to look at.
-
-class FilterChooser {
-  // TODO: Unfriend by providing the necessary accessors.
-  friend class DecoderTableBuilder;
-
-  // Vector of encodings to choose our filter.
-  ArrayRef<InstructionEncoding> Encodings;
-
-  /// Encoding IDs for this filter chooser to work on.
-  /// Sorted by non-decreasing encoding width.
-  SmallVector<unsigned, 0> EncodingIDs;
-
-  // Array of bit values passed down from our parent.
-  // Set to all unknown for Parent == nullptr.
-  KnownBits FilterBits;
-
-  // Links to the FilterChooser above us in the decoding tree.
-  const FilterChooser *Parent;
-
-  /// If the selected filter matches multiple encodings, then this is the
-  /// starting position and the width of the filtered range.
-  unsigned StartBit;
-  unsigned NumBits;
-
-  /// If the selected filter matches multiple encodings, and there is
-  /// *exactly one* encoding in which all bits are known in the filtered range,
-  /// then this is the ID of that encoding.
-  /// Also used when there is only one encoding.
-  std::optional<unsigned> SingletonEncodingID;
-
-  /// If the selected filter matches multiple encodings, and there is
-  /// *at least one* encoding in which all bits are known in the filtered range,
-  /// then this is the FilterChooser created for the subset of encodings that
-  /// contain some unknown bits in the filtered range.
-  std::unique_ptr<const FilterChooser> VariableFC;
-
-  /// If the selected filter matches multiple encodings, and there is
-  /// *more than one* encoding in which all bits are known in the filtered
-  /// range, then this is a map of field values to FilterChoosers created for
-  /// the subset of encodings sharing that field value.
-  /// The "field value" here refers to the encoding bits in the filtered range.
-  std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
-
-  /// Set to true if decoding conflict was encountered.
-  bool HasConflict = false;
-
-  struct Island {
-    unsigned StartBit;
-    unsigned NumBits;
-    uint64_t FieldVal;
-  };
-
-public:
-  /// Constructs a top-level filter chooser.
-  FilterChooser(ArrayRef<InstructionEncoding> Encodings,
-                ArrayRef<unsigned> EncodingIDs)
-      : Encodings(Encodings), EncodingIDs(EncodingIDs), Parent(nullptr) {
-    // Sort encoding IDs once.
-    stable_sort(this->EncodingIDs, LessEncodingIDByWidth(Encodings));
-    // Filter width is the width of the smallest encoding.
-    unsigned FilterWidth = Encodings[this->EncodingIDs.front()].getBitWidth();
-    FilterBits = KnownBits(FilterWidth);
-    doFilter();
-  }
-
-  /// Constructs an inferior filter chooser.
-  FilterChooser(ArrayRef<InstructionEncoding> Encodings,
-                ArrayRef<unsigned> EncodingIDs, const KnownBits &FilterBits,
-                const FilterChooser &Parent)
-      : Encodings(Encodings), EncodingIDs(EncodingIDs), Parent(&Parent) {
-    // Inferior filter choosers are created from sorted array of encoding IDs.
-    assert(is_sorted(EncodingIDs, LessEncodingIDByWidth(Encodings)));
-    assert(!FilterBits.hasConflict() && "Broken filter");
-    // Filter width is the width of the smallest encoding.
-    unsigned FilterWidth = Encodings[EncodingIDs.front()].getBitWidth();
-    this->FilterBits = FilterBits.anyext(FilterWidth);
-    doFilter();
-  }
-
-  FilterChooser(const FilterChooser &) = delete;
-  void operator=(const FilterChooser &) = delete;
-
-  /// Returns the width of the largest encoding.
-  unsigned getMaxEncodingWidth() const {
-    // The last encoding ID is the ID of an encoding with the largest width.
-    return Encodings[EncodingIDs.back()].getBitWidth();
-  }
-
-  /// Returns true if any decoding conflicts were encountered.
-  bool hasConflict() const { return HasConflict; }
-
-private:
-  /// Applies the given filter to the set of encodings this FilterChooser
-  /// works with, creating inferior FilterChoosers as necessary.
-  void applyFilter(const Filter &F);
-
-  /// dumpStack - dumpStack traverses the filter chooser chain and calls
-  /// dumpFilterArray on each filter chooser up to the top level one.
-  void dumpStack(raw_ostream &OS, indent Indent, unsigned PadToWidth) const;
-
-  bool isPositionFiltered(unsigned Idx) const {
-    return FilterBits.Zero[Idx] || FilterBits.One[Idx];
-  }
-
-  // Calculates the island(s) needed to decode the instruction.
-  // This returns a list of undecoded bits of an instructions, for example,
-  // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
-  // decoded bits in order to verify that the instruction matches the Opcode.
-  std::vector<Island> getIslands(const KnownBits &EncodingBits) const;
-
-  /// Scans the well-known encoding bits of the encodings and, builds up a list
-  /// of candidate filters, and then returns the best one, if any.
-  std::unique_ptr<Filter> findBestFilter(ArrayRef<bitAttr_t> BitAttrs,
-                                         bool AllowMixed,
-                                         bool Greedy = true) const;
-
-  std::unique_ptr<Filter> findBestFilter() const;
-
-  // Decides on the best configuration of filter(s) to use in order to decode
-  // the instructions.  A conflict of instructions may occur, in which case we
-  // dump the conflict set to the standard error.
-  void doFilter();
-
-public:
-  void dump() const;
-};
-
-class DecoderTableBuilder {
-  const CodeGenTarget &Target;
-  ArrayRef<InstructionEncoding> Encodings;
-  DecoderTableInfo &TableInfo;
-
-public:
-  DecoderTableBuilder(const CodeGenTarget &Target,
-                      ArrayRef<InstructionEncoding> Encodings,
-                      DecoderTableInfo &TableInfo)
-      : Target(Target), Encodings(Encodings), TableInfo(TableInfo) {}
-
-  void buildTable(const FilterChooser &FC, unsigned BitWidth) const {
-    // When specializing decoders per bit width, each decoder table will begin
-    // with the bitwidth for that table.
-    if (SpecializeDecodersPerBitwidth)
-      TableInfo.Table.insertULEB128(BitWidth);
-    emitTableEntries(FC);
-  }
-
-private:
-  void emitBinaryParser(raw_ostream &OS, indent Indent,
-                        const InstructionEncoding &Encoding,
-                        const OperandInfo &OpInfo) const;
-
-  void emitDecoder(raw_ostream &OS, indent Indent, unsigned EncodingID) const;
-
-  unsigned getDecoderIndex(unsigned EncodingID) const;
-
-  unsigned getPredicateIndex(StringRef P) const;
-
-  bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
-                             raw_ostream &OS) const;
-
-  bool emitPredicateMatch(raw_ostream &OS, unsigned EncodingID) const;
-
-  void emitPredicateTableEntry(unsigned EncodingID) const;
-
-  void emitSoftFailTableEntry(unsigned EncodingID) const;
-
-  void emitSingletonTableEntry(const FilterChooser &FC) const;
-
-  void emitTableEntries(const FilterChooser &FC) const;
-};
-
-} // end anonymous namespace
-
-///////////////////////////
-//                       //
-// Filter Implementation //
-//                       //
-///////////////////////////
-
-Filter::Filter(ArrayRef<InstructionEncoding> Encodings,
-               ArrayRef<unsigned> EncodingIDs, unsigned StartBit,
-               unsigned NumBits)
-    : StartBit(StartBit), NumBits(NumBits) {
-  for (unsigned EncodingID : EncodingIDs) {
-    const InstructionEncoding &Encoding = Encodings[EncodingID];
-    KnownBits EncodingBits = Encoding.getMandatoryBits();
-
-    // Scans the segment for possibly well-specified encoding bits.
-    KnownBits FieldBits = EncodingBits.extractBits(NumBits, StartBit);
-
-    if (FieldBits.isConstant()) {
-      // The encoding bits are well-known.  Lets add the uid of the
-      // instruction into the bucket keyed off the constant field value.
-      FilteredIDs[FieldBits.getConstant().getZExtValue()].push_back(EncodingID);
-    } else {
-      // Some of the encoding bit(s) are unspecified.  This contributes to
-      // one additional member of "Variable" instructions.
-      VariableIDs.push_back(EncodingID);
-    }
-  }
-
-  assert((FilteredIDs.size() + VariableIDs.size() > 0) &&
-         "Filter returns no instruction categories");
-}
-
-void FilterChooser::applyFilter(const Filter &F) {
-  StartBit = F.StartBit;
-  NumBits = F.NumBits;
-  assert(FilterBits.extractBits(NumBits, StartBit).isUnknown());
-
-  if (!F.VariableIDs.empty()) {
-    // Delegates to an inferior filter chooser for further processing on this
-    // group of instructions whose segment values are variable.
-    VariableFC = std::make_unique<FilterChooser>(Encodings, F.VariableIDs,
-                                                 FilterBits, *this);
-    HasConflict |= VariableFC->HasConflict;
-  }
-
-  // Otherwise, create sub choosers.
-  for (const auto &[FilterVal, InferiorEncodingIDs] : F.FilteredIDs) {
-    // Create a new filter by inserting the field bits into the parent filter.
-    APInt FieldBits(NumBits, FilterVal);
-    KnownBits InferiorFilterBits = FilterBits;
-    InferiorFilterBits.insertBits(KnownBits::makeConstant(FieldBits), StartBit);
-
-    // Delegates to an inferior filter chooser for further processing on this
-    // category of instructions.
-    auto [It, _] = FilterChooserMap.try_emplace(
-        FilterVal,
-        std::make_unique<FilterChooser>(Encodings, InferiorEncodingIDs,
-                                        InferiorFilterBits, *this));
-    HasConflict |= It->second->HasConflict;
-  }
-}
-
-// Returns the number of fanout produced by the filter.  More fanout implies
-// the filter distinguishes more categories of instructions.
-unsigned Filter::usefulness() const {
-  return FilteredIDs.size() + VariableIDs.empty();
-}
-
-//////////////////////////////////
-//                              //
-// Filterchooser Implementation //
-//                              //
-//////////////////////////////////
-
-static StringRef getDecoderOpName(DecoderOps Op) {
-#define CASE(OP)                                                               \
-  case OP:                                                                     \
-    return #OP
-  switch (Op) {
-    CASE(OPC_Scope);
-    CASE(OPC_ExtractField);
-    CASE(OPC_FilterValueOrSkip);
-    CASE(OPC_FilterValue);
-    CASE(OPC_CheckField);
-    CASE(OPC_CheckPredicate);
-    CASE(OPC_Decode);
-    CASE(OPC_TryDecode);
-    CASE(OPC_SoftFail);
-  }
-#undef CASE
-  llvm_unreachable("Unknown decoder op");
-}
-
-// Emit the decoder state machine table. Returns a mask of MCD decoder ops
-// that were emitted.
-unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS,
-                                   DecoderTable &Table, StringRef Namespace,
-                                   unsigned HwModeID, unsigned BitWidth,
-                                   ArrayRef<unsigned> EncodingIDs) const {
-  // We'll need to be able to map from a decoded opcode into the corresponding
-  // EncodingID for this specific combination of BitWidth and Namespace. This
-  // is used below to index into Encodings.
-  DenseMap<unsigned, unsigned> OpcodeToEncodingID;
-  OpcodeToEncodingID.reserve(EncodingIDs.size());
-  for (unsigned EncodingID : EncodingIDs) {
-    const Record *InstDef = Encodings[EncodingID].getInstruction()->TheDef;
-    OpcodeToEncodingID[Target.getInstrIntValue(InstDef)] = EncodingID;
-  }
-
-  OS << "static const uint8_t DecoderTable" << Namespace;
-  if (HwModeID != DefaultMode)
-    OS << '_' << Target.getHwModes().getModeName(HwModeID);
-  OS << BitWidth << "[" << Table.size() << "] = {\n";
-
-  // Emit ULEB128 encoded value to OS, returning the number of bytes emitted.
-  auto EmitULEB128 = [](DecoderTable::const_iterator &I,
-                        formatted_raw_ostream &OS) {
-    while (*I >= 128)
-      OS << (unsigned)*I++ << ", ";
-    OS << (unsigned)*I++ << ", ";
-  };
-
-  // Emit `getNumToSkipInBytes()`-byte numtoskip value to OS, returning the
-  // NumToSkip value.
-  auto EmitNumToSkip = [](DecoderTable::const_iterator &I,
-                          formatted_raw_ostream &OS) {
-    uint8_t Byte = *I++;
-    uint32_t NumToSkip = Byte;
-    OS << (unsigned)Byte << ", ";
-    Byte = *I++;
-    OS << (unsigned)Byte << ", ";
-    NumToSkip |= Byte << 8;
-    if (getNumToSkipInBytes() == 3) {
-      Byte = *I++;
-      OS << (unsigned)(Byte) << ", ";
-      NumToSkip |= Byte << 16;
-    }
-    return NumToSkip;
-  };
-
-  // FIXME: We may be able to use the NumToSkip values to recover
-  // appropriate indentation levels.
-  DecoderTable::const_iterator I = Table.begin();
-  DecoderTable::const_iterator E = Table.end();
-  const uint8_t *const EndPtr = Table.data() + Table.size();
-
-  auto EmitPos = [&OS](uint32_t Pos) {
-    constexpr uint32_t StartColumn = 12;
-    OS << "/* " << Pos << " */";
-    OS.PadToColumn(StartColumn);
-  };
-
-  auto StartComment = [&OS]() {
-    constexpr uint32_t CommentColumn = 52;
-    OS.PadToColumn(CommentColumn);
-    OS << "// ";
-  };
-
-  auto EmitNumToSkipComment = [&](uint32_t NumToSkip) {
-    uint32_t Index = (I - Table.begin()) + NumToSkip;
-    OS << "skip to " << Index;
-  };
-
-  // The first entry when specializing decoders per bitwidth is the bitwidth.
-  // This will be used for additional checks in `decodeInstruction`.
-  if (SpecializeDecodersPerBitwidth) {
-    EmitPos(0);
-    EmitULEB128(I, OS);
-    StartComment();
-    OS << "Bitwidth " << BitWidth << '\n';
-  }
-
-  auto DecodeAndEmitULEB128 = [EndPtr,
-                               &EmitULEB128](DecoderTable::const_iterator &I,
-                                             formatted_raw_ostream &OS) {
-    const char *ErrMsg = nullptr;
-    uint64_t Value = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
-    assert(ErrMsg == nullptr && "ULEB128 value too large!");
-
-    EmitULEB128(I, OS);
-    return Value;
-  };
-
-  unsigned OpcodeMask = 0;
-
-  while (I != E) {
-    assert(I < E && "incomplete decode table entry!");
-
-    uint32_t Pos = I - Table.begin();
-    EmitPos(Pos);
-    const uint8_t DecoderOp = *I++;
-    OpcodeMask |= (1 << DecoderOp);
-    OS << getDecoderOpName(static_cast<DecoderOps>(DecoderOp)) << ", ";
-    switch (DecoderOp) {
-    default:
-      PrintFatalError("Invalid decode table opcode: " + Twine((int)DecoderOp) +
-                      " at index " + Twine(Pos));
-    case OPC_Scope: {
-      uint32_t NumToSkip = EmitNumToSkip(I, OS);
-      StartComment();
-      uint32_t Index = (I - Table.begin()) + NumToSkip;
-      OS << "end scope at " << Index;
-      break;
-    }
-    case OPC_ExtractField: {
-      // ULEB128 encoded start value.
-      unsigned Start = DecodeAndEmitULEB128(I, OS);
-      unsigned Len = *I++;
-      OS << Len << ',';
-      StartComment();
-      OS << "Field = Inst{";
-      if (Len > 1)
-        OS << (Start + Len - 1) << '-';
-      OS << Start << '}';
-      break;
-    }
-    case OPC_FilterValueOrSkip: {
-      // The filter value is ULEB128 encoded.
-      uint64_t FilterVal = DecodeAndEmitULEB128(I, OS);
-      uint32_t NumToSkip = EmitNumToSkip(I, OS);
-      StartComment();
-      OS << "if Field != " << format_hex(FilterVal, 0) << ' ';
-      EmitNumToSkipComment(NumToSkip);
-      break;
-    }
-    case OPC_FilterValue: {
-      // The filter value is ULEB128 encoded.
-      uint64_t FilterVal = DecodeAndEmitULEB128(I, OS);
-
-      StartComment();
-      OS << "if Field != " << format_hex(FilterVal, 0) << " pop scope";
-      break;
-    }
-    case OPC_CheckField: {
-      // ULEB128 encoded start value.
-      unsigned Start = DecodeAndEmitULEB128(I, OS);
-
-      // 8-bit length.
-      unsigned Len = *I++;
-      OS << Len << ", ";
-
-      // ULEB128 encoded field value.
-      uint64_t FieldVal = DecodeAndEmitULEB128(I, OS);
-
-      StartComment();
-      OS << "if Inst{";
-      if (Len > 1)
-        OS << (Start + Len - 1) << '-';
-      OS << Start << "} != " << format_hex(FieldVal, 0) << " pop scope";
-      break;
-    }
-    case OPC_CheckPredicate: {
-      unsigned PIdx = DecodeAndEmitULEB128(I, OS);
-      StartComment();
-      OS << "if !checkPredicate(" << PIdx << ") pop scope";
-      break;
-    }
-    case OPC_Decode:
-    case OPC_TryDecode: {
-      // Decode the Opcode value.
-      unsigned Opc = DecodeAndEmitULEB128(I, OS);
-
-      // Decoder index.
-      unsigned DecodeIdx = DecodeAndEmitULEB128(I, OS);
-
-      auto EncI = OpcodeToEncodingID.find(Opc);
-      assert(EncI != OpcodeToEncodingID.end() && "no encoding entry");
-      auto EncodingID = EncI->second;
-
-      StartComment();
-      OS << "Opcode: " << Encodings[EncodingID].getName()
-         << ", DecodeIdx: " << DecodeIdx;
-      break;
-    }
-    case OPC_SoftFail: {
-      // Decode the positive mask.
-      uint64_t PositiveMask = DecodeAndEmitULEB128(I, OS);
-
-      // Decode the negative mask.
-      uint64_t NegativeMask = DecodeAndEmitULEB128(I, OS);
-
-      StartComment();
-      OS << "positive mask: " << format_hex(PositiveMask, 0)
-         << "negative mask: " << format_hex(NegativeMask, 0);
-      break;
-    }
-    }
-    OS << '\n';
-  }
-  OS << "};\n\n";
-
-  return OpcodeMask;
-}
+} // namespace
 
 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
                                        ArrayRef<unsigned> InstrLen) const {
@@ -833,8 +163,8 @@ void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
   OS << "};\n\n";
 }
 
-void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
-                                           PredicateSet &Predicates) const {
+void DecoderEmitter::emitPredicateFunction(
+    formatted_raw_ostream &OS, const PredicateSet &Predicates) const {
   // The predicate function is just a big switch statement based on the
   // input predicate index.
   OS << "static bool checkDecoderPredicate(unsigned Idx, const FeatureBitset "
@@ -933,665 +263,11 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
   OS << "}\n";
 }
 
-/// dumpStack - dumpStack traverses the filter chooser chain and calls
-/// dumpFilterArray on each filter chooser up to the top level one.
-void FilterChooser::dumpStack(raw_ostream &OS, indent Indent,
-                              unsigned PadToWidth) const {
-  if (Parent)
-    Parent->dumpStack(OS, Indent, PadToWidth);
-  assert(PadToWidth >= FilterBits.getBitWidth());
-  OS << Indent << indent(PadToWidth - FilterBits.getBitWidth());
-  printKnownBits(OS, FilterBits, '.');
-  OS << '\n';
-}
-
-// Calculates the island(s) needed to decode the instruction.
-// This returns a list of undecoded bits of an instructions, for example,
-// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
-// decoded bits in order to verify that the instruction matches the Opcode.
-std::vector<FilterChooser::Island>
-FilterChooser::getIslands(const KnownBits &EncodingBits) const {
-  std::vector<Island> Islands;
-  uint64_t FieldVal;
-  unsigned StartBit;
-
-  // 0: Init
-  // 1: Water (the bit value does not affect decoding)
-  // 2: Island (well-known bit value needed for decoding)
-  unsigned State = 0;
-
-  unsigned FilterWidth = FilterBits.getBitWidth();
-  for (unsigned i = 0; i != FilterWidth; ++i) {
-    bool IsKnown = EncodingBits.Zero[i] || EncodingBits.One[i];
-    bool Filtered = isPositionFiltered(i);
-    switch (State) {
-    default:
-      llvm_unreachable("Unreachable code!");
-    case 0:
-    case 1:
-      if (Filtered || !IsKnown) {
-        State = 1; // Still in Water
-      } else {
-        State = 2; // Into the Island
-        StartBit = i;
-        FieldVal = static_cast<uint64_t>(EncodingBits.One[i]);
-      }
-      break;
-    case 2:
-      if (Filtered || !IsKnown) {
-        State = 1; // Into the Water
-        Islands.push_back({StartBit, i - StartBit, FieldVal});
-      } else {
-        State = 2; // Still in Island
-        FieldVal |= static_cast<uint64_t>(EncodingBits.One[i])
-                    << (i - StartBit);
-      }
-      break;
-    }
-  }
-  // If we are still in Island after the loop, do some housekeeping.
-  if (State == 2)
-    Islands.push_back({StartBit, FilterWidth - StartBit, FieldVal});
-
-  return Islands;
-}
-
-void DecoderTableBuilder::emitBinaryParser(raw_ostream &OS, indent Indent,
-                                           const InstructionEncoding &Encoding,
-                                           const OperandInfo &OpInfo) const {
-  if (OpInfo.HasNoEncoding) {
-    // If an operand has no encoding, the old behavior is to not decode it
-    // automatically and let the target do it. This is error-prone, so the
-    // new behavior is to report an error.
-    if (!IgnoreNonDecodableOperands)
-      PrintError(Encoding.getRecord()->getLoc(),
-                 "could not find field for operand '" + OpInfo.Name + "'");
-    return;
-  }
-
-  // Special case for 'bits<0>'.
-  if (OpInfo.Fields.empty() && !OpInfo.InitValue) {
-    if (IgnoreNonDecodableOperands)
-      return;
-    assert(!OpInfo.Decoder.empty());
-    // The operand has no encoding, so the corresponding argument is omitted.
-    // This avoids confusion and allows the function to be overloaded if the
-    // operand does have an encoding in other instructions.
-    OS << Indent << "if (!Check(S, " << OpInfo.Decoder << "(MI, Decoder)))\n"
-       << Indent << "  return MCDisassembler::Fail;\n";
-    return;
-  }
-
-  if (OpInfo.fields().empty()) {
-    // Only a constant part. The old behavior is to not decode this operand.
-    if (IgnoreFullyDefinedOperands)
-      return;
-    // Initialize `tmp` with the constant part.
-    OS << Indent << "tmp = " << format_hex(*OpInfo.InitValue, 0) << ";\n";
-  } else if (OpInfo.fields().size() == 1 && !OpInfo.InitValue.value_or(0)) {
-    // One variable part and no/zero constant part. Initialize `tmp` with the
-    // variable part.
-    auto [Base, Width, Offset] = OpInfo.fields().front();
-    OS << Indent << "tmp = fieldFromInstruction(insn, " << Base << ", " << Width
-       << ')';
-    if (Offset)
-      OS << " << " << Offset;
-    OS << ";\n";
-  } else {
-    // General case. Initialize `tmp` with the constant part, if any, and
-    // insert the variable parts into it.
-    OS << Indent << "tmp = " << format_hex(OpInfo.InitValue.value_or(0), 0)
-       << ";\n";
-    for (auto [Base, Width, Offset] : OpInfo.fields())
-      OS << Indent << "insertBits(tmp, fieldFromInstruction(insn, " << Base
-         << ", " << Width << "), " << Offset << ", " << Width << ");\n";
-  }
-
-  StringRef Decoder = OpInfo.Decoder;
-  if (!Decoder.empty()) {
-    OS << Indent << "if (!Check(S, " << Decoder
-       << "(MI, tmp, Address, Decoder))) { "
-       << (OpInfo.HasCompleteDecoder ? "" : "DecodeComplete = false; ")
-       << "return MCDisassembler::Fail; }\n";
-  } else {
-    OS << Indent << "MI.addOperand(MCOperand::createImm(tmp));\n";
-  }
-}
-
-void DecoderTableBuilder::emitDecoder(raw_ostream &OS, indent Indent,
-                                      unsigned EncodingID) const {
-  const InstructionEncoding &Encoding = Encodings[EncodingID];
-
-  // If a custom instruction decoder was specified, use that.
-  StringRef DecoderMethod = Encoding.getDecoderMethod();
-  if (!DecoderMethod.empty()) {
-    OS << Indent << "if (!Check(S, " << DecoderMethod
-       << "(MI, insn, Address, Decoder))) { "
-       << (Encoding.hasCompleteDecoder() ? "" : "DecodeComplete = false; ")
-       << "return MCDisassembler::Fail; }\n";
-    return;
-  }
-
-  for (const OperandInfo &Op : Encoding.getOperands())
-    emitBinaryParser(OS, Indent, Encoding, Op);
-}
-
-unsigned DecoderTableBuilder::getDecoderIndex(unsigned EncodingID) const {
-  // Build up the predicate string.
-  SmallString<256> Decoder;
-  // FIXME: emitDecoder() function can take a buffer directly rather than
-  // a stream.
-  raw_svector_ostream S(Decoder);
-  indent Indent(UseFnTableInDecodeToMCInst ? 2 : 4);
-  emitDecoder(S, Indent, EncodingID);
-
-  // Using the full decoder string as the key value here is a bit
-  // heavyweight, but is effective. If the string comparisons become a
-  // performance concern, we can implement a mangling of the predicate
-  // data easily enough with a map back to the actual string. That's
-  // overkill for now, though.
-
-  // Make sure the predicate is in the table.
-  DecoderSet &Decoders = TableInfo.Decoders;
-  Decoders.insert(CachedHashString(Decoder));
-  // Now figure out the index for when we write out the table.
-  DecoderSet::const_iterator P = find(Decoders, Decoder.str());
-  return std::distance(Decoders.begin(), P);
-}
-
-// If ParenIfBinOp is true, print a surrounding () if Val uses && or ||.
-bool DecoderTableBuilder::emitPredicateMatchAux(const Init &Val,
-                                                bool ParenIfBinOp,
-                                                raw_ostream &OS) const {
-  if (const auto *D = dyn_cast<DefInit>(&Val)) {
-    if (!D->getDef()->isSubClassOf("SubtargetFeature"))
-      return true;
-    OS << "Bits[" << Target.getName() << "::" << D->getAsString() << "]";
-    return false;
-  }
-  if (const auto *D = dyn_cast<DagInit>(&Val)) {
-    std::string Op = D->getOperator()->getAsString();
-    if (Op == "not" && D->getNumArgs() == 1) {
-      OS << '!';
-      return emitPredicateMatchAux(*D->getArg(0), true, OS);
-    }
-    if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) {
-      bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true);
-      if (Paren)
-        OS << '(';
-      ListSeparator LS(Op == "any_of" ? " || " : " && ");
-      for (auto *Arg : D->getArgs()) {
-        OS << LS;
-        if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS))
-          return true;
-      }
-      if (Paren)
-        OS << ')';
-      return false;
-    }
-  }
-  return true;
-}
-
-// Returns true if there was any predicate emitted.
-bool DecoderTableBuilder::emitPredicateMatch(raw_ostream &OS,
-                                             unsigned EncodingID) const {
-  std::vector<const Record *> Predicates =
-      Encodings[EncodingID].getRecord()->getValueAsListOfDefs("Predicates");
-  auto It = llvm::find_if(Predicates, [](const Record *R) {
-    return R->getValueAsBit("AssemblerMatcherPredicate");
-  });
-  bool AnyAsmPredicate = It != Predicates.end();
-  if (!AnyAsmPredicate)
-    return false;
-  SubtargetFeatureInfo::emitMCPredicateCheck(OS, Target.getName(), Predicates);
-  return true;
-}
-
-unsigned DecoderTableBuilder::getPredicateIndex(StringRef Predicate) const {
-  // Using the full predicate string as the key value here is a bit
-  // heavyweight, but is effective. If the string comparisons become a
-  // performance concern, we can implement a mangling of the predicate
-  // data easily enough with a map back to the actual string. That's
-  // overkill for now, though.
-
-  // Make sure the predicate is in the table.
-  TableInfo.Predicates.insert(CachedHashString(Predicate));
-  // Now figure out the index for when we write out the table.
-  PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate);
-  return (unsigned)(P - TableInfo.Predicates.begin());
-}
-
-void DecoderTableBuilder::emitPredicateTableEntry(unsigned EncodingID) const {
-  // Build up the predicate string.
-  SmallString<256> Predicate;
-  raw_svector_ostream PS(Predicate);
-  if (!emitPredicateMatch(PS, EncodingID))
-    return;
-
-  // Figure out the index into the predicate table for the predicate just
-  // computed.
-  unsigned PIdx = getPredicateIndex(PS.str());
-
-  TableInfo.Table.insertOpcode(OPC_CheckPredicate);
-  TableInfo.Table.insertULEB128(PIdx);
-}
-
-void DecoderTableBuilder::emitSoftFailTableEntry(unsigned EncodingID) const {
-  const InstructionEncoding &Encoding = Encodings[EncodingID];
-  const KnownBits &InstBits = Encoding.getInstBits();
-  const APInt &SoftFailMask = Encoding.getSoftFailMask();
-
-  if (SoftFailMask.isZero())
-    return;
-
-  APInt PositiveMask = InstBits.Zero & SoftFailMask;
-  APInt NegativeMask = InstBits.One & SoftFailMask;
-
-  TableInfo.Table.insertOpcode(OPC_SoftFail);
-  TableInfo.Table.insertULEB128(PositiveMask.getZExtValue());
-  TableInfo.Table.insertULEB128(NegativeMask.getZExtValue());
-}
-
-// Emits table entries to decode the singleton.
-void DecoderTableBuilder::emitSingletonTableEntry(
-    const FilterChooser &FC) const {
-  unsigned EncodingID = *FC.SingletonEncodingID;
-  const InstructionEncoding &Encoding = Encodings[EncodingID];
-  KnownBits EncodingBits = Encoding.getMandatoryBits();
-
-  // Look for islands of undecoded bits of the singleton.
-  std::vector<FilterChooser::Island> Islands = FC.getIslands(EncodingBits);
-
-  // Emit the predicate table entry if one is needed.
-  emitPredicateTableEntry(EncodingID);
-
-  // Check any additional encoding fields needed.
-  for (const FilterChooser::Island &Ilnd : reverse(Islands)) {
-    TableInfo.Table.insertOpcode(OPC_CheckField);
-    TableInfo.Table.insertULEB128(Ilnd.StartBit);
-    TableInfo.Table.insertUInt8(Ilnd.NumBits);
-    TableInfo.Table.insertULEB128(Ilnd.FieldVal);
-  }
-
-  // Check for soft failure of the match.
-  emitSoftFailTableEntry(EncodingID);
-
-  unsigned DIdx = getDecoderIndex(EncodingID);
-
-  // Produce OPC_Decode or OPC_TryDecode opcode based on the information
-  // whether the instruction decoder is complete or not. If it is complete
-  // then it handles all possible values of remaining variable/unfiltered bits
-  // and for any value can determine if the bitpattern is a valid instruction
-  // or not. This means OPC_Decode will be the final step in the decoding
-  // process. If it is not complete, then the Fail return code from the
-  // decoder method indicates that additional processing should be done to see
-  // if there is any other instruction that also matches the bitpattern and
-  // can decode it.
-  const DecoderOps DecoderOp =
-      Encoding.hasCompleteDecoder() ? OPC_Decode : OPC_TryDecode;
-  TableInfo.Table.insertOpcode(DecoderOp);
-  const Record *InstDef = Encodings[EncodingID].getInstruction()->TheDef;
-  TableInfo.Table.insertULEB128(Target.getInstrIntValue(InstDef));
-  TableInfo.Table.insertULEB128(DIdx);
-}
-
-std::unique_ptr<Filter>
-FilterChooser::findBestFilter(ArrayRef<bitAttr_t> BitAttrs, bool AllowMixed,
-                              bool Greedy) const {
-  assert(EncodingIDs.size() >= 2 && "Nothing to filter");
-
-  // Heuristics.  See also doFilter()'s "Heuristics" comment when num of
-  // instructions is 3.
-  if (AllowMixed && !Greedy) {
-    assert(EncodingIDs.size() == 3);
-
-    for (unsigned EncodingID : EncodingIDs) {
-      const InstructionEncoding &Encoding = Encodings[EncodingID];
-      KnownBits EncodingBits = Encoding.getMandatoryBits();
-
-      // Look for islands of undecoded bits of any instruction.
-      std::vector<Island> Islands = getIslands(EncodingBits);
-      if (!Islands.empty()) {
-        // Found an instruction with island(s).  Now just assign a filter.
-        return std::make_unique<Filter>(
-            Encodings, EncodingIDs, Islands[0].StartBit, Islands[0].NumBits);
-      }
-    }
-  }
-
-  // The regionAttr automaton consumes the bitAttrs automatons' state,
-  // lowest-to-highest.
-  //
-  //   Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
-  //   States:        NONE, ALL_SET, MIXED
-  //   Initial state: NONE
-  //
-  // (NONE) ----- F --> (NONE)
-  // (NONE) ----- S --> (ALL_SET)     ; and set region start
-  // (NONE) ----- U --> (NONE)
-  // (NONE) ----- M --> (MIXED)       ; and set region start
-  // (ALL_SET) -- F --> (NONE)        ; and report an ALL_SET region
-  // (ALL_SET) -- S --> (ALL_SET)
-  // (ALL_SET) -- U --> (NONE)        ; and report an ALL_SET region
-  // (ALL_SET) -- M --> (MIXED)       ; and report an ALL_SET region
-  // (MIXED) ---- F --> (NONE)        ; and report a MIXED region
-  // (MIXED) ---- S --> (ALL_SET)     ; and report a MIXED region
-  // (MIXED) ---- U --> (NONE)        ; and report a MIXED region
-  // (MIXED) ---- M --> (MIXED)
-
-  bitAttr_t RA = ATTR_NONE;
-  unsigned StartBit = 0;
-
-  std::vector<std::unique_ptr<Filter>> Filters;
-
-  auto addCandidateFilter = [&](unsigned StartBit, unsigned EndBit) {
-    Filters.push_back(std::make_unique<Filter>(Encodings, EncodingIDs, StartBit,
-                                               EndBit - StartBit));
-  };
-
-  unsigned FilterWidth = FilterBits.getBitWidth();
-  for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex) {
-    bitAttr_t bitAttr = BitAttrs[BitIndex];
-
-    assert(bitAttr != ATTR_NONE && "Bit without attributes");
-
-    switch (RA) {
-    case ATTR_NONE:
-      switch (bitAttr) {
-      case ATTR_FILTERED:
-        break;
-      case ATTR_ALL_SET:
-        StartBit = BitIndex;
-        RA = ATTR_ALL_SET;
-        break;
-      case ATTR_ALL_UNSET:
-        break;
-      case ATTR_MIXED:
-        StartBit = BitIndex;
-        RA = ATTR_MIXED;
-        break;
-      default:
-        llvm_unreachable("Unexpected bitAttr!");
-      }
-      break;
-    case ATTR_ALL_SET:
-      if (!AllowMixed && bitAttr != ATTR_ALL_SET)
-        addCandidateFilter(StartBit, BitIndex);
-      switch (bitAttr) {
-      case ATTR_FILTERED:
-        RA = ATTR_NONE;
-        break;
-      case ATTR_ALL_SET:
-        break;
-      case ATTR_ALL_UNSET:
-        RA = ATTR_NONE;
-        break;
-      case ATTR_MIXED:
-        StartBit = BitIndex;
-        RA = ATTR_MIXED;
-        break;
-      default:
-        llvm_unreachable("Unexpected bitAttr!");
-      }
-      break;
-    case ATTR_MIXED:
-      if (AllowMixed && bitAttr != ATTR_MIXED)
-        addCandidateFilter(StartBit, BitIndex);
-      switch (bitAttr) {
-      case ATTR_FILTERED:
-        StartBit = BitIndex;
-        RA = ATTR_NONE;
-        break;
-      case ATTR_ALL_SET:
-        StartBit = BitIndex;
-        RA = ATTR_ALL_SET;
-        break;
-      case ATTR_ALL_UNSET:
-        RA = ATTR_NONE;
-        break;
-      case ATTR_MIXED:
-        break;
-      default:
-        llvm_unreachable("Unexpected bitAttr!");
-      }
-      break;
-    case ATTR_ALL_UNSET:
-      llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
-    case ATTR_FILTERED:
-      llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
-    }
-  }
-
-  // At the end, if we're still in ALL_SET or MIXED states, report a region
-  switch (RA) {
-  case ATTR_NONE:
-    break;
-  case ATTR_FILTERED:
-    break;
-  case ATTR_ALL_SET:
-    if (!AllowMixed)
-      addCandidateFilter(StartBit, FilterWidth);
-    break;
-  case ATTR_ALL_UNSET:
-    break;
-  case ATTR_MIXED:
-    if (AllowMixed)
-      addCandidateFilter(StartBit, FilterWidth);
-    break;
-  }
-
-  // We have finished with the filter processings.  Now it's time to choose
-  // the best performing filter.
-  auto MaxIt = llvm::max_element(Filters, [](const std::unique_ptr<Filter> &A,
-                                             const std::unique_ptr<Filter> &B) {
-    return A->usefulness() < B->usefulness();
-  });
-  if (MaxIt == Filters.end() || (*MaxIt)->usefulness() == 0)
-    return nullptr;
-  return std::move(*MaxIt);
-}
-
-std::unique_ptr<Filter> FilterChooser::findBestFilter() const {
-  // We maintain BIT_WIDTH copies of the bitAttrs automaton.
-  // The automaton consumes the corresponding bit from each
-  // instruction.
-  //
-  //   Input symbols: 0, 1, _ (unset), and . (any of the above).
-  //   States:        NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
-  //   Initial state: NONE.
-  //
-  // (NONE) ------- [01] -> (ALL_SET)
-  // (NONE) ------- _ ----> (ALL_UNSET)
-  // (ALL_SET) ---- [01] -> (ALL_SET)
-  // (ALL_SET) ---- _ ----> (MIXED)
-  // (ALL_UNSET) -- [01] -> (MIXED)
-  // (ALL_UNSET) -- _ ----> (ALL_UNSET)
-  // (MIXED) ------ . ----> (MIXED)
-  // (FILTERED)---- . ----> (FILTERED)
-
-  unsigned FilterWidth = FilterBits.getBitWidth();
-  SmallVector<bitAttr_t, 128> BitAttrs(FilterWidth, ATTR_NONE);
-
-  // FILTERED bit positions provide no entropy and are not worthy of pursuing.
-  // Filter::recurse() set either 1 or 0 for each position.
-  for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex)
-    if (isPositionFiltered(BitIndex))
-      BitAttrs[BitIndex] = ATTR_FILTERED;
-
-  for (unsigned EncodingID : EncodingIDs) {
-    const InstructionEncoding &Encoding = Encodings[EncodingID];
-    KnownBits EncodingBits = Encoding.getMandatoryBits();
-
-    for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex) {
-      bool IsKnown = EncodingBits.Zero[BitIndex] || EncodingBits.One[BitIndex];
-      switch (BitAttrs[BitIndex]) {
-      case ATTR_NONE:
-        if (IsKnown)
-          BitAttrs[BitIndex] = ATTR_ALL_SET;
-        else
-          BitAttrs[BitIndex] = ATTR_ALL_UNSET;
-        break;
-      case ATTR_ALL_SET:
-        if (!IsKnown)
-          BitAttrs[BitIndex] = ATTR_MIXED;
-        break;
-      case ATTR_ALL_UNSET:
-        if (IsKnown)
-          BitAttrs[BitIndex] = ATTR_MIXED;
-        break;
-      case ATTR_MIXED:
-      case ATTR_FILTERED:
-        break;
-      }
-    }
-  }
-
-  // Try regions of consecutive known bit values first.
-  if (std::unique_ptr<Filter> F =
-          findBestFilter(BitAttrs, /*AllowMixed=*/false))
-    return F;
-
-  // Then regions of mixed bits (both known and unitialized bit values allowed).
-  if (std::unique_ptr<Filter> F = findBestFilter(BitAttrs, /*AllowMixed=*/true))
-    return F;
-
-  // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
-  // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
-  // well-known encoding pattern.  In such case, we backtrack and scan for the
-  // the very first consecutive ATTR_ALL_SET region and assign a filter to it.
-  if (EncodingIDs.size() == 3) {
-    if (std::unique_ptr<Filter> F =
-            findBestFilter(BitAttrs, /*AllowMixed=*/true, /*Greedy=*/false))
-      return F;
-  }
-
-  // There is a conflict we could not resolve.
-  return nullptr;
-}
-
-// Decides on the best configuration of filter(s) to use in order to decode
-// the instructions.  A conflict of instructions may occur, in which case we
-// dump the conflict set to the standard error.
-void FilterChooser::doFilter() {
-  assert(!EncodingIDs.empty() && "FilterChooser created with no instructions");
-
-  // No filter needed.
-  if (EncodingIDs.size() == 1) {
-    SingletonEncodingID = EncodingIDs.front();
-    return;
-  }
-
-  std::unique_ptr<Filter> BestFilter = findBestFilter();
-  if (BestFilter) {
-    applyFilter(*BestFilter);
-    return;
-  }
-
-  // Print out useful conflict information for postmortem analysis.
-  errs() << "Decoding Conflict:\n";
-  dump();
-  HasConflict = true;
-}
-
-void FilterChooser::dump() const {
-  indent Indent(4);
-  // Helps to keep the output right-justified.
-  unsigned PadToWidth = getMaxEncodingWidth();
-
-  // Dump filter stack.
-  dumpStack(errs(), Indent, PadToWidth);
-
-  // Dump encodings.
-  for (unsigned EncodingID : EncodingIDs) {
-    const InstructionEncoding &Encoding = Encodings[EncodingID];
-    errs() << Indent << indent(PadToWidth - Encoding.getBitWidth());
-    printKnownBits(errs(), Encoding.getMandatoryBits(), '_');
-    errs() << "  " << Encoding.getName() << '\n';
-  }
-}
-
-void DecoderTableBuilder::emitTableEntries(const FilterChooser &FC) const {
-  DecoderTable &Table = TableInfo.Table;
-
-  // If there are other encodings that could match if those with all bits
-  // known don't, enter a scope so that they have a chance.
-  size_t FixupLoc = 0;
-  if (FC.VariableFC) {
-    Table.insertOpcode(OPC_Scope);
-    FixupLoc = Table.insertNumToSkip();
-  }
-
-  if (FC.SingletonEncodingID) {
-    assert(FC.FilterChooserMap.empty());
-    // There is only one encoding in which all bits in the filtered range are
-    // fully defined, but we still need to check if the remaining (unfiltered)
-    // bits are valid for this encoding. We also need to check predicates etc.
-    emitSingletonTableEntry(FC);
-  } else if (FC.FilterChooserMap.size() == 1) {
-    // If there is only one possible field value, emit a combined OPC_CheckField
-    // instead of OPC_ExtractField + OPC_FilterValue.
-    const auto &[FilterVal, Delegate] = *FC.FilterChooserMap.begin();
-    Table.insertOpcode(OPC_CheckField);
-    Table.insertULEB128(FC.StartBit);
-    Table.insertUInt8(FC.NumBits);
-    Table.insertULEB128(FilterVal);
-
-    // Emit table entries for the only case.
-    emitTableEntries(*Delegate);
-  } else {
-    // The general case: emit a switch over the field value.
-    Table.insertOpcode(OPC_ExtractField);
-    Table.insertULEB128(FC.StartBit);
-    Table.insertUInt8(FC.NumBits);
-
-    // Emit switch cases for all but the last element.
-    for (const auto &[FilterVal, Delegate] : drop_end(FC.FilterChooserMap)) {
-      Table.insertOpcode(OPC_FilterValueOrSkip);
-      Table.insertULEB128(FilterVal);
-      size_t FixupPos = Table.insertNumToSkip();
-
-      // Emit table entries for this case.
-      emitTableEntries(*Delegate);
-
-      // Patch the previous FilterValueOrSkip to fall through to the next case.
-      Table.patchNumToSkip(FixupPos, Table.size());
-    }
-
-    // Emit a switch case for the last element. It never falls through;
-    // if it doesn't match, we leave the current scope.
-    const auto &[FilterVal, Delegate] = *FC.FilterChooserMap.rbegin();
-    Table.insertOpcode(OPC_FilterValue);
-    Table.insertULEB128(FilterVal);
-
-    // Emit table entries for the last case.
-    emitTableEntries(*Delegate);
-  }
-
-  if (FC.VariableFC) {
-    Table.patchNumToSkip(FixupLoc, Table.size());
-    emitTableEntries(*FC.VariableFC);
-  }
-}
-
 // emitDecodeInstruction - Emit the templated helper function
 // decodeInstruction().
 static void emitDecodeInstruction(formatted_raw_ostream &OS, bool IsVarLenInst,
-                                  unsigned OpcodeMask) {
-  const bool HasTryDecode = OpcodeMask & (1 << OPC_TryDecode);
-  const bool HasCheckPredicate = OpcodeMask & (1 << OPC_CheckPredicate);
-  const bool HasSoftFail = OpcodeMask & (1 << OPC_SoftFail);
-
+                                  const DecoderTableInfo &TableInfo) {
   OS << R"(
-static unsigned decodeNumToSkip(const uint8_t *&Ptr) {
-  unsigned NumToSkip = *Ptr++;
-  NumToSkip |= (*Ptr++) << 8;
-)";
-  if (getNumToSkipInBytes() == 3)
-    OS << "  NumToSkip |= (*Ptr++) << 16;\n";
-  OS << R"(  return NumToSkip;
-}
-
 template <typename InsnType>
 static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
                                       InsnType insn, uint64_t Address,
@@ -1602,7 +278,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
           "llvm::function_ref<void(APInt &, uint64_t)> makeUp";
   }
   OS << ") {\n";
-  if (HasCheckPredicate)
+  if (TableInfo.HasCheckPredicate)
     OS << "  const FeatureBitset &Bits = STI.getFeatureBits();\n";
   OS << "  const uint8_t *Ptr = DecodeTable;\n";
 
@@ -1618,7 +294,6 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
 
   OS << R"(
   SmallVector<const uint8_t *, 8> ScopeStack;
-  uint64_t CurFieldValue = 0;
   DecodeStatus S = MCDisassembler::Success;
   while (true) {
     ptrdiff_t Loc = Ptr - DecodeTable;
@@ -1629,51 +304,34 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
              << (int)DecoderOp << '\n';
       return MCDisassembler::Fail;
     case OPC_Scope: {
-      unsigned NumToSkip = decodeNumToSkip(Ptr);
+      unsigned NumToSkip = decodeULEB128AndIncUnsafe(Ptr);
       const uint8_t *SkipTo = Ptr + NumToSkip;
       ScopeStack.push_back(SkipTo);
       LLVM_DEBUG(dbgs() << Loc << ": OPC_Scope(" << SkipTo - DecodeTable
                         << ")\n");
       break;
     }
-    case OPC_ExtractField: {
+    case OPC_SwitchField: {
       // Decode the start value.
       unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
       unsigned Len = *Ptr++;)";
   if (IsVarLenInst)
     OS << "\n      makeUp(insn, Start + Len);";
   OS << R"(
-      CurFieldValue = fieldFromInstruction(insn, Start, Len);
-      LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", "
-                   << Len << "): " << CurFieldValue << "\n");
-      break;
-    }
-    case OPC_FilterValueOrSkip: {
-      // Decode the field value.
-      uint64_t Val = decodeULEB128AndIncUnsafe(Ptr);
-      bool Failed = Val != CurFieldValue;
-      unsigned NumToSkip = decodeNumToSkip(Ptr);
-      const uint8_t *SkipTo = Ptr + NumToSkip;
-
-      LLVM_DEBUG(dbgs() << Loc << ": OPC_FilterValueOrSkip(" << Val << ", "
-                        << SkipTo - DecodeTable << ") "
-                        << (Failed ? "FAIL, " : "PASS\n"));
-
-      if (Failed) {
-        Ptr = SkipTo;
-        LLVM_DEBUG(dbgs() << "continuing at " << Ptr - DecodeTable << '\n');
+      uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
+      uint64_t CaseValue;
+      unsigned CaseSize;
+      while (true) {
+        CaseValue = decodeULEB128AndIncUnsafe(Ptr);
+        CaseSize = decodeULEB128AndIncUnsafe(Ptr);
+        if (FieldValue == CaseValue || !CaseSize)
+          break;
+        Ptr += CaseSize;
       }
-      break;
-    }
-    case OPC_FilterValue: {
-      // Decode the field value.
-      uint64_t Val = decodeULEB128AndIncUnsafe(Ptr);
-      bool Failed = Val != CurFieldValue;
-
-      LLVM_DEBUG(dbgs() << Loc << ": OPC_FilterValue(" << Val << ") "
-                        << (Failed ? "FAIL, " : "PASS\n"));
-
-      if (Failed) {
+      if (FieldValue == CaseValue) {
+        LLVM_DEBUG(dbgs() << Loc << ": OPC_SwitchField(" << Start << ", " << Len
+                          << "): " << FieldValue << '\n');
+      } else {
         if (ScopeStack.empty()) {
           LLVM_DEBUG(dbgs() << "returning Fail\n");
           return MCDisassembler::Fail;
@@ -1711,7 +369,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       }
       break;
     })";
-  if (HasCheckPredicate) {
+  if (TableInfo.HasCheckPredicate) {
     OS << R"(
     case OPC_CheckPredicate: {
       // Decode the Predicate Index value.
@@ -1755,7 +413,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
                    << (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n"));
       return S;
     })";
-  if (HasTryDecode) {
+  if (TableInfo.HasTryDecode) {
     OS << R"(
     case OPC_TryDecode: {
       // Decode the Opcode value.
@@ -1789,7 +447,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       break;
     })";
   }
-  if (HasSoftFail) {
+  if (TableInfo.HasSoftFail) {
     OS << R"(
     case OPC_SoftFail: {
       // Decode the mask values.
@@ -2048,9 +706,7 @@ template <typename T> constexpr uint32_t InsnBitWidth = 0;
 
   // Entries in `EncMap` are already sorted by bitwidth. So bucketing per
   // bitwidth can be done on-the-fly as we iterate over the map.
-  DecoderTableInfo TableInfo;
-  DecoderTableBuilder TableBuilder(Target, Encodings, TableInfo);
-  unsigned OpcodeMask = 0;
+  DecoderTableInfo TableInfo{};
 
   bool HasConflict = false;
   for (const auto &[BitWidth, BWMap] : EncMap) {
@@ -2058,25 +714,21 @@ template <typename T> constexpr uint32_t InsnBitWidth = 0;
       auto [DecoderNamespace, HwModeID] = Key;
 
       // Emit the decoder for this (namespace, hwmode, width) combination.
-      FilterChooser FC(Encodings, EncodingIDs);
-      HasConflict |= FC.hasConflict();
-      // Skip emitting table entries if a conflict has been detected.
-      if (HasConflict)
+      std::unique_ptr<DecoderTreeNode> Tree =
+          buildDecoderTree(Target, Encodings, EncodingIDs);
+      if (!Tree) {
+        HasConflict = true;
         continue;
+      }
 
-      // The decode table is cleared for each top level decoder function. The
-      // predicates and decoders themselves, however, are shared across
-      // different decoders to give more opportunities for uniqueing.
-      //  - If `SpecializeDecodersPerBitwidth` is enabled, decoders are shared
-      //    across all decoder tables for a given bitwidth, else they are shared
-      //    across all decoder tables.
-      //  - predicates are shared across all decoder tables.
-      TableInfo.Table.clear();
-      TableBuilder.buildTable(FC, BitWidth);
-
-      // Print the table to the output stream.
-      OpcodeMask |= emitTable(OS, TableInfo.Table, DecoderNamespace, HwModeID,
-                              BitWidth, EncodingIDs);
+      SmallString<32> TableName("DecoderTable");
+      TableName.append(DecoderNamespace);
+      if (HwModeID != DefaultMode)
+        TableName.append({"_", Target.getHwModes().getModeName(HwModeID)});
+      TableName.append(std::to_string(BitWidth));
+
+      // Serialize the tree.
+      emitDecoderTable(OS, TableInfo, TableName, BitWidth, Tree.get());
     }
 
     // Each BitWidth get's its own decoders and decoder function if
@@ -2095,14 +747,12 @@ template <typename T> constexpr uint32_t InsnBitWidth = 0;
   if (!SpecializeDecodersPerBitwidth)
     emitDecoderFunction(OS, TableInfo.Decoders, 0);
 
-  const bool HasCheckPredicate = OpcodeMask & (1 << OPC_CheckPredicate);
-
   // Emit the predicate function.
-  if (HasCheckPredicate)
+  if (TableInfo.HasCheckPredicate)
     emitPredicateFunction(OS, TableInfo.Predicates);
 
   // Emit the main entry point for the decoder, decodeInstruction().
-  emitDecodeInstruction(OS, IsVarLenInst, OpcodeMask);
+  emitDecodeInstruction(OS, IsVarLenInst, TableInfo);
 
   OS << "\n} // namespace\n";
 }
diff --git a/llvm/utils/TableGen/DecoderTableEmitter.cpp b/llvm/utils/TableGen/DecoderTableEmitter.cpp
new file mode 100644
index 0000000000000..bda80215743f5
--- /dev/null
+++ b/llvm/utils/TableGen/DecoderTableEmitter.cpp
@@ -0,0 +1,382 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Common/CodeGenTarget.h"
+#include "Common/InstructionEncoding.h"
+#include "DecoderTree.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/LEB128.h"
+
+namespace llvm {
+extern cl::opt<bool> SpecializeDecodersPerBitwidth;
+} // namespace llvm
+
+using namespace llvm;
+
+namespace {
+
+class DecoderTableEmitter { // Prints a decoder tree as a uint8_t table.
+  DecoderTableInfo &TableInfo; // Predicate/decoder sets and feature flags.
+  formatted_raw_ostream OS;    // Tracks the column for comment alignment.
+  unsigned IndexWidth = 0;     // Decimal digits reserved for table indices.
+  unsigned CurrentIndex = 0;   // Number of table bytes emitted so far.
+  unsigned CommentIndex = 0;   // Value of CurrentIndex at the row start.
+  // The three counters are assigned by emitTable()/emitStartLine() before use.
+public:
+  DecoderTableEmitter(DecoderTableInfo &TableInfo, raw_ostream &OS)
+      : TableInfo(TableInfo), OS(OS) {}
+  // Emits the definition of the table named `TableName` for the tree `Root`.
+  void emitTable(StringRef TableName, unsigned BitWidth,
+                 const DecoderTreeNode *Root);
+
+private:
+  void analyzeNode(const DecoderTreeNode *Node) const; // Updates TableInfo.
+  // The sizes computed here must match what the emit*() methods print.
+  unsigned computeNodeSize(const DecoderTreeNode *Node) const;
+  unsigned computeTableSize(const DecoderTreeNode *Root,
+                            unsigned BitWidth) const;
+  // Low-level helpers; each emitted byte advances CurrentIndex.
+  void emitStartLine();
+  void emitOpcode(StringRef Name);
+  void emitByte(uint8_t Val);
+  void emitUInt8(unsigned Val);
+  void emitULEB128(uint64_t Val);
+  // Starts the trailing comment of the current row and returns the stream.
+  formatted_raw_ostream &emitComment(indent Indent);
+  // Per-node-kind emitters; emitNode() dispatches on the node kind.
+  void emitCheckAnyNode(const CheckAnyNode *N, indent Indent);
+  void emitCheckAllNode(const CheckAllNode *N, indent Indent);
+  void emitSwitchFieldNode(const SwitchFieldNode *N, indent Indent);
+  void emitCheckFieldNode(const CheckFieldNode *N, indent Indent);
+  void emitCheckPredicateNode(const CheckPredicateNode *N, indent Indent);
+  void emitSoftFailNode(const SoftFailNode *N, indent Indent);
+  void emitDecodeNode(const DecodeNode *N, indent Indent);
+  void emitNode(const DecoderTreeNode *N, indent Indent);
+};
+
+} // namespace
+
+void DecoderTableEmitter::analyzeNode(const DecoderTreeNode *Node) const {
+  switch (Node->getKind()) { // Records what the tree needs in TableInfo.
+  case DecoderTreeNode::CheckAny: { // Container: recurse into the children.
+    const auto *N = static_cast<const CheckAnyNode *>(Node);
+    for (const DecoderTreeNode *Child : N->children())
+      analyzeNode(Child);
+    break;
+  }
+  case DecoderTreeNode::CheckAll: { // Container: recurse into the children.
+    const auto *N = static_cast<const CheckAllNode *>(Node);
+    for (const DecoderTreeNode *Child : N->children())
+      analyzeNode(Child);
+    break;
+  }
+  case DecoderTreeNode::CheckField:
+    break; // Nothing to record for a field check.
+  case DecoderTreeNode::SwitchField: {
+    const auto *N = static_cast<const SwitchFieldNode *>(Node);
+    for (const DecoderTreeNode *Child : make_second_range(N->cases()))
+      analyzeNode(Child); // Only the case bodies matter, not the values.
+    break;
+  }
+  case DecoderTreeNode::CheckPredicate: {
+    const auto *N = static_cast<const CheckPredicateNode *>(Node);
+    TableInfo.Predicates.insert(CachedHashString(N->getPredicateString()));
+    TableInfo.HasCheckPredicate = true;
+    break;
+  }
+  case DecoderTreeNode::SoftFail:
+    TableInfo.HasSoftFail = true;
+    break;
+  case DecoderTreeNode::Decode: {
+    const auto *N = static_cast<const DecodeNode *>(Node);
+    const InstructionEncoding &Encoding = N->getEncoding();
+    TableInfo.Decoders.insert(CachedHashString(N->getDecoderString()));
+    TableInfo.HasTryDecode |= !Encoding.hasCompleteDecoder(); // OPC_TryDecode
+    break;
+  }
+  }
+}
+
+unsigned
+DecoderTableEmitter::computeNodeSize(const DecoderTreeNode *Node) const {
+  switch (Node->getKind()) { // Mirrors emitNode(); sizes are in table bytes.
+  case DecoderTreeNode::CheckAny: {
+    const auto *N = static_cast<const CheckAnyNode *>(Node);
+    unsigned Size = 0;
+    for (const DecoderTreeNode *Child : drop_end(N->children())) {
+      unsigned ChildSize = computeNodeSize(Child);
+      Size += 1 + getULEB128Size(ChildSize) + ChildSize; // OPC_Scope + size.
+    }
+    return Size + computeNodeSize(*std::prev(N->child_end())); // No scope.
+  }
+  case DecoderTreeNode::CheckAll: { // Children are laid out back to back.
+    const auto *N = static_cast<const CheckAllNode *>(Node);
+    unsigned Size = 0;
+    for (const DecoderTreeNode *Child : N->children())
+      Size += computeNodeSize(Child);
+    return Size;
+  }
+  case DecoderTreeNode::CheckField: {
+    const auto *N = static_cast<const CheckFieldNode *>(Node);
+    return 1 + getULEB128Size(N->getStartBit()) + 1 + // Opcode, start, width
+           getULEB128Size(N->getValue());              // and expected value.
+  }
+  case DecoderTreeNode::SwitchField: {
+    const auto *N = static_cast<const SwitchFieldNode *>(Node);
+    unsigned Size = 1 + getULEB128Size(N->getStartBit()) + 1;
+    // Each case except the last stores its value and the size of its body.
+    for (auto [Val, Child] : drop_end(N->cases())) {
+      unsigned ChildSize = computeNodeSize(Child);
+      Size += getULEB128Size(Val) + getULEB128Size(ChildSize) + ChildSize;
+    }
+    // The last case stores a zero in place of the body size.
+    auto [Val, Child] = *std::prev(N->case_end());
+    unsigned ChildSize = computeNodeSize(Child);
+    Size += getULEB128Size(Val) + getULEB128Size(0) + ChildSize;
+    return Size;
+  }
+  case DecoderTreeNode::CheckPredicate: {
+    const auto *N = static_cast<const CheckPredicateNode *>(Node);
+    unsigned PredicateIndex = // Requires a prior analyzeNode() on this node.
+        TableInfo.getPredicateIndex(N->getPredicateString());
+    return 1 + getULEB128Size(PredicateIndex);
+  }
+  case DecoderTreeNode::SoftFail: {
+    const auto *N = static_cast<const SoftFailNode *>(Node);
+    return 1 + getULEB128Size(N->getPositiveMask()) +
+           getULEB128Size(N->getNegativeMask());
+  }
+  case DecoderTreeNode::Decode: {
+    const auto *N = static_cast<const DecodeNode *>(Node);
+    unsigned InstOpcode = N->getEncoding().getInstruction()->EnumVal;
+    unsigned DecoderIndex = TableInfo.getDecoderIndex(N->getDecoderString());
+    return 1 + getULEB128Size(InstOpcode) + getULEB128Size(DecoderIndex);
+  }
+  }
+  llvm_unreachable("Unknown node kind");
+}
+
+unsigned DecoderTableEmitter::computeTableSize(const DecoderTreeNode *Root,
+                                               unsigned BitWidth) const {
+  // With per-bitwidth specialization the table is prefixed with its bit
+  // width, encoded as ULEB128; otherwise the table holds only the tree.
+  unsigned PrefixSize =
+      SpecializeDecodersPerBitwidth ? getULEB128Size(BitWidth) : 0;
+  return PrefixSize + computeNodeSize(Root);
+}
+
+void DecoderTableEmitter::emitStartLine() { // Begins a new row of the table.
+  CommentIndex = CurrentIndex; // Index that emitComment() prints for the row.
+  OS.indent(2);
+}
+
+void DecoderTableEmitter::emitOpcode(StringRef Name) {
+  emitStartLine(); // Every opcode starts a new row.
+  OS << "MCD::" << Name << ", ";
+  ++CurrentIndex; // The opcode is counted as one table byte.
+}
+
+void DecoderTableEmitter::emitByte(uint8_t Val) {
+  OS << static_cast<unsigned>(Val) << ", "; // Print as a number, not a char.
+  ++CurrentIndex;
+}
+
+void DecoderTableEmitter::emitUInt8(unsigned int Val) {
+  assert(isUInt<8>(Val) && "Value does not fit into a single table byte");
+  emitByte(static_cast<uint8_t>(Val)); // Narrowing is safe, checked above.
+}
+
+void DecoderTableEmitter::emitULEB128(uint64_t Val) {
+  // Emit the low seven bits per byte; a set top bit means more bytes follow.
+  for (; Val >= 0x80; Val >>= 7)
+    emitByte(0x80 | (Val & 0x7F));
+  // The last byte has its top bit clear, which terminates the number.
+  emitByte(Val);
+}
+
+formatted_raw_ostream &DecoderTableEmitter::emitComment(indent Indent) {
+  constexpr unsigned CommentColumn = 45;
+  if (OS.getColumn() > CommentColumn) // Row too long: comment on next line.
+    OS << '\n';
+  OS.PadToColumn(CommentColumn);
+  OS << "// " << format_decimal(CommentIndex, IndexWidth) << ": " << Indent;
+  return OS; // The caller appends the comment text and the newline.
+}
+
+void DecoderTableEmitter::emitCheckAnyNode(const CheckAnyNode *N,
+                                           indent Indent) {
+  for (const DecoderTreeNode *Child : drop_end(N->children())) {
+    emitOpcode("OPC_Scope");
+    emitULEB128(computeNodeSize(Child));
+    // The braces appear only in the comments of the generated table.
+    emitComment(Indent) << "{\n";
+    emitNode(Child, Indent + 1);
+    emitComment(Indent) << "}\n";
+  }
+  // The last child is emitted without a scope of its own.
+  const DecoderTreeNode *Child = *std::prev(N->child_end());
+  emitNode(Child, Indent);
+}
+
+void DecoderTableEmitter::emitCheckAllNode(const CheckAllNode *N,
+                                           indent Indent) {
+  for (const DecoderTreeNode *Child : N->children())
+    emitNode(Child, Indent); // Back to back, at the same comment indent.
+}
+
+void DecoderTableEmitter::emitSwitchFieldNode(const SwitchFieldNode *N,
+                                              indent Indent) {
+  unsigned LSB = N->getStartBit();
+  unsigned Width = N->getNumBits();
+  unsigned MSB = LSB + Width - 1; // Used only in the printed comments.
+  // Header: the opcode followed by the start bit and the width of the field.
+  emitOpcode("OPC_SwitchField");
+  emitULEB128(LSB);
+  emitUInt8(Width);
+
+  emitComment(Indent) << "switch Inst[" << MSB << ':' << LSB << "] {\n";
+  // Every case but the last: case value, size of the case body, the body.
+  for (auto [Val, Child] : drop_end(N->cases())) {
+    emitStartLine();
+    emitULEB128(Val);
+    emitULEB128(computeNodeSize(Child));
+
+    emitComment(Indent) << "case " << format_hex(Val, 0) << ": {\n";
+    emitNode(Child, Indent + 1);
+    emitComment(Indent) << "}\n";
+  }
+  // Last case: same layout, but a zero is stored instead of the body size.
+  auto [Val, Child] = *std::prev(N->case_end());
+  emitStartLine();
+  emitULEB128(Val);
+  emitULEB128(0);
+
+  emitComment(Indent) << "case " << format_hex(Val, 0) << ": {\n";
+  emitNode(Child, Indent + 1);
+  emitComment(Indent) << "}\n";
+
+  emitComment(Indent) << "} // switch Inst[" << MSB << ':' << LSB << "]\n";
+}
+
+void DecoderTableEmitter::emitCheckFieldNode(const CheckFieldNode *N,
+                                             indent Indent) {
+  unsigned LSB = N->getStartBit();
+  unsigned Width = N->getNumBits();
+  unsigned MSB = LSB + Width - 1; // Used only in the printed comment.
+  uint64_t Val = N->getValue();
+  // Operands: start bit (ULEB128), width (one byte), expected value (ULEB128).
+  emitOpcode("OPC_CheckField");
+  emitULEB128(LSB);
+  emitUInt8(Width);
+  emitULEB128(Val);
+
+  emitComment(Indent);
+  OS << "check Inst[" << MSB << ':' << LSB << "] == " << format_hex(Val, 0)
+     << '\n';
+}
+
+void DecoderTableEmitter::emitCheckPredicateNode(const CheckPredicateNode *N,
+                                                 indent Indent) {
+  unsigned PredicateIndex =
+      TableInfo.getPredicateIndex(N->getPredicateString());
+  // The table refers to the predicate by its index in TableInfo.Predicates.
+  emitOpcode("OPC_CheckPredicate");
+  emitULEB128(PredicateIndex);
+
+  emitComment(Indent) << "check predicate " << PredicateIndex << "\n";
+}
+
+void DecoderTableEmitter::emitSoftFailNode(const SoftFailNode *N,
+                                           indent Indent) {
+  uint64_t PositiveMask = N->getPositiveMask();
+  uint64_t NegativeMask = N->getNegativeMask();
+  // Operands: the positive mask, then the negative mask, both as ULEB128.
+  emitOpcode("OPC_SoftFail");
+  emitULEB128(PositiveMask);
+  emitULEB128(NegativeMask);
+
+  emitComment(Indent) << "check softfail";
+  OS << " pos=" << format_hex(PositiveMask, 10);
+  OS << " neg=" << format_hex(NegativeMask, 10) << '\n';
+}
+
+void DecoderTableEmitter::emitDecodeNode(const DecodeNode *N, indent Indent) {
+  const InstructionEncoding &Encoding = N->getEncoding();
+  unsigned InstOpcode = Encoding.getInstruction()->EnumVal;
+  unsigned DecoderIndex = TableInfo.getDecoderIndex(N->getDecoderString());
+  // Encodings without a complete decoder use OPC_TryDecode instead.
+  emitOpcode(Encoding.hasCompleteDecoder() ? "OPC_Decode" : "OPC_TryDecode");
+  emitULEB128(InstOpcode);
+  emitULEB128(DecoderIndex);
+
+  emitComment(Indent);
+  if (!Encoding.hasCompleteDecoder())
+    OS << "try ";
+  OS << "decode to " << Encoding.getName() << " using decoder " << DecoderIndex
+     << '\n';
+}
+
+void DecoderTableEmitter::emitNode(const DecoderTreeNode *N, indent Indent) {
+  switch (N->getKind()) { // Dispatch to the emitter of the concrete class.
+  case DecoderTreeNode::CheckAny:
+    return emitCheckAnyNode(static_cast<const CheckAnyNode *>(N), Indent);
+  case DecoderTreeNode::CheckAll:
+    return emitCheckAllNode(static_cast<const CheckAllNode *>(N), Indent);
+  case DecoderTreeNode::SwitchField:
+    return emitSwitchFieldNode(static_cast<const SwitchFieldNode *>(N), Indent);
+  case DecoderTreeNode::CheckField:
+    return emitCheckFieldNode(static_cast<const CheckFieldNode *>(N), Indent);
+  case DecoderTreeNode::CheckPredicate:
+    return emitCheckPredicateNode(static_cast<const CheckPredicateNode *>(N),
+                                  Indent);
+  case DecoderTreeNode::SoftFail:
+    return emitSoftFailNode(static_cast<const SoftFailNode *>(N), Indent);
+  case DecoderTreeNode::Decode:
+    return emitDecodeNode(static_cast<const DecodeNode *>(N), Indent);
+  }
+  llvm_unreachable("Unknown node kind"); // Every kind returns above.
+}
+
+void DecoderTableEmitter::emitTable(StringRef TableName, unsigned BitWidth,
+                                    const DecoderTreeNode *Root) {
+  analyzeNode(Root);
+  // analyzeNode() must run first: sizes depend on predicate/decoder indices.
+  unsigned TableSize = computeTableSize(Root, BitWidth);
+  OS << "static const uint8_t " << TableName << "[" << TableSize << "] = {\n";
+
+  // Calculate the number of decimal digits needed to print a table index,
+  // i.e. floor(log10(TableSize)) + 1, or 1 for an empty table.
+  IndexWidth = 1;
+  for (unsigned S = TableSize; S /= 10;)
+    ++IndexWidth;
+
+  CurrentIndex = 0;
+
+  // When specializing decoders per bit width, each decoder table will begin
+  // with the bitwidth for that table.
+  if (SpecializeDecodersPerBitwidth) {
+    emitStartLine();
+    emitULEB128(BitWidth);
+    emitComment(indent(0)) << "BitWidth " << BitWidth << '\n';
+  }
+
+  emitNode(Root, indent(0));
+  assert(CurrentIndex == TableSize &&
+         "The size of the emitted table differs from the calculated one");
+
+  OS << "};\n";
+}
+
+void llvm::emitDecoderTable(raw_ostream &OS, DecoderTableInfo &TableInfo,
+                            StringRef TableName, unsigned BitWidth,
+                            const DecoderTreeNode *Tree) {
+  DecoderTableEmitter TableEmitter(TableInfo, OS); // One emitter per table.
+  TableEmitter.emitTable(TableName, BitWidth, Tree);
+}
diff --git a/llvm/utils/TableGen/DecoderTree.cpp b/llvm/utils/TableGen/DecoderTree.cpp
new file mode 100644
index 0000000000000..47740b5e57c7d
--- /dev/null
+++ b/llvm/utils/TableGen/DecoderTree.cpp
@@ -0,0 +1,11 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DecoderTree.h"
+
+using namespace llvm;
diff --git a/llvm/utils/TableGen/DecoderTree.h b/llvm/utils/TableGen/DecoderTree.h
new file mode 100644
index 0000000000000..c7cc37b08ec9e
--- /dev/null
+++ b/llvm/utils/TableGen/DecoderTree.h
@@ -0,0 +1,238 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_DECODERTREE_H
+#define LLVM_UTILS_TABLEGEN_DECODERTREE_H
+
+#include "llvm/ADT/CachedHashString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include <map>
+#include <memory>
+#include <set>
+
+namespace llvm {
+
+class CodeGenTarget;
+class InstructionEncoding;
+class raw_ostream;
+
+class DecoderTreeNode { // Common base class of the decoder tree nodes.
+public:
+  virtual ~DecoderTreeNode() = default;
+  // Identifies the concrete class of a node, e.g. CheckAny for CheckAnyNode.
+  enum KindTy {
+    CheckAny,
+    CheckAll,
+    CheckField,
+    SwitchField,
+    CheckPredicate,
+    SoftFail,
+    Decode,
+  };
+
+  KindTy getKind() const { return Kind; }
+
+protected:
+  explicit DecoderTreeNode(KindTy Kind) : Kind(Kind) {}
+  // Only derived classes can construct a node; each passes its own kind.
+private:
+  KindTy Kind;
+};
+
+class CheckAnyNode : public DecoderTreeNode { // Emitted as OPC_Scope chain.
+  SmallVector<std::unique_ptr<DecoderTreeNode>, 0> Children; // Owned.
+  // Exposes an owned child as a plain pointer for the iterators below.
+  static const DecoderTreeNode *
+  mapElement(decltype(Children)::const_reference Element) {
+    return Element.get();
+  }
+
+public:
+  CheckAnyNode() : DecoderTreeNode(CheckAny) {}
+  // Takes ownership of `N` and appends it after the existing children.
+  void addChild(std::unique_ptr<DecoderTreeNode> N) {
+    Children.push_back(std::move(N));
+  }
+  // Iterates over the children as `const DecoderTreeNode *`.
+  using child_iterator = mapped_iterator<decltype(Children)::const_iterator,
+                                         decltype(&mapElement)>;
+
+  child_iterator child_begin() const {
+    return child_iterator(Children.begin(), mapElement);
+  }
+
+  child_iterator child_end() const {
+    return child_iterator(Children.end(), mapElement);
+  }
+
+  iterator_range<child_iterator> children() const {
+    return make_range(child_begin(), child_end());
+  }
+};
+
+class CheckAllNode : public DecoderTreeNode { // Children emitted in sequence.
+  SmallVector<std::unique_ptr<DecoderTreeNode>, 0> Children; // Owned.
+  // Exposes an owned child as a plain pointer for the iterators below.
+  static const DecoderTreeNode *
+  mapElement(decltype(Children)::const_reference Element) {
+    return Element.get();
+  }
+
+public:
+  CheckAllNode() : DecoderTreeNode(CheckAll) {}
+  // Takes ownership of `Child` and appends it after the existing children.
+  void addChild(std::unique_ptr<DecoderTreeNode> Child) {
+    Children.push_back(std::move(Child));
+  }
+  // Iterates over the children as `const DecoderTreeNode *`.
+  using child_iterator = mapped_iterator<decltype(Children)::const_iterator,
+                                         decltype(&mapElement)>;
+
+  child_iterator child_begin() const {
+    return child_iterator(Children.begin(), mapElement);
+  }
+
+  child_iterator child_end() const {
+    return child_iterator(Children.end(), mapElement);
+  }
+
+  iterator_range<child_iterator> children() const {
+    return make_range(child_begin(), child_end());
+  }
+};
+
+class CheckFieldNode : public DecoderTreeNode { // Emitted as OPC_CheckField.
+  unsigned StartBit; // Lowest bit of the field (printed as the LSB).
+  unsigned NumBits;  // Width of the field; emitted as a single byte.
+  uint64_t Value;    // Value the field is compared against.
+
+public:
+  CheckFieldNode(unsigned StartBit, unsigned NumBits, uint64_t Value)
+      : DecoderTreeNode(CheckField), StartBit(StartBit), NumBits(NumBits),
+        Value(Value) {}
+
+  unsigned getStartBit() const { return StartBit; }
+
+  unsigned getNumBits() const { return NumBits; }
+
+  uint64_t getValue() const { return Value; }
+};
+
+class SwitchFieldNode : public DecoderTreeNode { // Emitted as OPC_SwitchField.
+  unsigned StartBit; // Lowest bit of the switched field.
+  unsigned NumBits;  // Width of the switched field.
+  std::map<uint64_t, std::unique_ptr<DecoderTreeNode>> Cases; // By value.
+  // Exposes a case as a (value, plain pointer) pair for the iterators below.
+  static std::pair<uint64_t, const DecoderTreeNode *>
+  mapElement(decltype(Cases)::const_reference Element) {
+    return std::pair(Element.first, Element.second.get());
+  }
+
+public:
+  SwitchFieldNode(unsigned StartBit, unsigned NumBits)
+      : DecoderTreeNode(SwitchField), StartBit(StartBit), NumBits(NumBits) {}
+  // NOTE: try_emplace keeps an existing case, so a duplicate Value drops N.
+  void addCase(uint64_t Value, std::unique_ptr<DecoderTreeNode> N) {
+    Cases.try_emplace(Value, std::move(N));
+  }
+
+  unsigned getStartBit() const { return StartBit; }
+
+  unsigned getNumBits() const { return NumBits; }
+  // Iterates over the cases in ascending order of the case value.
+  using case_iterator =
+      mapped_iterator<decltype(Cases)::const_iterator, decltype(&mapElement)>;
+
+  case_iterator case_begin() const {
+    return case_iterator(Cases.begin(), mapElement);
+  }
+
+  case_iterator case_end() const {
+    return case_iterator(Cases.end(), mapElement);
+  }
+
+  iterator_range<case_iterator> cases() const {
+    return make_range(case_begin(), case_end());
+  }
+};
+
+class CheckPredicateNode : public DecoderTreeNode { // OPC_CheckPredicate.
+  std::string PredicateString; // Key of the predicate in the predicate set.
+
+public:
+  explicit CheckPredicateNode(std::string PredicateString)
+      : DecoderTreeNode(CheckPredicate),
+        PredicateString(std::move(PredicateString)) {}
+  // The returned view is valid for as long as this node is alive.
+  StringRef getPredicateString() const { return PredicateString; }
+};
+
+class SoftFailNode : public DecoderTreeNode { // Emitted as OPC_SoftFail.
+  uint64_t PositiveMask, NegativeMask; // The two operands, in this order.
+
+public:
+  SoftFailNode(uint64_t PositiveMask, uint64_t NegativeMask)
+      : DecoderTreeNode(SoftFail), PositiveMask(PositiveMask),
+        NegativeMask(NegativeMask) {}
+
+  uint64_t getPositiveMask() const { return PositiveMask; }
+  uint64_t getNegativeMask() const { return NegativeMask; }
+};
+
+class DecodeNode : public DecoderTreeNode { // OPC_Decode or OPC_TryDecode.
+  const InstructionEncoding &Encoding; // Not owned; must outlive the node.
+  std::string DecoderString;           // Key of the decoder in the decoder set.
+
+public:
+  DecodeNode(const InstructionEncoding &Encoding, std::string DecoderString)
+      : DecoderTreeNode(Decode), Encoding(Encoding),
+        DecoderString(std::move(DecoderString)) {}
+
+  const InstructionEncoding &getEncoding() const { return Encoding; }
+  // The returned view is valid for as long as this node is alive.
+  StringRef getDecoderString() const { return DecoderString; }
+};
+
+using PredicateSet = SetVector<CachedHashString>; // Index = insertion order.
+using DecoderSet = SetVector<CachedHashString>;   // Index = insertion order.
+// State accumulated over the decoder trees passed to emitDecoderTable().
+struct DecoderTableInfo {
+  PredicateSet Predicates;
+  DecoderSet Decoders;
+  bool HasCheckPredicate = false; // A CheckPredicate node was analyzed.
+  bool HasSoftFail = false;       // A SoftFail node was analyzed.
+  bool HasTryDecode = false;      // A Decode node lacks a complete decoder.
+  // Returns the index of `PredicateString`, which must be in `Predicates`.
+  unsigned getPredicateIndex(StringRef PredicateString) const {
+    auto I = find(Predicates, PredicateString);
+    assert(I != Predicates.end() && "Predicate has not been registered");
+    return std::distance(Predicates.begin(), I);
+  }
+  // Returns the index of `DecoderString`, which must be in `Decoders`.
+  unsigned getDecoderIndex(StringRef DecoderString) const {
+    auto I = find(Decoders, DecoderString);
+    assert(I != Decoders.end() && "Decoder has not been registered");
+    return std::distance(Decoders.begin(), I);
+  }
+};
+
+std::unique_ptr<DecoderTreeNode> // May be null; callers must check.
+buildDecoderTree(const CodeGenTarget &Target,
+                 ArrayRef<InstructionEncoding> Encodings,
+                 ArrayRef<unsigned> EncodingIDs);
+// Prints the table `TableName` for `Tree` to `OS`, updating `TableInfo`.
+void emitDecoderTable(raw_ostream &OS, DecoderTableInfo &TableInfo,
+                      StringRef TableName, unsigned BitWidth,
+                      const DecoderTreeNode *Tree);
+
+} // namespace llvm
+
+#endif // LLVM_UTILS_TABLEGEN_DECODERTREE_H
diff --git a/llvm/utils/TableGen/DecoderTreeBuilder.cpp b/llvm/utils/TableGen/DecoderTreeBuilder.cpp
new file mode 100644
index 0000000000000..c733bf8f0e64c
--- /dev/null
+++ b/llvm/utils/TableGen/DecoderTreeBuilder.cpp
@@ -0,0 +1,884 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Common/CodeGenTarget.h"
+#include "Common/InstructionEncoding.h"
+#include "Common/SubtargetFeatureInfo.h"
+#include "DecoderTree.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+
+namespace llvm {
+extern cl::opt<bool> UseFnTableInDecodeToMCInst;
+extern cl::opt<bool> IgnoreNonDecodableOperands;
+extern cl::opt<bool> IgnoreFullyDefinedOperands;
+} // namespace llvm
+
+using namespace llvm;
+
+namespace {
+
+/// Filter - Filter works with FilterChooser to produce the decoding tree for
+/// the ISA.
+///
+/// It is useful to think of a Filter as governing the switch stmts of the
+/// decoding tree in a certain level.  Each case stmt delegates to an inferior
+/// FilterChooser to decide what further decoding logic to employ, or in other
+/// words, what other remaining bits to look at.  The FilterChooser eventually
+/// chooses a best Filter to do its job.
+///
+/// This recursive scheme ends when the number of Opcodes assigned to the
+/// FilterChooser becomes 1 or if there is a conflict.  A conflict happens when
+/// the Filter/FilterChooser combo does not know how to distinguish among the
+/// Opcodes assigned.
+///
+/// An example of a conflict is
+///
+/// Decoding Conflict:
+///     ................................
+///     1111............................
+///     1111010.........................
+///     1111010...00....................
+///     1111010...00........0001........
+///     111101000.00........0001........
+///     111101000.00........00010000....
+///     111101000_00________00010000____  VST4q8a
+///     111101000_00________00010000____  VST4q8b
+///
+/// The Debug output shows the path that the decoding tree follows to reach the
+/// conclusion that there is a conflict.  VST4q8a is a vst4 to double-spaced
+/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
+///
+/// The encoding info in the .td files does not specify this meta information,
+/// which could have been used by the decoder to resolve the conflict.  The
+/// decoder could try to decode the even/odd register numbering and assign to
+/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
+/// version and returns the Opcode since the two have the same Asm format string.
+struct Filter {
+  unsigned StartBit; // the starting bit position
+  unsigned NumBits;  // number of bits to filter
+
+  // Map of well-known segment value to the set of uid's with that value.
+  std::map<uint64_t, std::vector<unsigned>> FilteredIDs;
+
+  // Set of uid's with non-constant segment values.
+  std::vector<unsigned> VariableIDs;
+
+  Filter(ArrayRef<InstructionEncoding> Encodings,
+         ArrayRef<unsigned> EncodingIDs, unsigned StartBit, unsigned NumBits);
+
+  // Returns the fanout of the filter: the number of distinct constant field
+  // values, plus one if no encoding is variable.  More fanout is more useful.
+  unsigned usefulness() const {
+    return FilteredIDs.size() + VariableIDs.empty();
+  }
+};
+
+} // namespace
+
+Filter::Filter(ArrayRef<InstructionEncoding> Encodings,
+               ArrayRef<unsigned> EncodingIDs, unsigned StartBit,
+               unsigned NumBits)
+    : StartBit(StartBit), NumBits(NumBits) {
+  for (unsigned EncodingID : EncodingIDs) {
+    const InstructionEncoding &Encoding = Encodings[EncodingID];
+    KnownBits EncodingBits = Encoding.getMandatoryBits();
+
+    // Scans the segment for possibly well-specified encoding bits.
+    KnownBits FieldBits = EncodingBits.extractBits(NumBits, StartBit);
+
+    if (FieldBits.isConstant()) {
+      // The encoding bits are well-known.  Let's add the uid of the
+      // instruction into the bucket keyed off the constant field value.
+      FilteredIDs[FieldBits.getConstant().getZExtValue()].push_back(EncodingID);
+    } else {
+      // Some of the encoding bit(s) are unspecified.  This contributes to
+      // one additional member of "Variable" instructions.
+      VariableIDs.push_back(EncodingID);
+    }
+  }
+  // Every ID lands in one of the two containers, so this fails only if
+  // EncodingIDs was empty.
+  assert((FilteredIDs.size() + VariableIDs.size() > 0) &&
+         "Filter returns no instruction categories");
+}
+
+namespace {
+
+// These are states of our finite state machines used in FilterChooser's
+// filterProcessor() which produces the filter candidates to use.
+enum bitAttr_t { // Unscoped enum, local to this file (anonymous namespace).
+  ATTR_NONE,
+  ATTR_FILTERED,
+  ATTR_ALL_SET,
+  ATTR_ALL_UNSET,
+  ATTR_MIXED
+};
+
+/// FilterChooser - FilterChooser chooses the best filter among a set of Filters
+/// in order to perform the decoding of instructions at the current level.
+///
+/// Decoding proceeds from the top down.  Based on the well-known encoding bits
+/// of instructions available, FilterChooser builds up the possible Filters that
+/// can further the task of decoding by distinguishing among the remaining
+/// candidate instructions.
+///
+/// Once a filter has been chosen, it is called upon to divide the decoding task
+/// into sub-tasks and delegates them to its inferior FilterChoosers for further
+/// processing.
+///
+/// It is useful to think of a Filter as governing the switch stmts of the
+/// decoding tree.  And each case is delegated to an inferior FilterChooser to
+/// decide what further remaining bits to look at.
+
+class FilterChooser {
+  // TODO: Unfriend by providing the necessary accessors.
+  friend class DecoderTreeBuilder;
+
+  // Vector of encodings to choose our filter.
+  ArrayRef<InstructionEncoding> Encodings;
+
+  /// Encoding IDs for this filter chooser to work on.
+  /// Sorted by non-decreasing encoding width.
+  SmallVector<unsigned, 0> EncodingIDs;
+
+  // Array of bit values passed down from our parent.
+  // Set to all unknown for Parent == nullptr.
+  KnownBits FilterBits;
+
+  // Links to the FilterChooser above us in the decoding tree.
+  const FilterChooser *Parent;
+
+  /// If the selected filter matches multiple encodings, then this is the
+  /// starting position and the width of the filtered range.
+  unsigned StartBit = 0; // Zero until applyFilter() assigns it.
+  unsigned NumBits = 0;  // Zero until applyFilter() assigns it.
+
+  /// If the selected filter matches multiple encodings, and there is
+  /// *exactly one* encoding in which all bits are known in the filtered range,
+  /// then this is the ID of that encoding.
+  /// Also used when there is only one encoding.
+  std::optional<unsigned> SingletonEncodingID;
+
+  /// If the selected filter matches multiple encodings, and there is
+  /// *at least one* encoding in which all bits are known in the filtered range,
+  /// then this is the FilterChooser created for the subset of encodings that
+  /// contain some unknown bits in the filtered range.
+  std::unique_ptr<const FilterChooser> VariableFC;
+
+  /// If the selected filter matches multiple encodings, and there is
+  /// *more than one* encoding in which all bits are known in the filtered
+  /// range, then this is a map of field values to FilterChoosers created for
+  /// the subset of encodings sharing that field value.
+  /// The "field value" here refers to the encoding bits in the filtered range.
+  std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
+
+  /// Set to true if decoding conflict was encountered.
+  bool HasConflict = false;
+
+  struct Island {
+    unsigned StartBit;
+    unsigned NumBits;
+    uint64_t FieldVal;
+  };
+
+public:
+  /// Constructs a top-level filter chooser.
+  FilterChooser(ArrayRef<InstructionEncoding> Encodings,
+                ArrayRef<unsigned> EncodingIDs);
+
+  /// Constructs an inferior filter chooser.
+  FilterChooser(ArrayRef<InstructionEncoding> Encodings,
+                ArrayRef<unsigned> EncodingIDs, const KnownBits &FilterBits,
+                const FilterChooser &Parent);
+
+  /// Returns the width of the largest encoding.
+  unsigned getMaxEncodingWidth() const {
+    // The last encoding ID is the ID of an encoding with the largest width.
+    return Encodings[EncodingIDs.back()].getBitWidth();
+  }
+
+  /// Returns true if any decoding conflicts were encountered.
+  bool hasConflict() const { return HasConflict; }
+
+private:
+  /// Applies the given filter to the set of encodings this FilterChooser
+  /// works with, creating inferior FilterChoosers as necessary.
+  void applyFilter(const Filter &F);
+
+  /// dumpStack - dumpStack traverses the filter chooser chain and calls
+  /// dumpFilterArray on each filter chooser up to the top level one.
+  void dumpStack(raw_ostream &OS, indent Indent, unsigned PadToWidth) const;
+
+  bool isPositionFiltered(unsigned Idx) const {
+    return FilterBits.Zero[Idx] || FilterBits.One[Idx];
+  }
+
+  // Calculates the island(s) needed to decode the instruction.
+  // This returns a list of undecoded bits of an instruction, for example,
+  // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
+  // decoded bits in order to verify that the instruction matches the Opcode.
+  static std::vector<Island> getIslands(const KnownBits &EncodingBits,
+                                        const KnownBits &FilterBits);
+
+  /// Scans the well-known encoding bits of the encodings, builds up a list
+  /// of candidate filters, and then returns the best one, if any.
+  std::unique_ptr<Filter> findBestFilter(ArrayRef<bitAttr_t> BitAttrs,
+                                         bool AllowMixed,
+                                         bool Greedy = true) const;
+
+  std::unique_ptr<Filter> findBestFilter() const;
+
+  // Decides on the best configuration of filter(s) to use in order to decode
+  // the instructions.  A conflict of instructions may occur, in which case we
+  // dump the conflict set to the standard error.
+  void doFilter();
+
+public:
+  void dump() const;
+};
+
+/// Sorting predicate to sort encoding IDs by encoding width.
+class LessEncodingIDByWidth {
+  ArrayRef<InstructionEncoding> Encodings; // Indexed by encoding ID.
+
+public:
+  explicit LessEncodingIDByWidth(ArrayRef<InstructionEncoding> Encodings)
+      : Encodings(Encodings) {}
+  // Returns true if encoding `ID1` is strictly narrower than encoding `ID2`.
+  bool operator()(unsigned ID1, unsigned ID2) const {
+    return Encodings[ID1].getBitWidth() < Encodings[ID2].getBitWidth();
+  }
+};
+
+} // namespace
+
+/// Creates a top-level filter chooser for \p EncodingIDs and immediately
+/// runs the filtering process on them.
+FilterChooser::FilterChooser(ArrayRef<InstructionEncoding> Encodings,
+                             ArrayRef<unsigned> EncodingIDs)
+    : Encodings(Encodings), EncodingIDs(EncodingIDs), Parent(nullptr) {
+  // The top-level chooser is the only place where the IDs get sorted.
+  LessEncodingIDByWidth ByWidth(Encodings);
+  stable_sort(this->EncodingIDs, ByWidth);
+
+  // After sorting, the narrowest encoding is at the front; its width becomes
+  // the width of the (initially all-unknown) filter.
+  const InstructionEncoding &Narrowest = Encodings[this->EncodingIDs.front()];
+  FilterBits = KnownBits(Narrowest.getBitWidth());
+
+  doFilter();
+}
+
+/// Creates an inferior filter chooser of \p Parent for \p EncodingIDs, using
+/// \p FilterBits as the bits already fixed by outer filters, and immediately
+/// runs the filtering process on them.
+FilterChooser::FilterChooser(ArrayRef<InstructionEncoding> Encodings,
+                             ArrayRef<unsigned int> EncodingIDs,
+                             const KnownBits &FilterBits,
+                             const FilterChooser &Parent)
+    : Encodings(Encodings), EncodingIDs(EncodingIDs), Parent(&Parent) {
+  // Inferior choosers expect an ID array that is already sorted by width, so
+  // the order is only verified here, not re-established.
+  assert(is_sorted(EncodingIDs, LessEncodingIDByWidth(Encodings)));
+  assert(!FilterBits.hasConflict() && "Broken filter");
+
+  // The narrowest encoding is at the front; the inherited filter bits are
+  // resized to its width.
+  const InstructionEncoding &Narrowest = Encodings[EncodingIDs.front()];
+  this->FilterBits = FilterBits.anyext(Narrowest.getBitWidth());
+
+  doFilter();
+}
+
+/// Applies filter \p F to the encodings of this chooser: remembers the
+/// filtered field and creates one inferior chooser per field value, plus one
+/// for the encodings whose field value is variable. Conflicts found by the
+/// inferior choosers are propagated into this chooser.
+void FilterChooser::applyFilter(const Filter &F) {
+  // Remember which field this chooser dispatches on.
+  StartBit = F.StartBit;
+  NumBits = F.NumBits;
+  // The field must consist solely of bits no outer filter has fixed yet.
+  assert(FilterBits.extractBits(NumBits, StartBit).isUnknown());
+
+  // Encodings whose segment value is variable go to a dedicated inferior
+  // chooser that inherits the current filter bits as they are.
+  if (!F.VariableIDs.empty()) {
+    VariableFC = std::make_unique<FilterChooser>(Encodings, F.VariableIDs,
+                                                 FilterBits, *this);
+    HasConflict |= VariableFC->HasConflict;
+  }
+
+  // Every distinct field value gets an inferior chooser of its own.
+  for (const auto &[FilterVal, InferiorEncodingIDs] : F.FilteredIDs) {
+    // The inferior filter is the current filter with the field value
+    // inserted at the field position.
+    KnownBits InferiorFilterBits = FilterBits;
+    InferiorFilterBits.insertBits(
+        KnownBits::makeConstant(APInt(NumBits, FilterVal)), StartBit);
+
+    auto Inferior = std::make_unique<FilterChooser>(
+        Encodings, InferiorEncodingIDs, InferiorFilterBits, *this);
+    auto Inserted =
+        FilterChooserMap.try_emplace(FilterVal, std::move(Inferior));
+    HasConflict |= Inserted.first->second->HasConflict;
+  }
+}
+
+/// Prints \p Bits to \p OS, most significant bit first, in the manner of
+/// KnownBits::print(), except that unknown bits are printed as \p Unknown.
+/// Known bits are printed as '0' or '1'; a bit marked as both zero and one is
+/// printed as '!'.
+static void printKnownBits(raw_ostream &OS, const KnownBits &Bits,
+                           char Unknown) {
+  for (unsigned Remaining = Bits.getBitWidth(); Remaining != 0; --Remaining) {
+    const unsigned I = Remaining - 1;
+    const bool IsZero = Bits.Zero[I];
+    const bool IsOne = Bits.One[I];
+
+    char Symbol = Unknown;
+    if (IsZero && IsOne)
+      Symbol = '!';
+    else if (IsZero)
+      Symbol = '0';
+    else if (IsOne)
+      Symbol = '1';
+    OS << Symbol;
+  }
+}
+
+/// Prints the filter bits of every chooser on the chain from the top-level
+/// chooser down to this one, one chooser per line. Every line starts with
+/// \p Indent and is padded on the left so that the bits are right-justified
+/// in a column of \p PadToWidth characters.
+void FilterChooser::dumpStack(raw_ostream &OS, indent Indent,
+                              unsigned PadToWidth) const {
+  // Ancestors go first, so the outermost filter ends up on the first line.
+  if (Parent != nullptr)
+    Parent->dumpStack(OS, Indent, PadToWidth);
+
+  const unsigned Width = FilterBits.getBitWidth();
+  assert(PadToWidth >= Width);
+  OS << Indent << indent(PadToWidth - Width);
+  printKnownBits(OS, FilterBits, '.');
+  OS << '\n';
+}
+
+// Calculates the island(s) needed to decode the instruction.
+// An island is a maximal run of consecutive bits that are known in
+// EncodingBits and not yet fixed by FilterBits. For example,
+// Inst{20} == 1 && Inst{3-0} == 0b1111 yields two islands. Only the low
+// FilterBits.getBitWidth() bits are scanned, and islands are returned in
+// order of increasing start bit.
+std::vector<FilterChooser::Island>
+FilterChooser::getIslands(const KnownBits &EncodingBits,
+                          const KnownBits &FilterBits) {
+  std::vector<Island> Islands;
+
+  // Scanner state: whether the previous bit was part of an island and, if so,
+  // where that island began and which value has been collected for it.
+  bool InIsland = false;
+  unsigned IslandStart = 0;
+  uint64_t IslandVal = 0;
+
+  const unsigned FilterWidth = FilterBits.getBitWidth();
+  for (unsigned Bit = 0; Bit != FilterWidth; ++Bit) {
+    const bool IsKnown = EncodingBits.Zero[Bit] || EncodingBits.One[Bit];
+    const bool Filtered = FilterBits.Zero[Bit] || FilterBits.One[Bit];
+    // "Land" is a well-known bit that still has to be checked; anything else
+    // is water and does not affect decoding at this point.
+    const bool IsLand = IsKnown && !Filtered;
+
+    if (IsLand && !InIsland) {
+      // Out of the water: a new island starts at this bit.
+      InIsland = true;
+      IslandStart = Bit;
+      IslandVal = static_cast<uint64_t>(EncodingBits.One[Bit]);
+    } else if (IsLand) {
+      // Still on the island: place the bit at its island-relative offset.
+      IslandVal |= static_cast<uint64_t>(EncodingBits.One[Bit])
+                   << (Bit - IslandStart);
+    } else if (InIsland) {
+      // Back into the water: the island ended just before this bit.
+      Islands.push_back({IslandStart, Bit - IslandStart, IslandVal});
+      InIsland = false;
+    }
+  }
+
+  // An island that extends to the last scanned bit is closed here.
+  if (InIsland)
+    Islands.push_back({IslandStart, FilterWidth - IslandStart, IslandVal});
+
+  return Islands;
+}
+
+/// Builds candidate filters from regions of \p BitAttrs and returns the one
+/// with the highest usefulness().
+///
+/// \param BitAttrs   Per-bit attributes; one entry is read for each bit of
+///                   FilterBits and none of them may be ATTR_NONE.
+/// \param AllowMixed If false, only ATTR_ALL_SET regions become candidates;
+///                   if true, only ATTR_MIXED regions do.
+/// \param Greedy     If false (together with AllowMixed being true), the first
+///                   island of the first encoding that has any island is used
+///                   as the filter, without comparing candidates. Requires
+///                   exactly three encodings.
+/// \returns The chosen filter, or nullptr if there is no candidate or the
+///          best candidate has a usefulness of zero.
+std::unique_ptr<Filter>
+FilterChooser::findBestFilter(ArrayRef<bitAttr_t> BitAttrs, bool AllowMixed,
+                              bool Greedy) const {
+  assert(EncodingIDs.size() >= 2 && "Nothing to filter");
+
+  // Heuristics.  See also the "Heuristics" comment on the three-encoding
+  // special case in the parameterless findBestFilter() overload.
+  if (AllowMixed && !Greedy) {
+    assert(EncodingIDs.size() == 3);
+
+    for (unsigned EncodingID : EncodingIDs) {
+      const InstructionEncoding &Encoding = Encodings[EncodingID];
+      KnownBits EncodingBits = Encoding.getMandatoryBits();
+
+      // Look for islands of undecoded bits of any instruction.
+      std::vector<Island> Islands = getIslands(EncodingBits, FilterBits);
+      if (!Islands.empty()) {
+        // Found an instruction with island(s).  Now just assign a filter.
+        return std::make_unique<Filter>(
+            Encodings, EncodingIDs, Islands[0].StartBit, Islands[0].NumBits);
+      }
+    }
+    // No encoding has an island; fall through to the regular region scan.
+  }
+
+  // The regionAttr automaton consumes the bitAttrs automatons' state,
+  // lowest-to-highest.
+  //
+  //   Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
+  //   States:        NONE, ALL_SET, MIXED
+  //   Initial state: NONE
+  //
+  // (NONE) ----- F --> (NONE)
+  // (NONE) ----- S --> (ALL_SET)     ; and set region start
+  // (NONE) ----- U --> (NONE)
+  // (NONE) ----- M --> (MIXED)       ; and set region start
+  // (ALL_SET) -- F --> (NONE)        ; and report an ALL_SET region
+  // (ALL_SET) -- S --> (ALL_SET)
+  // (ALL_SET) -- U --> (NONE)        ; and report an ALL_SET region
+  // (ALL_SET) -- M --> (MIXED)       ; and report an ALL_SET region
+  // (MIXED) ---- F --> (NONE)        ; and report a MIXED region
+  // (MIXED) ---- S --> (ALL_SET)     ; and report a MIXED region
+  // (MIXED) ---- U --> (NONE)        ; and report a MIXED region
+  // (MIXED) ---- M --> (MIXED)
+
+  bitAttr_t RA = ATTR_NONE;
+  unsigned StartBit = 0;
+
+  std::vector<std::unique_ptr<Filter>> Filters;
+
+  // Records the half-open bit range [StartBit, EndBit) as a candidate filter.
+  auto AddCandidateFilter = [&](unsigned StartBit, unsigned EndBit) {
+    Filters.push_back(std::make_unique<Filter>(Encodings, EncodingIDs, StartBit,
+                                               EndBit - StartBit));
+  };
+
+  unsigned FilterWidth = FilterBits.getBitWidth();
+  for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex) {
+    bitAttr_t bitAttr = BitAttrs[BitIndex];
+
+    assert(bitAttr != ATTR_NONE && "Bit without attributes");
+
+    switch (RA) {
+    case ATTR_NONE:
+      switch (bitAttr) {
+      case ATTR_FILTERED:
+        break;
+      case ATTR_ALL_SET:
+        StartBit = BitIndex;
+        RA = ATTR_ALL_SET;
+        break;
+      case ATTR_ALL_UNSET:
+        break;
+      case ATTR_MIXED:
+        StartBit = BitIndex;
+        RA = ATTR_MIXED;
+        break;
+      default:
+        llvm_unreachable("Unexpected bitAttr!");
+      }
+      break;
+    case ATTR_ALL_SET:
+      // Any other attribute ends the ALL_SET region. The region is only
+      // recorded as a candidate when mixed regions are not being requested.
+      if (!AllowMixed && bitAttr != ATTR_ALL_SET)
+        AddCandidateFilter(StartBit, BitIndex);
+      switch (bitAttr) {
+      case ATTR_FILTERED:
+        RA = ATTR_NONE;
+        break;
+      case ATTR_ALL_SET:
+        break;
+      case ATTR_ALL_UNSET:
+        RA = ATTR_NONE;
+        break;
+      case ATTR_MIXED:
+        StartBit = BitIndex;
+        RA = ATTR_MIXED;
+        break;
+      default:
+        llvm_unreachable("Unexpected bitAttr!");
+      }
+      break;
+    case ATTR_MIXED:
+      // Any other attribute ends the MIXED region. The region is only
+      // recorded as a candidate when mixed regions are being requested.
+      if (AllowMixed && bitAttr != ATTR_MIXED)
+        AddCandidateFilter(StartBit, BitIndex);
+      switch (bitAttr) {
+      case ATTR_FILTERED:
+        // NOTE(review): this assignment looks redundant, because the NONE
+        // state sets StartBit again before the next region starts.
+        StartBit = BitIndex;
+        RA = ATTR_NONE;
+        break;
+      case ATTR_ALL_SET:
+        StartBit = BitIndex;
+        RA = ATTR_ALL_SET;
+        break;
+      case ATTR_ALL_UNSET:
+        RA = ATTR_NONE;
+        break;
+      case ATTR_MIXED:
+        break;
+      default:
+        llvm_unreachable("Unexpected bitAttr!");
+      }
+      break;
+    case ATTR_ALL_UNSET:
+      llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
+    case ATTR_FILTERED:
+      llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
+    }
+  }
+
+  // At the end, if we're still in ALL_SET or MIXED states, report a region
+  switch (RA) {
+  case ATTR_NONE:
+    break;
+  case ATTR_FILTERED:
+    break;
+  case ATTR_ALL_SET:
+    if (!AllowMixed)
+      AddCandidateFilter(StartBit, FilterWidth);
+    break;
+  case ATTR_ALL_UNSET:
+    break;
+  case ATTR_MIXED:
+    if (AllowMixed)
+      AddCandidateFilter(StartBit, FilterWidth);
+    break;
+  }
+
+  // We have finished with the filter processings.  Now it's time to choose
+  // the best performing filter.
+  auto MaxIt = llvm::max_element(Filters, [](const std::unique_ptr<Filter> &A,
+                                             const std::unique_ptr<Filter> &B) {
+    return A->usefulness() < B->usefulness();
+  });
+  // Give up if there are no candidates or even the best one is useless.
+  if (MaxIt == Filters.end() || (*MaxIt)->usefulness() == 0)
+    return nullptr;
+  return std::move(*MaxIt);
+}
+
+/// Classifies every bit position of the filter across all encodings of this
+/// chooser and then tries, in order of preference, to find a filter over
+/// regions of bits known in all encodings, over regions of mixed bits, and,
+/// for exactly three encodings, over the first island of any encoding.
+/// Returns nullptr if none of the attempts produces a filter.
+std::unique_ptr<Filter> FilterChooser::findBestFilter() const {
+  // One bitAttrs automaton is kept per bit position of the filter. Each
+  // automaton consumes the corresponding bit of every instruction in turn.
+  //
+  //   Input symbols: 0, 1, _ (unset), and . (any of the above).
+  //   States:        NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
+  //   Initial state: NONE.
+  //
+  // (NONE) ------- [01] -> (ALL_SET)
+  // (NONE) ------- _ ----> (ALL_UNSET)
+  // (ALL_SET) ---- [01] -> (ALL_SET)
+  // (ALL_SET) ---- _ ----> (MIXED)
+  // (ALL_UNSET) -- [01] -> (MIXED)
+  // (ALL_UNSET) -- _ ----> (ALL_UNSET)
+  // (MIXED) ------ . ----> (MIXED)
+  // (FILTERED)---- . ----> (FILTERED)
+
+  const unsigned FilterWidth = FilterBits.getBitWidth();
+  SmallVector<bitAttr_t, 128> BitAttrs(FilterWidth, ATTR_NONE);
+
+  // Positions that the filter bits already fix to 0 or 1 provide no entropy
+  // and are not worth pursuing.
+  for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex) {
+    if (isPositionFiltered(BitIndex))
+      BitAttrs[BitIndex] = ATTR_FILTERED;
+  }
+
+  // Feed the mandatory bits of every encoding to the automatons.
+  for (unsigned EncodingID : EncodingIDs) {
+    KnownBits EncodingBits = Encodings[EncodingID].getMandatoryBits();
+
+    for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex) {
+      const bool IsKnown =
+          EncodingBits.Zero[BitIndex] || EncodingBits.One[BitIndex];
+      bitAttr_t &Attr = BitAttrs[BitIndex];
+
+      if (Attr == ATTR_NONE)
+        Attr = IsKnown ? ATTR_ALL_SET : ATTR_ALL_UNSET;
+      else if (Attr == ATTR_ALL_SET && !IsKnown)
+        Attr = ATTR_MIXED;
+      else if (Attr == ATTR_ALL_UNSET && IsKnown)
+        Attr = ATTR_MIXED;
+      // MIXED and FILTERED never change.
+    }
+  }
+
+  // Try regions of consecutive known bit values first.
+  std::unique_ptr<Filter> Best =
+      findBestFilter(BitAttrs, /*AllowMixed=*/false);
+  if (Best)
+    return Best;
+
+  // Then regions of mixed bits (both known and uninitialized bit values
+  // allowed).
+  Best = findBestFilter(BitAttrs, /*AllowMixed=*/true);
+  if (Best)
+    return Best;
+
+  // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
+  // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
+  // well-known encoding pattern.  In such a case, with exactly three
+  // encodings, we retry non-greedily, which assigns a filter to the first
+  // island found in any of the encodings.
+  if (EncodingIDs.size() != 3) {
+    // There is a conflict we could not resolve.
+    return nullptr;
+  }
+  return findBestFilter(BitAttrs, /*AllowMixed=*/true, /*Greedy=*/false);
+}
+
+// Decides how the encodings of this chooser are told apart. A single
+// encoding is recorded as the singleton. Otherwise the best filter, if one
+// exists, is applied. If no filter can be found, the conflict set is dumped
+// to the standard error and the chooser is marked as conflicting.
+void FilterChooser::doFilter() {
+  assert(!EncodingIDs.empty() && "FilterChooser created with no instructions");
+
+  // A lone encoding needs no filter.
+  if (EncodingIDs.size() == 1) {
+    SingletonEncodingID = EncodingIDs.front();
+    return;
+  }
+
+  if (std::unique_ptr<Filter> BestFilter = findBestFilter()) {
+    applyFilter(*BestFilter);
+    return;
+  }
+
+  // No filter separates the encodings. Print out useful conflict information
+  // for postmortem analysis and remember the failure.
+  errs() << "Decoding Conflict:\n";
+  dump();
+  HasConflict = true;
+}
+
+/// Prints the filter stack leading to this chooser, followed by the
+/// mandatory bits and the name of each of its encodings, to the standard
+/// error.
+void FilterChooser::dump() const {
+  raw_ostream &OS = errs();
+  const indent Indent(4);
+  // Every line is padded to the maximum encoding width, which keeps the
+  // output right-justified.
+  const unsigned PadToWidth = getMaxEncodingWidth();
+
+  // The filter stack comes first.
+  dumpStack(OS, Indent, PadToWidth);
+
+  // The encodings follow, one per line.
+  for (unsigned EncodingID : EncodingIDs) {
+    const InstructionEncoding &Encoding = Encodings[EncodingID];
+    OS << Indent << indent(PadToWidth - Encoding.getBitWidth());
+    printKnownBits(OS, Encoding.getMandatoryBits(), '_');
+    OS << "  " << Encoding.getName() << '\n';
+  }
+}
+
+namespace {
+
+/// Builds a tree of DecoderTreeNodes from a FilterChooser hierarchy.
+class DecoderTreeBuilder {
+  // Target whose name is used when generating predicate check strings.
+  const CodeGenTarget &Target;
+  // All encodings, indexed by encoding ID.
+  ArrayRef<InstructionEncoding> Encodings;
+
+public:
+  DecoderTreeBuilder(const CodeGenTarget &Target,
+                     ArrayRef<InstructionEncoding> Encodings)
+      : Target(Target), Encodings(Encodings) {}
+
+  /// Builds the tree for the top-level filter chooser \p FC. This is a thin
+  /// wrapper around buildCheckAnyNode().
+  std::unique_ptr<DecoderTreeNode> buildTree(const FilterChooser &FC) {
+    return buildCheckAnyNode(FC);
+  }
+
+private:
+  /// Builds the node that checks and decodes the single encoding
+  /// \p EncodingID. \p FilterBits are the bits already fixed by the filters
+  /// that lead to this encoding.
+  std::unique_ptr<DecoderTreeNode>
+  buildTerminalNode(unsigned EncodingID, const KnownBits &FilterBits);
+
+  /// Builds the node that dispatches on the field [StartBit, StartBit +
+  /// NumBits). \p FCMap maps each field value to the filter chooser that
+  /// handles it.
+  std::unique_ptr<DecoderTreeNode> buildCheckAllOrSwitchNode(
+      unsigned StartBit, unsigned NumBits,
+      const std::map<uint64_t, std::unique_ptr<const FilterChooser>> &FCMap);
+
+  /// Builds the node for filter chooser \p FC, recursing into its inferior
+  /// filter choosers.
+  std::unique_ptr<DecoderTreeNode> buildCheckAnyNode(const FilterChooser &FC);
+};
+
+} // namespace
+
+/// Returns true if the "Predicates" list of the record behind \p Encoding
+/// contains at least one predicate whose AssemblerMatcherPredicate bit is
+/// set.
+///
+/// The value of the bit is tested, not the mere presence of the field. This
+/// is the same condition getPredicateString() uses to decide whether there is
+/// anything to emit, so the two functions cannot disagree and a predicate
+/// check is never requested for an empty predicate string.
+static bool doesOpcodeNeedPredicate(const InstructionEncoding &Encoding) {
+  std::vector<const Record *> Predicates =
+      Encoding.getRecord()->getValueAsListOfDefs("Predicates");
+  return llvm::any_of(Predicates, [](const Record *R) {
+    return R->getValueAsBit("AssemblerMatcherPredicate");
+  });
+}
+
+/// Returns the MC predicate check for \p Encoding as produced by
+/// SubtargetFeatureInfo::emitMCPredicateCheck(), or an empty string if none
+/// of the record's predicates has its AssemblerMatcherPredicate bit set.
+static std::string getPredicateString(const InstructionEncoding &Encoding,
+                                      StringRef TargetName) {
+  std::string PredicateString;
+
+  const std::vector<const Record *> Predicates =
+      Encoding.getRecord()->getValueAsListOfDefs("Predicates");
+  auto IsMCPredicate = [](const Record *R) {
+    return R->getValueAsBit("AssemblerMatcherPredicate");
+  };
+
+  // Only emit a check if at least one predicate takes part in MC matching.
+  if (llvm::find_if(Predicates, IsMCPredicate) != Predicates.end()) {
+    raw_string_ostream OS(PredicateString);
+    SubtargetFeatureInfo::emitMCPredicateCheck(OS, TargetName, Predicates);
+  }
+  return PredicateString;
+}
+
+/// Emits to \p OS, with every line prefixed by \p Indent, the code that
+/// decodes operand \p OpInfo of \p Encoding: the operand value is assembled
+/// in `tmp` and then either handed to the operand's decoder function or
+/// added to the instruction as an immediate operand.
+static void emitBinaryParser(raw_ostream &OS, indent Indent,
+                             const InstructionEncoding &Encoding,
+                             const OperandInfo &OpInfo) {
+  if (OpInfo.HasNoEncoding) {
+    // If an operand has no encoding, the old behavior is to not decode it
+    // automatically and let the target do it. This is error-prone, so the
+    // new behavior is to report an error.
+    if (IgnoreNonDecodableOperands)
+      return;
+    PrintError(Encoding.getRecord()->getLoc(),
+               "could not find field for operand '" + OpInfo.Name + "'");
+    return;
+  }
+
+  // Special case for 'bits<0>': neither variable parts nor a constant part.
+  const bool IsZeroWidth = OpInfo.Fields.empty() && !OpInfo.InitValue;
+  if (IsZeroWidth) {
+    if (!IgnoreNonDecodableOperands) {
+      assert(!OpInfo.Decoder.empty());
+      // The operand has no encoding, so the corresponding argument is
+      // omitted. This avoids confusion and allows the function to be
+      // overloaded if the operand does have an encoding in other
+      // instructions.
+      OS << Indent << "if (!Check(S, " << OpInfo.Decoder << "(MI, Decoder)))\n"
+         << Indent << "  return MCDisassembler::Fail;\n";
+    }
+    return;
+  }
+
+  // Constant part of the operand; zero if there is none.
+  const auto ConstPart = OpInfo.InitValue.value_or(0);
+
+  if (OpInfo.fields().empty()) {
+    // Only a constant part. The old behavior is to not decode this operand.
+    if (IgnoreFullyDefinedOperands)
+      return;
+    // Initialize `tmp` with the constant part.
+    OS << Indent << "tmp = " << format_hex(*OpInfo.InitValue, 0) << ";\n";
+  } else if (OpInfo.fields().size() == 1 && ConstPart == 0) {
+    // One variable part and no/zero constant part. Initialize `tmp` with the
+    // variable part, shifted to its position if necessary.
+    auto [Base, Width, Offset] = OpInfo.fields().front();
+    OS << Indent << "tmp = fieldFromInstruction(insn, " << Base << ", " << Width
+       << ')';
+    if (Offset)
+      OS << " << " << Offset;
+    OS << ";\n";
+  } else {
+    // General case. Initialize `tmp` with the constant part, if any, and
+    // insert the variable parts into it.
+    OS << Indent << "tmp = " << format_hex(ConstPart, 0) << ";\n";
+    for (auto [Base, Width, Offset] : OpInfo.fields())
+      OS << Indent << "insertBits(tmp, fieldFromInstruction(insn, " << Base
+         << ", " << Width << "), " << Offset << ", " << Width << ");\n";
+  }
+
+  // Without a decoder function the value becomes an immediate operand.
+  const StringRef Decoder = OpInfo.Decoder;
+  if (Decoder.empty()) {
+    OS << Indent << "MI.addOperand(MCOperand::createImm(tmp));\n";
+    return;
+  }
+
+  OS << Indent << "if (!Check(S, " << Decoder
+     << "(MI, tmp, Address, Decoder))) { ";
+  if (!OpInfo.HasCompleteDecoder)
+    OS << "DecodeComplete = false; ";
+  OS << "return MCDisassembler::Fail; }\n";
+}
+
+/// Emits to \p OS, with every line prefixed by \p Indent, the code that
+/// decodes \p Encoding: either a call to its custom decoder method or the
+/// decoding code for each of its operands.
+static void emitDecoder(raw_ostream &OS, indent Indent,
+                        const InstructionEncoding &Encoding) {
+  const StringRef DecoderMethod = Encoding.getDecoderMethod();
+
+  // Without a custom decoder method, the operands are decoded one by one.
+  if (DecoderMethod.empty()) {
+    for (const OperandInfo &Op : Encoding.getOperands())
+      emitBinaryParser(OS, Indent, Encoding, Op);
+    return;
+  }
+
+  // If a custom instruction decoder was specified, use that.
+  OS << Indent << "if (!Check(S, " << DecoderMethod
+     << "(MI, insn, Address, Decoder))) { ";
+  if (!Encoding.hasCompleteDecoder())
+    OS << "DecodeComplete = false; ";
+  OS << "return MCDisassembler::Fail; }\n";
+}
+
+/// Returns the decoding code for \p Encoding, as emitted by emitDecoder(),
+/// as a string.
+static std::string getDecoderString(const InstructionEncoding &Encoding) {
+  // The code is indented by 2 when UseFnTableInDecodeToMCInst is set and by
+  // 4 otherwise.
+  const int IndentWidth = UseFnTableInDecodeToMCInst ? 2 : 4;
+
+  std::string DecoderString;
+  raw_string_ostream S(DecoderString);
+  emitDecoder(S, indent(IndentWidth), Encoding);
+  return DecoderString;
+}
+
+/// Builds the CheckAllNode for encoding \p EncodingID. Its children are, in
+/// order: an optional predicate check, one field check per island of
+/// mandatory bits not yet covered by \p FilterBits, an optional soft-fail
+/// check and, finally, the decode node.
+std::unique_ptr<DecoderTreeNode>
+DecoderTreeBuilder::buildTerminalNode(unsigned EncodingID,
+                                      const KnownBits &FilterBits) {
+  const InstructionEncoding &Encoding = Encodings[EncodingID];
+  auto Terminal = std::make_unique<CheckAllNode>();
+
+  // Predicate check, if the encoding needs one.
+  if (doesOpcodeNeedPredicate(Encoding))
+    Terminal->addChild(std::make_unique<CheckPredicateNode>(
+        getPredicateString(Encoding, Target.getName())));
+
+  // Field checks for the remaining islands, highest island first.
+  const std::vector<FilterChooser::Island> Islands =
+      FilterChooser::getIslands(Encoding.getMandatoryBits(), FilterBits);
+  for (auto It = Islands.rbegin(), End = Islands.rend(); It != End; ++It)
+    Terminal->addChild(std::make_unique<CheckFieldNode>(
+        It->StartBit, It->NumBits, It->FieldVal));
+
+  // Soft-fail check, if any bit of the encoding is marked as soft-fail. The
+  // soft-fail bits known to be zero form the positive mask, those known to
+  // be one the negative mask.
+  const KnownBits &InstBits = Encoding.getInstBits();
+  const APInt &SoftFailMask = Encoding.getSoftFailMask();
+  if (!SoftFailMask.isZero()) {
+    const APInt PositiveMask = InstBits.Zero & SoftFailMask;
+    const APInt NegativeMask = InstBits.One & SoftFailMask;
+    Terminal->addChild(std::make_unique<SoftFailNode>(
+        PositiveMask.getZExtValue(), NegativeMask.getZExtValue()));
+  }
+
+  // The decode node carries the generated decoding code for the encoding.
+  std::string DecoderString = getDecoderString(Encoding);
+  Terminal->addChild(std::make_unique<DecodeNode>(Encoding, DecoderString));
+
+  return Terminal;
+}
+
+/// Builds the node that dispatches on the field of \p NumBits bits starting
+/// at \p StartBit. If \p FCMap holds exactly one field value, the result is
+/// a CheckAllNode consisting of a field check followed by the subtree for
+/// that value. Otherwise it is a SwitchFieldNode with one case per value.
+std::unique_ptr<DecoderTreeNode> DecoderTreeBuilder::buildCheckAllOrSwitchNode(
+    unsigned StartBit, unsigned NumBits,
+    const std::map<uint64_t, std::unique_ptr<const FilterChooser>> &FCMap) {
+  if (FCMap.size() != 1) {
+    auto Switch = std::make_unique<SwitchFieldNode>(StartBit, NumBits);
+    for (const auto &[FieldVal, ChildFC] : FCMap)
+      Switch->addCase(FieldVal, buildCheckAnyNode(*ChildFC));
+    return Switch;
+  }
+
+  // A single possible value: check the field, then continue with the child.
+  const auto &[FieldVal, ChildFC] = *FCMap.begin();
+  auto Check = std::make_unique<CheckAllNode>();
+  Check->addChild(
+      std::make_unique<CheckFieldNode>(StartBit, NumBits, FieldVal));
+  Check->addChild(buildCheckAnyNode(*ChildFC));
+  return Check;
+}
+
+/// Builds the CheckAnyNode for filter chooser \p FC. The first child is
+/// either the terminal node of the chooser's singleton encoding or the node
+/// that dispatches on the chooser's filtered field. If the chooser has an
+/// inferior chooser for variable encodings, its subtree is the second child.
+std::unique_ptr<DecoderTreeNode>
+DecoderTreeBuilder::buildCheckAnyNode(const FilterChooser &FC) {
+  auto Any = std::make_unique<CheckAnyNode>();
+
+  std::unique_ptr<DecoderTreeNode> Primary =
+      FC.SingletonEncodingID
+          ? buildTerminalNode(*FC.SingletonEncodingID, FC.FilterBits)
+          : buildCheckAllOrSwitchNode(FC.StartBit, FC.NumBits,
+                                      FC.FilterChooserMap);
+  Any->addChild(std::move(Primary));
+
+  if (FC.VariableFC)
+    Any->addChild(buildCheckAnyNode(*FC.VariableFC));
+
+  return Any;
+}
+
+/// Runs the filter chooser on \p EncodingIDs and converts the result into a
+/// decoder tree. Returns nullptr if the filter chooser reports a decoding
+/// conflict.
+std::unique_ptr<DecoderTreeNode>
+llvm::buildDecoderTree(const CodeGenTarget &Target,
+                       ArrayRef<InstructionEncoding> Encodings,
+                       ArrayRef<unsigned> EncodingIDs) {
+  FilterChooser FC(Encodings, EncodingIDs);
+  if (FC.hasConflict())
+    return nullptr;
+
+  return DecoderTreeBuilder(Target, Encodings).buildTree(FC);
+}



More information about the llvm-commits mailing list