[llvm] [TableGen][DecoderEmitter] Use KnownBits for filters/encodings (NFCI) (PR #154691)

Sergei Barannikov via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 21 02:23:52 PDT 2025


https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/154691

>From 77f025d4c86897cf774fa37e8ddc16764149db6a Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Thu, 21 Aug 2025 10:57:28 +0300
Subject: [PATCH 1/2] [TableGen][DecoderEmitter] Use KnownBits for filter
 values (NFCI)

KnownBits is faster and smaller than std::vector<BitValue>.
It is also more convenient to use.
---
 llvm/utils/TableGen/DecoderEmitter.cpp | 84 ++++++++++++--------------
 1 file changed, 39 insertions(+), 45 deletions(-)

diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 95522030300a0..10dc4fe8f2f1f 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@@ -101,6 +102,22 @@ STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
 
 static unsigned getNumToSkipInBytes() { return LargeTable ? 3 : 2; }
 
+/// Similar to KnownBits::print(), but allows you to specify a character to use
+/// to print unknown bits.
+static void printKnownBits(raw_ostream &OS, const KnownBits &Bits,
+                           char Unknown) {
+  for (unsigned I = Bits.getBitWidth(); I--;) {
+    if (Bits.Zero[I] && Bits.One[I])
+      OS << '!';
+    else if (Bits.Zero[I])
+      OS << '0';
+    else if (Bits.One[I])
+      OS << '1';
+    else
+      OS << Unknown;
+  }
+}
+
 namespace {
 
 struct EncodingField {
@@ -315,15 +332,11 @@ class DecoderEmitter {
 
 // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
 // for a bit value.
-//
-// BIT_UNFILTERED is used as the init value for a filter position.  It is used
-// only for filter processings.
 struct BitValue {
   enum bit_value_t : uint8_t {
-    BIT_FALSE,     // '0'
-    BIT_TRUE,      // '1'
-    BIT_UNSET,     // '?', printed as '_'
-    BIT_UNFILTERED // unfiltered, printed as '.'
+    BIT_FALSE, // '0'
+    BIT_TRUE,  // '1'
+    BIT_UNSET, // '?', printed as '_'
   };
 
   BitValue(bit_value_t V) : V(V) {}
@@ -352,8 +365,6 @@ struct BitValue {
       return "1";
     case BIT_UNSET:
       return "_";
-    case BIT_UNFILTERED:
-      return ".";
     }
     llvm_unreachable("Unknow bit value");
   }
@@ -553,8 +564,8 @@ class FilterChooser {
   std::unique_ptr<Filter> BestFilter;
 
   // Array of bit values passed down from our parent.
-  // Set to all BIT_UNFILTERED's for Parent == NULL.
-  std::vector<BitValue> FilterBitValues;
+  // Set to all unknown for Parent == nullptr.
+  KnownBits FilterBits;
 
   // Links to the FilterChooser above us in the decoding tree.
   const FilterChooser *Parent;
@@ -575,19 +586,18 @@ class FilterChooser {
   FilterChooser(ArrayRef<InstructionEncoding> Encodings,
                 ArrayRef<unsigned> EncodingIDs, unsigned BW,
                 const DecoderEmitter *E)
-      : Encodings(Encodings), EncodingIDs(EncodingIDs),
-        FilterBitValues(BW, BitValue::BIT_UNFILTERED), Parent(nullptr),
-        BitWidth(BW), Emitter(E) {
+      : Encodings(Encodings), EncodingIDs(EncodingIDs), FilterBits(BW),
+        Parent(nullptr), BitWidth(BW), Emitter(E) {
     doFilter();
   }
 
   FilterChooser(ArrayRef<InstructionEncoding> Encodings,
                 ArrayRef<unsigned> EncodingIDs,
-                const std::vector<BitValue> &ParentFilterBitValues,
-                const FilterChooser &parent)
+                const KnownBits &ParentFilterBits, const FilterChooser &parent)
       : Encodings(Encodings), EncodingIDs(EncodingIDs),
-        FilterBitValues(ParentFilterBitValues), Parent(&parent),
+        FilterBits(ParentFilterBits), Parent(&parent),
         BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
+    assert(!FilterBits.hasConflict() && "Broken filter");
     doFilter();
   }
 
@@ -618,16 +628,12 @@ class FilterChooser {
     return Insn;
   }
 
-  /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
-  /// filter array as a series of chars.
-  void dumpFilterArray(raw_ostream &OS, ArrayRef<BitValue> Filter) const;
-
   /// dumpStack - dumpStack traverses the filter chooser chain and calls
   /// dumpFilterArray on each filter chooser up to the top level one.
   void dumpStack(raw_ostream &OS, indent Indent) const;
 
-  bool PositionFiltered(unsigned Idx) const {
-    return FilterBitValues[Idx].isSet();
+  bool isPositionFiltered(unsigned Idx) const {
+    return FilterBits.Zero[Idx] || FilterBits.One[Idx];
   }
 
   // Calculates the island(s) needed to decode the instruction.
@@ -742,16 +748,14 @@ Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits)
 // match the remaining undecoded encoding bits against the singleton.
 void Filter::recurse() {
   // Starts by inheriting our parent filter chooser's filter bit values.
-  std::vector<BitValue> BitValueArray(Owner.FilterBitValues);
+  KnownBits FilterBits = Owner.FilterBits;
+  assert(FilterBits.extractBits(NumBits, StartBit).isUnknown());
 
   if (!VariableIDs.empty()) {
-    for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
-      BitValueArray[StartBit + bitIndex] = BitValue::BIT_UNFILTERED;
-
     // Delegates to an inferior filter chooser for further processing on this
     // group of instructions whose segment values are variable.
     VariableFC = std::make_unique<FilterChooser>(Owner.Encodings, VariableIDs,
-                                                 BitValueArray, Owner);
+                                                 FilterBits, Owner);
   }
 
   // No need to recurse for a singleton filtered instruction.
@@ -763,17 +767,15 @@ void Filter::recurse() {
 
   // Otherwise, create sub choosers.
   for (const auto &[FilterVal, EncodingIDs] : FilteredIDs) {
-    // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
-    for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
-      BitValueArray[StartBit + bitIndex] = FilterVal & (1ULL << bitIndex)
-                                               ? BitValue::BIT_TRUE
-                                               : BitValue::BIT_FALSE;
+    // Create a new filter by inserting the field bits into the parent filter.
+    APInt FieldBits(NumBits, FilterVal);
+    FilterBits.insertBits(KnownBits::makeConstant(FieldBits), StartBit);
 
     // Delegates to an inferior filter chooser for further processing on this
     // category of instructions.
     FilterChooserMap.try_emplace(
         FilterVal, std::make_unique<FilterChooser>(Owner.Encodings, EncodingIDs,
-                                                   BitValueArray, Owner));
+                                                   FilterBits, Owner));
   }
 }
 
@@ -1145,21 +1147,13 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
   OS << "}\n";
 }
 
-/// dumpFilterArray - dumpFilterArray prints out debugging info for the given
-/// filter array as a series of chars.
-void FilterChooser::dumpFilterArray(raw_ostream &OS,
-                                    ArrayRef<BitValue> Filter) const {
-  for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--)
-    OS << Filter[bitIndex - 1];
-}
-
 /// dumpStack - dumpStack traverses the filter chooser chain and calls
 /// dumpFilterArray on each filter chooser up to the top level one.
 void FilterChooser::dumpStack(raw_ostream &OS, indent Indent) const {
   if (Parent)
     Parent->dumpStack(OS, Indent);
   OS << Indent;
-  dumpFilterArray(OS, FilterBitValues);
+  printKnownBits(OS, FilterBits, '.');
   OS << '\n';
 }
 
@@ -1180,7 +1174,7 @@ FilterChooser::getIslands(const insn_t &Insn) const {
 
   for (unsigned i = 0; i < BitWidth; ++i) {
     std::optional<uint64_t> Val = Insn[i].getValue();
-    bool Filtered = PositionFiltered(i);
+    bool Filtered = isPositionFiltered(i);
     switch (State) {
     default:
       llvm_unreachable("Unreachable code!");
@@ -1762,7 +1756,7 @@ void FilterChooser::doFilter() {
   // FILTERED bit positions provide no entropy and are not worthy of pursuing.
   // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
   for (unsigned BitIndex = 0; BitIndex < BitWidth; ++BitIndex)
-    if (FilterBitValues[BitIndex].isSet())
+    if (isPositionFiltered(BitIndex))
       BitAttrs[BitIndex] = ATTR_FILTERED;
 
   for (unsigned EncodingID : EncodingIDs) {

>From cfc80a1a61501433a2f539d743a5a6b47caf48ba Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Thu, 21 Aug 2025 11:49:31 +0300
Subject: [PATCH 2/2] Use KnownBits for encodings as well

---
 llvm/utils/TableGen/DecoderEmitter.cpp | 162 ++++++++-----------------
 1 file changed, 49 insertions(+), 113 deletions(-)

diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 10dc4fe8f2f1f..b3c9664775190 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -330,60 +330,8 @@ class DecoderEmitter {
   StringRef PredicateNamespace;
 };
 
-// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
-// for a bit value.
-struct BitValue {
-  enum bit_value_t : uint8_t {
-    BIT_FALSE, // '0'
-    BIT_TRUE,  // '1'
-    BIT_UNSET, // '?', printed as '_'
-  };
-
-  BitValue(bit_value_t V) : V(V) {}
-  explicit BitValue(const Init *Init) {
-    if (const auto *Bit = dyn_cast<BitInit>(Init))
-      V = Bit->getValue() ? BIT_TRUE : BIT_FALSE;
-    else
-      V = BIT_UNSET;
-  }
-  BitValue(const BitsInit &Bits, unsigned Idx) : BitValue(Bits.getBit(Idx)) {}
-
-  bool isSet() const { return V == BIT_TRUE || V == BIT_FALSE; }
-  bool isUnset() const { return V == BIT_UNSET; }
-  std::optional<uint64_t> getValue() const {
-    if (isSet())
-      return static_cast<uint64_t>(V);
-    return std::nullopt;
-  }
-
-  // For printing a bit value.
-  operator StringRef() const {
-    switch (V) {
-    case BIT_FALSE:
-      return "0";
-    case BIT_TRUE:
-      return "1";
-    case BIT_UNSET:
-      return "_";
-    }
-    llvm_unreachable("Unknow bit value");
-  }
-
-  bool operator==(bit_value_t Other) const { return Other == V; }
-  bool operator!=(bit_value_t Other) const { return Other != V; }
-
-private:
-  bit_value_t V;
-};
-
 } // end anonymous namespace
 
-// Prints the bit value for each position.
-static void dumpBits(raw_ostream &OS, const BitsInit &Bits, unsigned BitWidth) {
-  for (const Init *Bit : reverse(Bits.getBits().take_front(BitWidth)))
-    OS << BitValue(Bit);
-}
-
 static const BitsInit &getBitsField(const Record &Def, StringRef FieldName) {
   const RecordVal *RV = Def.getValue(FieldName);
   if (const BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue()))
@@ -405,27 +353,6 @@ static const BitsInit &getBitsField(const Record &Def, StringRef FieldName) {
   return *BitsInit::get(Def.getRecords(), Bits);
 }
 
-// Representation of the instruction to work on.
-typedef std::vector<BitValue> insn_t;
-
-/// Extracts a NumBits long field from Insn, starting from StartBit.
-/// Returns the value of the field if all bits are well-known,
-/// otherwise std::nullopt.
-static std::optional<uint64_t>
-fieldFromInsn(const insn_t &Insn, unsigned StartBit, unsigned NumBits) {
-  uint64_t Field = 0;
-
-  for (unsigned BitIndex = 0; BitIndex < NumBits; ++BitIndex) {
-    if (Insn[StartBit + BitIndex] == BitValue::BIT_UNSET)
-      return std::nullopt;
-
-    if (Insn[StartBit + BitIndex] == BitValue::BIT_TRUE)
-      Field = Field | (1ULL << BitIndex);
-  }
-
-  return Field;
-}
-
 namespace {
 
 class FilterChooser;
@@ -607,11 +534,10 @@ class FilterChooser {
   unsigned getBitWidth() const { return BitWidth; }
 
 protected:
-  // Populates the insn given the uid.
-  insn_t insnWithID(unsigned EncodingID) const {
+  KnownBits getMandatoryEncodingBits(unsigned EncodingID) const {
     const Record *EncodingDef = Encodings[EncodingID].getRecord();
     const BitsInit &Bits = getBitsField(*EncodingDef, "Inst");
-    insn_t Insn(std::max(BitWidth, Bits.getNumBits()), BitValue::BIT_UNSET);
+    KnownBits Insn(std::max(BitWidth, Bits.getNumBits()));
     // We may have a SoftFail bitmask, which specifies a mask where an encoding
     // may differ from the value in "Inst" and yet still be valid, but the
     // disassembler should return SoftFail instead of Success.
@@ -620,10 +546,17 @@ class FilterChooser {
     const RecordVal *RV = EncodingDef->getValue("SoftFail");
     const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
     for (unsigned i = 0; i < Bits.getNumBits(); ++i) {
-      if (SFBits && BitValue(*SFBits, i) == BitValue::BIT_TRUE)
-        Insn[i] = BitValue::BIT_UNSET;
-      else
-        Insn[i] = BitValue(Bits, i);
+      if (SFBits) {
+        const auto *B = dyn_cast<BitInit>(SFBits->getBit(i));
+        if (B && B->getValue())
+          continue;
+      }
+      if (const auto *B = dyn_cast<BitInit>(Bits.getBit(i))) {
+        if (B->getValue())
+          Insn.One.setBit(i);
+        else
+          Insn.Zero.setBit(i);
+      }
     }
     return Insn;
   }
@@ -640,7 +573,7 @@ class FilterChooser {
   // This returns a list of undecoded bits of an instructions, for example,
   // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
   // decoded bits in order to verify that the instruction matches the Opcode.
-  std::vector<Island> getIslands(const insn_t &Insn) const;
+  std::vector<Island> getIslands(const KnownBits &EncodingBits) const;
 
   // Emits code to check the Predicates member of an instruction are true.
   // Returns true if predicate matches were emitted, false otherwise.
@@ -719,15 +652,15 @@ Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits)
 
   for (unsigned EncodingID : Owner.EncodingIDs) {
     // Populates the insn given the uid.
-    insn_t Insn = Owner.insnWithID(EncodingID);
+    KnownBits EncodingBits = Owner.getMandatoryEncodingBits(EncodingID);
 
     // Scans the segment for possibly well-specified encoding bits.
-    std::optional<uint64_t> Field = fieldFromInsn(Insn, StartBit, NumBits);
+    KnownBits FieldBits = EncodingBits.extractBits(NumBits, StartBit);
 
-    if (Field) {
+    if (FieldBits.isConstant()) {
       // The encoding bits are well-known.  Lets add the uid of the
       // instruction into the bucket keyed off the constant field value.
-      FilteredIDs[*Field].push_back(EncodingID);
+      FilteredIDs[FieldBits.getConstant().getZExtValue()].push_back(EncodingID);
       ++NumFiltered;
     } else {
       // Some of the encoding bit(s) are unspecified.  This contributes to
@@ -1162,7 +1095,7 @@ void FilterChooser::dumpStack(raw_ostream &OS, indent Indent) const {
 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
 // decoded bits in order to verify that the instruction matches the Opcode.
 std::vector<FilterChooser::Island>
-FilterChooser::getIslands(const insn_t &Insn) const {
+FilterChooser::getIslands(const KnownBits &EncodingBits) const {
   std::vector<Island> Islands;
   uint64_t FieldVal;
   unsigned StartBit;
@@ -1173,28 +1106,29 @@ FilterChooser::getIslands(const insn_t &Insn) const {
   unsigned State = 0;
 
   for (unsigned i = 0; i < BitWidth; ++i) {
-    std::optional<uint64_t> Val = Insn[i].getValue();
+    bool IsKnown = EncodingBits.Zero[i] || EncodingBits.One[i];
     bool Filtered = isPositionFiltered(i);
     switch (State) {
     default:
       llvm_unreachable("Unreachable code!");
     case 0:
     case 1:
-      if (Filtered || !Val) {
+      if (Filtered || !IsKnown) {
         State = 1; // Still in Water
       } else {
         State = 2; // Into the Island
         StartBit = i;
-        FieldVal = *Val;
+        FieldVal = static_cast<uint64_t>(EncodingBits.One[i]);
       }
       break;
     case 2:
-      if (Filtered || !Val) {
+      if (Filtered || !IsKnown) {
         State = 1; // Into the Water
         Islands.push_back({StartBit, i - StartBit, FieldVal});
       } else {
         State = 2; // Still in Island
-        FieldVal |= *Val << (i - StartBit);
+        FieldVal |= static_cast<uint64_t>(EncodingBits.One[i])
+                    << (i - StartBit);
       }
       break;
     }
@@ -1432,19 +1366,11 @@ void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
   APInt PositiveMask(BitWidth, 0ULL);
   APInt NegativeMask(BitWidth, 0ULL);
   for (unsigned i = 0; i < BitWidth; ++i) {
-    BitValue B(*SFBits, i);
-    BitValue IB(*InstBits, i);
-
-    if (B != BitValue::BIT_TRUE)
+    if (!isa<BitInit>(SFBits->getBit(i)) ||
+        !cast<BitInit>(SFBits->getBit(i))->getValue())
       continue;
 
-    if (IB == BitValue::BIT_FALSE) {
-      // The bit is meant to be false, so emit a check to see if it is true.
-      PositiveMask.setBit(i);
-    } else if (IB == BitValue::BIT_TRUE) {
-      // The bit is meant to be true, so emit a check to see if it is false.
-      NegativeMask.setBit(i);
-    } else {
+    if (!isa<BitInit>(InstBits->getBit(i))) {
       // The bit is not set; this must be an error!
       errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
              << Encodings[EncodingID].getName() << " is set but Inst{" << i
@@ -1453,6 +1379,15 @@ void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
              << " (1/0 - not '?') in Inst\n";
       return;
     }
+
+    bool IB = cast<BitInit>(InstBits->getBit(i))->getValue();
+    if (!IB) {
+      // The bit is meant to be false, so emit a check to see if it is true.
+      PositiveMask.setBit(i);
+    } else {
+      // The bit is meant to be true, so emit a check to see if it is false.
+      NegativeMask.setBit(i);
+    }
   }
 
   bool NeedPositiveMask = PositiveMask.getBoolValue();
@@ -1469,10 +1404,10 @@ void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
 // Emits table entries to decode the singleton.
 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                                             unsigned EncodingID) const {
-  insn_t Insn = insnWithID(EncodingID);
+  KnownBits EncodingBits = getMandatoryEncodingBits(EncodingID);
 
   // Look for islands of undecoded bits of the singleton.
-  std::vector<Island> Islands = getIslands(Insn);
+  std::vector<Island> Islands = getIslands(EncodingBits);
 
   // Emit the predicate table entry if one is needed.
   emitPredicateTableEntry(TableInfo, EncodingID);
@@ -1571,10 +1506,10 @@ bool FilterChooser::filterProcessor(ArrayRef<bitAttr_t> BitAttrs,
     assert(EncodingIDs.size() == 3);
 
     for (unsigned EncodingID : EncodingIDs) {
-      insn_t Insn = insnWithID(EncodingID);
+      KnownBits EncodingBits = getMandatoryEncodingBits(EncodingID);
 
       // Look for islands of undecoded bits of any instruction.
-      std::vector<Island> Islands = getIslands(Insn);
+      std::vector<Island> Islands = getIslands(EncodingBits);
       if (!Islands.empty()) {
         // Found an instruction with island(s).  Now just assign a filter.
         runSingleFilter(Islands[0].StartBit, Islands[0].NumBits);
@@ -1760,22 +1695,23 @@ void FilterChooser::doFilter() {
       BitAttrs[BitIndex] = ATTR_FILTERED;
 
   for (unsigned EncodingID : EncodingIDs) {
-    insn_t EncodingBits = insnWithID(EncodingID);
+    KnownBits EncodingBits = getMandatoryEncodingBits(EncodingID);
 
     for (unsigned BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
+      bool IsKnown = EncodingBits.Zero[BitIndex] || EncodingBits.One[BitIndex];
       switch (BitAttrs[BitIndex]) {
       case ATTR_NONE:
-        if (EncodingBits[BitIndex] == BitValue::BIT_UNSET)
-          BitAttrs[BitIndex] = ATTR_ALL_UNSET;
-        else
+        if (IsKnown)
           BitAttrs[BitIndex] = ATTR_ALL_SET;
+        else
+          BitAttrs[BitIndex] = ATTR_ALL_UNSET;
         break;
       case ATTR_ALL_SET:
-        if (EncodingBits[BitIndex] == BitValue::BIT_UNSET)
+        if (!IsKnown)
           BitAttrs[BitIndex] = ATTR_MIXED;
         break;
       case ATTR_ALL_UNSET:
-        if (EncodingBits[BitIndex] != BitValue::BIT_UNSET)
+        if (IsKnown)
           BitAttrs[BitIndex] = ATTR_MIXED;
         break;
       case ATTR_MIXED:
@@ -1816,7 +1752,7 @@ void FilterChooser::doFilter() {
   for (unsigned EncodingID : EncodingIDs) {
     const InstructionEncoding &Enc = Encodings[EncodingID];
     errs() << Indent;
-    dumpBits(errs(), getBitsField(*Enc.getRecord(), "Inst"), BitWidth);
+    printKnownBits(errs(), getMandatoryEncodingBits(EncodingID), '_');
     errs() << "  " << Enc.getName() << '\n';
   }
   PrintFatalError("Decoding conflict encountered");



More information about the llvm-commits mailing list