[llvm] c9070cc - [TableGen] Allow empty terminator in SequenceToOffsetTable (#119751)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 13 08:55:15 PST 2024


Author: Sergei Barannikov
Date: 2024-12-13T19:55:11+03:00
New Revision: c9070cce09e1aef1c4bf1cb8c0000294b533dcd7

URL: https://github.com/llvm/llvm-project/commit/c9070cce09e1aef1c4bf1cb8c0000294b533dcd7
DIFF: https://github.com/llvm/llvm-project/commit/c9070cce09e1aef1c4bf1cb8c0000294b533dcd7.diff

LOG: [TableGen] Allow empty terminator in SequenceToOffsetTable (#119751)

Some clients do not want to emit a terminator after each sub-sequence
(they have other means of determining the length of sub-sequences).

This moves `Term` argument from `emit` method to the constructor and
makes it optional. It couldn't be made optional while still on the
`emit` method because if the terminator wasn't specified, it has to be
taken into account in `layout` method as well.

The fact that `layout` method was called is now recorded in a dedicated
member variable, `IsLaidOut`. `Entries != 0` can no longer be used to
reliably check if `layout` method was called because it may be zero for
a different reason: the terminator wasn't specified and all added
sequences (if any) were empty.

This reduces the size of `*LaneMaskLists` and `*SubRegIdxLists` a bit
and resolves the removed TODO.

Added: 
    

Modified: 
    llvm/test/TableGen/MixedCasedMnemonic.td
    llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
    llvm/utils/TableGen/DFAEmitter.cpp
    llvm/utils/TableGen/RegisterInfoEmitter.cpp

Removed: 
    


################################################################################
diff --git a/llvm/test/TableGen/MixedCasedMnemonic.td b/llvm/test/TableGen/MixedCasedMnemonic.td
index 3dc44ab6052c3b..cb224ac59c6de5 100644
--- a/llvm/test/TableGen/MixedCasedMnemonic.td
+++ b/llvm/test/TableGen/MixedCasedMnemonic.td
@@ -53,8 +53,8 @@ def :MnemonicAlias<"InstB", "BInst">;
 
 // Check that the writer preserves the case of the mnemonics.
 // WRITER:      static const char AsmStrs[] = {
-// WRITER:        "BInst\0"
-// WRITER-NEXT:   "aInst\0"
+// WRITER:        "BInst\000"
+// WRITER-NEXT:   "aInst\000"
 // WRITER-NEXT: };
 
 // ALIAS: static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID) {
@@ -73,4 +73,3 @@ def :MnemonicAlias<"InstB", "BInst">;
 // ALIAS-NEXT        case 'b':	 // 1 string to match.
 // ALIAS-NEXT          Mnemonic = "binst";	 // "instb"
 // ALIAS-NEXT          return;
-

diff --git a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
index 497e74afc18ec9..c918365b2289b8 100644
--- a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
+++ b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
@@ -6,9 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// SequenceToOffsetTable can be used to emit a number of null-terminated
-// sequences as one big array.  Use the same memory when a sequence is a suffix
-// of another.
+// SequenceToOffsetTable can be used to emit a number of sequences as one big
+// array. Uses the same memory when a sequence is a suffix of another.
 //
 //===----------------------------------------------------------------------===//
 
@@ -65,8 +64,14 @@ class SequenceToOffsetTable {
   // Sequences added so far, with suffixes removed.
   SeqMap Seqs;
 
+  // Terminator element to be appended to each added sequence.
+  std::optional<ElemT> Terminator;
+
+  // True if `layout` method was called.
+  bool IsLaidOut = false;
+
   // Entries in the final table, or 0 before layout was called.
-  unsigned Entries;
+  unsigned Entries = 0;
 
   // isSuffix - Returns true if A is a suffix of B.
   static bool isSuffix(const SeqT &A, const SeqT &B) {
@@ -74,12 +79,13 @@ class SequenceToOffsetTable {
   }
 
 public:
-  SequenceToOffsetTable() : Entries(0) {}
+  explicit SequenceToOffsetTable(std::optional<ElemT> Terminator = ElemT())
+      : Terminator(Terminator) {}
 
   /// add - Add a sequence to the table.
   /// This must be called before layout().
   void add(const SeqT &Seq) {
-    assert(Entries == 0 && "Cannot call add() after layout()");
+    assert(!IsLaidOut && "Cannot call add() after layout()");
     typename SeqMap::iterator I = Seqs.lower_bound(Seq);
 
     // If SeqMap contains a sequence that has Seq as a suffix, I will be
@@ -97,25 +103,27 @@ class SequenceToOffsetTable {
   bool empty() const { return Seqs.empty(); }
 
   unsigned size() const {
-    assert((empty() || Entries) && "Call layout() before size()");
+    assert(IsLaidOut && "Call layout() before size()");
     return Entries;
   }
 
   /// layout - Computes the final table layout.
   void layout() {
-    assert(Entries == 0 && "Can only call layout() once");
+    assert(!IsLaidOut && "Can only call layout() once");
+    IsLaidOut = true;
+
     // Lay out the table in Seqs iteration order.
     for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E;
          ++I) {
       I->second = Entries;
       // Include space for a terminator.
-      Entries += I->first.size() + 1;
+      Entries += I->first.size() + Terminator.has_value();
     }
   }
 
   /// get - Returns the offset of Seq in the final table.
   unsigned get(const SeqT &Seq) const {
-    assert(Entries && "Call layout() before get()");
+    assert(IsLaidOut && "Call layout() before get()");
     typename SeqMap::const_iterator I = Seqs.lower_bound(Seq);
     assert(I != Seqs.end() && isSuffix(Seq, I->first) &&
            "get() called with sequence that wasn't added first");
@@ -127,10 +135,10 @@ class SequenceToOffsetTable {
   /// `\0`. Falls back to emitting a comma-separated integer list if
   /// `EmitLongStrLiterals` is false
   void emitStringLiteralDef(raw_ostream &OS, const Twine &Decl) const {
-    assert(Entries && "Call layout() before emitStringLiteralDef()");
+    assert(IsLaidOut && "Call layout() before emitStringLiteralDef()");
     if (!EmitLongStrLiterals) {
       OS << Decl << " = {\n";
-      emit(OS, printChar, "0");
+      emit(OS, printChar);
       OS << "  0\n};\n\n";
       return;
     }
@@ -143,7 +151,9 @@ class SequenceToOffsetTable {
     for (const auto &[Seq, Offset] : Seqs) {
       OS << "  /* " << Offset << " */ \"";
       OS.write_escaped(Seq);
-      OS << "\\0\"\n";
+      if (Terminator)
+        OS.write_escaped(StringRef(&*Terminator, 1));
+      OS << "\"\n";
     }
     OS << "};\n"
        << "#ifdef __GNUC__\n"
@@ -153,16 +163,26 @@ class SequenceToOffsetTable {
 
   /// emit - Print out the table as the body of an array initializer.
   /// Use the Print function to print elements.
-  void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT),
-            const char *Term = "0") const {
-    assert((empty() || Entries) && "Call layout() before emit()");
+  void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT)) const {
+    assert(IsLaidOut && "Call layout() before emit()");
     for (const auto &[Seq, Offset] : Seqs) {
       OS << "  /* " << Offset << " */ ";
       for (const ElemT &Element : Seq) {
         Print(OS, Element);
         OS << ", ";
       }
-      OS << Term << ",\n";
+      if (Terminator) {
+        Print(OS, *Terminator);
+        OS << ',';
+      }
+      OS << '\n';
+    }
+
+    // Print a dummy element if the array would be empty otherwise.
+    if (!Entries) {
+      OS << "  /* dummy */ ";
+      Print(OS, ElemT());
+      OS << '\n';
     }
   }
 };

diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp
index 264cccf6ac0ca6..c150620b741757 100644
--- a/llvm/utils/TableGen/DFAEmitter.cpp
+++ b/llvm/utils/TableGen/DFAEmitter.cpp
@@ -124,12 +124,9 @@ void DfaEmitter::emit(StringRef Name, raw_ostream &OS) {
   Table.layout();
   OS << "const std::array<NfaStatePair, " << Table.size() << "> " << Name
      << "TransitionInfo = {{\n";
-  Table.emit(
-      OS,
-      [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {
-        OS << "{" << P.first << ", " << P.second << "}";
-      },
-      "{0ULL, 0ULL}");
+  Table.emit(OS, [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {
+    OS << "{" << P.first << ", " << P.second << "}";
+  });
 
   OS << "}};\n\n";
 

diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index bfcd52da1c39cb..0c1f5d205ca0f4 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -288,7 +288,7 @@ void RegisterInfoEmitter::EmitRegUnitPressure(raw_ostream &OS,
      << "  return PressureLimitTable[Idx];\n"
      << "}\n\n";
 
-  SequenceToOffsetTable<std::vector<int>> PSetsSeqs;
+  SequenceToOffsetTable<std::vector<int>> PSetsSeqs(/*Terminator=*/-1);
 
   // This table may be larger than NumRCs if some register units needed a list
   // of unit sets that did not correspond to a register class.
@@ -309,7 +309,7 @@ void RegisterInfoEmitter::EmitRegUnitPressure(raw_ostream &OS,
 
   OS << "/// Table of pressure sets per register class or unit.\n"
      << "static const int RCSetsTable[] = {\n";
-  PSetsSeqs.emit(OS, printInt, "-1");
+  PSetsSeqs.emit(OS, printInt);
   OS << "};\n\n";
 
   OS << "/// Get the dimensions of register pressure impacted by this "
@@ -610,7 +610,7 @@ static void printSimpleValueType(raw_ostream &OS, MVT::SimpleValueType VT) {
 }
 
 static void printSubRegIndex(raw_ostream &OS, const CodeGenSubRegIndex *Idx) {
-  OS << Idx->EnumValue;
+  OS << (Idx ? Idx->EnumValue : 0);
 }
 
 // Differentially encoded register and regunit lists allow for better
@@ -875,13 +875,14 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS) {
   SmallVector<DiffVec, 4> RegUnitLists(Regs.size());
 
   // List of lane masks accompanying register unit sequences.
-  SequenceToOffsetTable<MaskVec> LaneMaskSeqs;
+  SequenceToOffsetTable<MaskVec> LaneMaskSeqs(/*Terminator=*/std::nullopt);
   SmallVector<MaskVec, 4> RegUnitLaneMasks(Regs.size());
 
   // Keep track of sub-register names as well. These are not differentially
   // encoded.
   typedef SmallVector<const CodeGenSubRegIndex *, 4> SubRegIdxVec;
-  SequenceToOffsetTable<SubRegIdxVec, deref<std::less<>>> SubRegIdxSeqs;
+  SequenceToOffsetTable<SubRegIdxVec, deref<std::less<>>> SubRegIdxSeqs(
+      /*Terminator=*/std::nullopt);
   SmallVector<SubRegIdxVec, 4> SubRegIdxLists(Regs.size());
 
   SequenceToOffsetTable<std::string> RegStrings;
@@ -936,9 +937,7 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS) {
 
   // Emit the shared table of regunit lane mask sequences.
   OS << "extern const LaneBitmask " << TargetName << "LaneMaskLists[] = {\n";
-  // TODO: Omit the terminator since it is never used. The length of this list
-  // is known implicitly from the corresponding reg unit list.
-  LaneMaskSeqs.emit(OS, printMask, "LaneBitmask::getAll()");
+  LaneMaskSeqs.emit(OS, printMask);
   OS << "};\n\n";
 
   // Emit the table of sub-register indexes.
@@ -1209,7 +1208,8 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
   unsigned NumModes = CGH.getNumModeIds();
 
   // Build a shared array of value types.
-  SequenceToOffsetTable<std::vector<MVT::SimpleValueType>> VTSeqs;
+  SequenceToOffsetTable<std::vector<MVT::SimpleValueType>> VTSeqs(
+      /*Terminator=*/MVT::Other);
   for (unsigned M = 0; M < NumModes; ++M) {
     for (const auto &RC : RegisterClasses) {
       std::vector<MVT::SimpleValueType> S;
@@ -1221,7 +1221,7 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
   }
   VTSeqs.layout();
   OS << "\nstatic const MVT::SimpleValueType VTLists[] = {\n";
-  VTSeqs.emit(OS, printSimpleValueType, "MVT::Other");
+  VTSeqs.emit(OS, printSimpleValueType);
   OS << "};\n";
 
   // Emit SubRegIndex names, skipping 0.


        


More information about the llvm-commits mailing list