[llvm] [TableGen] Allow empty terminator in SequenceToOffsetTable (PR #119751)

Sergei Barannikov via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 12 13:50:15 PST 2024


https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/119751

>From 2ecc51c1f08b078dce441cfb343464044a139458 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Thu, 12 Dec 2024 22:15:21 +0300
Subject: [PATCH 1/3] [TableGen] Allow empty terminator in
 SequenceToOffsetTable

Some clients do not want to emit a terminator after each sub-sequence
(they have other means of determining the length of the sub-sequence).

This moves `Term` argument from `emit` method to the constructor
and makes it optional. It couldn't be made optional while still
on the `emit` method because if the terminator wasn't specified,
it has to be taken into account in `layout` method as well.

The fact that `layout` method was called is now recorded in a dedicated
member variable, `IsLaidOut`. `Entries != 0` can no longer be used
to reliably check if `layout` method was called because it may be zero
for a different reason: the terminator wasn't specified and all added
sequences (if any) were empty.

This reduces the size of `*LaneMaskLists` and `*SubRegIdxLists` a bit
and resolves the removed FIXME.
---
 llvm/utils/TableGen/AsmWriterEmitter.cpp      |  4 +-
 .../TableGen/Basic/SequenceToOffsetTable.h    | 47 ++++++++++++-------
 llvm/utils/TableGen/DFAEmitter.cpp            | 12 ++---
 llvm/utils/TableGen/DXILEmitter.cpp           |  4 +-
 llvm/utils/TableGen/InstrInfoEmitter.cpp      |  2 +-
 llvm/utils/TableGen/IntrinsicEmitter.cpp      |  2 +-
 llvm/utils/TableGen/RegisterInfoEmitter.cpp   | 29 ++++++------
 7 files changed, 56 insertions(+), 44 deletions(-)

diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp
index 9880214a37368f..64cc80afd0900e 100644
--- a/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -338,7 +338,7 @@ void AsmWriterEmitter::EmitGetMnemonic(
     << "::getMnemonic(const MCInst &MI) const {\n";
 
   // Build an aggregate string, and build a table of offsets into it.
-  SequenceToOffsetTable<std::string> StringTable;
+  SequenceToOffsetTable<std::string> StringTable(/*Terminator=*/'\0');
 
   /// OpcodeInfo - This encodes the index of the string to use for the first
   /// chunk of the output as well as indices used for operand printing.
@@ -583,7 +583,7 @@ void AsmWriterEmitter::EmitPrintInstruction(
 static void
 emitRegisterNameString(raw_ostream &O, StringRef AltName,
                        const std::deque<CodeGenRegister> &Registers) {
-  SequenceToOffsetTable<std::string> StringTable;
+  SequenceToOffsetTable<std::string> StringTable(/*Terminator=*/'\0');
   SmallVector<std::string, 4> AsmNames(Registers.size());
   unsigned i = 0;
   for (const auto &Reg : Registers) {
diff --git a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
index 497e74afc18ec9..4e1388d3e45285 100644
--- a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
+++ b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
@@ -6,9 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// SequenceToOffsetTable can be used to emit a number of null-terminated
-// sequences as one big array.  Use the same memory when a sequence is a suffix
-// of another.
+// SequenceToOffsetTable can be used to emit a number of sequences as one big
+// array. Uses the same memory when a sequence is a suffix of another.
 //
 //===----------------------------------------------------------------------===//
 
@@ -65,8 +64,14 @@ class SequenceToOffsetTable {
   // Sequences added so far, with suffixes removed.
   SeqMap Seqs;
 
+  // Terminator element to be appended to each added sequence.
+  std::optional<ElemT> Terminator;
+
+  // True if `layout` method was called.
+  bool IsLaidOut = false;
+
   // Entries in the final table, or 0 before layout was called.
-  unsigned Entries;
+  unsigned Entries = 0;
 
   // isSuffix - Returns true if A is a suffix of B.
   static bool isSuffix(const SeqT &A, const SeqT &B) {
@@ -74,12 +79,13 @@ class SequenceToOffsetTable {
   }
 
 public:
-  SequenceToOffsetTable() : Entries(0) {}
+  explicit SequenceToOffsetTable(std::optional<ElemT> Terminator)
+      : Terminator(Terminator) {}
 
   /// add - Add a sequence to the table.
   /// This must be called before layout().
   void add(const SeqT &Seq) {
-    assert(Entries == 0 && "Cannot call add() after layout()");
+    assert(!IsLaidOut && "Cannot call add() after layout()");
     typename SeqMap::iterator I = Seqs.lower_bound(Seq);
 
     // If SeqMap contains a sequence that has Seq as a suffix, I will be
@@ -97,25 +103,27 @@ class SequenceToOffsetTable {
   bool empty() const { return Seqs.empty(); }
 
   unsigned size() const {
-    assert((empty() || Entries) && "Call layout() before size()");
+    assert(IsLaidOut && "Call layout() before size()");
     return Entries;
   }
 
   /// layout - Computes the final table layout.
   void layout() {
-    assert(Entries == 0 && "Can only call layout() once");
+    assert(!IsLaidOut && "Can only call layout() once");
+    IsLaidOut = true;
+
     // Lay out the table in Seqs iteration order.
     for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E;
          ++I) {
       I->second = Entries;
       // Include space for a terminator.
-      Entries += I->first.size() + 1;
+      Entries += I->first.size() + Terminator.has_value();
     }
   }
 
   /// get - Returns the offset of Seq in the final table.
   unsigned get(const SeqT &Seq) const {
-    assert(Entries && "Call layout() before get()");
+    assert(IsLaidOut && "Call layout() before get()");
     typename SeqMap::const_iterator I = Seqs.lower_bound(Seq);
     assert(I != Seqs.end() && isSuffix(Seq, I->first) &&
            "get() called with sequence that wasn't added first");
@@ -127,10 +135,10 @@ class SequenceToOffsetTable {
   /// `\0`. Falls back to emitting a comma-separated integer list if
   /// `EmitLongStrLiterals` is false
   void emitStringLiteralDef(raw_ostream &OS, const Twine &Decl) const {
-    assert(Entries && "Call layout() before emitStringLiteralDef()");
+    assert(IsLaidOut && "Call layout() before emitStringLiteralDef()");
     if (!EmitLongStrLiterals) {
       OS << Decl << " = {\n";
-      emit(OS, printChar, "0");
+      emit(OS, printChar);
       OS << "  0\n};\n\n";
       return;
     }
@@ -143,7 +151,9 @@ class SequenceToOffsetTable {
     for (const auto &[Seq, Offset] : Seqs) {
       OS << "  /* " << Offset << " */ \"";
       OS.write_escaped(Seq);
-      OS << "\\0\"\n";
+      if (Terminator)
+        OS.write_escaped(StringRef(&*Terminator, 1));
+      OS << "\"\n";
     }
     OS << "};\n"
        << "#ifdef __GNUC__\n"
@@ -153,16 +163,19 @@ class SequenceToOffsetTable {
 
   /// emit - Print out the table as the body of an array initializer.
   /// Use the Print function to print elements.
-  void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT),
-            const char *Term = "0") const {
-    assert((empty() || Entries) && "Call layout() before emit()");
+  void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT)) const {
+    assert(IsLaidOut && "Call layout() before emit()");
     for (const auto &[Seq, Offset] : Seqs) {
       OS << "  /* " << Offset << " */ ";
       for (const ElemT &Element : Seq) {
         Print(OS, Element);
         OS << ", ";
       }
-      OS << Term << ",\n";
+      if (Terminator) {
+        Print(OS, *Terminator);
+        OS << ',';
+      }
+      OS << '\n';
     }
   }
 };
diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp
index 264cccf6ac0ca6..a42c615879ba74 100644
--- a/llvm/utils/TableGen/DFAEmitter.cpp
+++ b/llvm/utils/TableGen/DFAEmitter.cpp
@@ -117,19 +117,17 @@ void DfaEmitter::emit(StringRef Name, raw_ostream &OS) {
   OS << "// transition implies a set of NFA transitions. These are referred\n";
   OS << "// to by index in " << Name << "Transitions[].\n";
 
-  SequenceToOffsetTable<DfaTransitionInfo> Table;
+  SequenceToOffsetTable<DfaTransitionInfo> Table(
+      /*Terminator=*/std::pair(0, 0));
   std::map<DfaTransitionInfo, unsigned> EmittedIndices;
   for (auto &T : DfaTransitions)
     Table.add(T.second.second);
   Table.layout();
   OS << "const std::array<NfaStatePair, " << Table.size() << "> " << Name
      << "TransitionInfo = {{\n";
-  Table.emit(
-      OS,
-      [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {
-        OS << "{" << P.first << ", " << P.second << "}";
-      },
-      "{0ULL, 0ULL}");
+  Table.emit(OS, [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {
+    OS << "{" << P.first << ", " << P.second << "}";
+  });
 
   OS << "}};\n\n";
 
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index a0c93bed5ad834..b54949ef2dc1ad 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -449,8 +449,8 @@ static void emitDXILIntrinsicArgSelectTypes(const RecordKeeper &Records,
 static void emitDXILOperationTable(ArrayRef<DXILOperationDesc> Ops,
                                    raw_ostream &OS) {
   // Collect Names.
-  SequenceToOffsetTable<std::string> OpClassStrings;
-  SequenceToOffsetTable<std::string> OpStrings;
+  SequenceToOffsetTable<std::string> OpClassStrings(/*Terminator=*/'\0');
+  SequenceToOffsetTable<std::string> OpStrings(/*Terminator=*/'\0');
 
   StringSet<> ClassSet;
   for (const auto &Op : Ops) {
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index 8c0e27215a7362..912a96c40f70e0 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -988,7 +988,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
 
   OS << "extern const " << TargetName << "InstrTable " << TargetName
      << "Descs = {\n  {\n";
-  SequenceToOffsetTable<std::string> InstrNames;
+  SequenceToOffsetTable<std::string> InstrNames(/*Terminator=*/'\0');
   unsigned Num = NumberedInstructions.size();
   for (const CodeGenInstruction *Inst : reverse(NumberedInstructions)) {
     // Keep a list of the instruction names.
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 093602c3da8045..a0ad8e561aa482 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -338,7 +338,7 @@ void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
   // If we can compute a 16/32-bit fixed encoding for this intrinsic, do so and
   // capture it in this vector, otherwise store a ~0U.
   std::vector<FixedEncodingTy> FixedEncodings;
-  SequenceToOffsetTable<TypeSigTy> LongEncodingTable;
+  SequenceToOffsetTable<TypeSigTy> LongEncodingTable(/*Terminator=*/0);
 
   FixedEncodings.reserve(Ints.size());
 
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index bfcd52da1c39cb..98d752bc07e297 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -288,7 +288,7 @@ void RegisterInfoEmitter::EmitRegUnitPressure(raw_ostream &OS,
      << "  return PressureLimitTable[Idx];\n"
      << "}\n\n";
 
-  SequenceToOffsetTable<std::vector<int>> PSetsSeqs;
+  SequenceToOffsetTable<std::vector<int>> PSetsSeqs(/*Terminator=*/-1);
 
   // This table may be larger than NumRCs if some register units needed a list
   // of unit sets that did not correspond to a register class.
@@ -309,7 +309,7 @@ void RegisterInfoEmitter::EmitRegUnitPressure(raw_ostream &OS,
 
   OS << "/// Table of pressure sets per register class or unit.\n"
      << "static const int RCSetsTable[] = {\n";
-  PSetsSeqs.emit(OS, printInt, "-1");
+  PSetsSeqs.emit(OS, printInt);
   OS << "};\n\n";
 
   OS << "/// Get the dimensions of register pressure impacted by this "
@@ -610,7 +610,7 @@ static void printSimpleValueType(raw_ostream &OS, MVT::SimpleValueType VT) {
 }
 
 static void printSubRegIndex(raw_ostream &OS, const CodeGenSubRegIndex *Idx) {
-  OS << Idx->EnumValue;
+  OS << (Idx ? Idx->EnumValue : 0);
 }
 
 // Differentially encoded register and regunit lists allow for better
@@ -869,22 +869,23 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS) {
   typedef std::vector<const CodeGenRegister *> RegVec;
 
   // Differentially encoded lists.
-  SequenceToOffsetTable<DiffVec> DiffSeqs;
+  SequenceToOffsetTable<DiffVec> DiffSeqs(/*Terminator=*/0);
   SmallVector<DiffVec, 4> SubRegLists(Regs.size());
   SmallVector<DiffVec, 4> SuperRegLists(Regs.size());
   SmallVector<DiffVec, 4> RegUnitLists(Regs.size());
 
   // List of lane masks accompanying register unit sequences.
-  SequenceToOffsetTable<MaskVec> LaneMaskSeqs;
+  SequenceToOffsetTable<MaskVec> LaneMaskSeqs(/*Terminator=*/std::nullopt);
   SmallVector<MaskVec, 4> RegUnitLaneMasks(Regs.size());
 
   // Keep track of sub-register names as well. These are not differentially
   // encoded.
   typedef SmallVector<const CodeGenSubRegIndex *, 4> SubRegIdxVec;
-  SequenceToOffsetTable<SubRegIdxVec, deref<std::less<>>> SubRegIdxSeqs;
+  SequenceToOffsetTable<SubRegIdxVec, deref<std::less<>>> SubRegIdxSeqs(
+      /*Terminator=*/std::nullopt);
   SmallVector<SubRegIdxVec, 4> SubRegIdxLists(Regs.size());
 
-  SequenceToOffsetTable<std::string> RegStrings;
+  SequenceToOffsetTable<std::string> RegStrings(/*Terminator=*/'\0');
 
   // Precompute register lists for the SequenceToOffsetTable.
   unsigned i = 0;
@@ -936,9 +937,7 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS) {
 
   // Emit the shared table of regunit lane mask sequences.
   OS << "extern const LaneBitmask " << TargetName << "LaneMaskLists[] = {\n";
-  // TODO: Omit the terminator since it is never used. The length of this list
-  // is known implicitly from the corresponding reg unit list.
-  LaneMaskSeqs.emit(OS, printMask, "LaneBitmask::getAll()");
+  LaneMaskSeqs.emit(OS, printMask);
   OS << "};\n\n";
 
   // Emit the table of sub-register indexes.
@@ -994,7 +993,7 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS) {
   // Loop over all of the register classes... emitting each one.
   OS << "namespace {     // Register classes...\n";
 
-  SequenceToOffsetTable<std::string> RegClassStrings;
+  SequenceToOffsetTable<std::string> RegClassStrings(/*Terminator=*/'\0');
 
   // Emit the register enum value arrays for each RegisterClass
   for (const auto &RC : RegisterClasses) {
@@ -1209,7 +1208,8 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
   unsigned NumModes = CGH.getNumModeIds();
 
   // Build a shared array of value types.
-  SequenceToOffsetTable<std::vector<MVT::SimpleValueType>> VTSeqs;
+  SequenceToOffsetTable<std::vector<MVT::SimpleValueType>> VTSeqs(
+      /*Terminator=*/MVT::Other);
   for (unsigned M = 0; M < NumModes; ++M) {
     for (const auto &RC : RegisterClasses) {
       std::vector<MVT::SimpleValueType> S;
@@ -1221,7 +1221,7 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
   }
   VTSeqs.layout();
   OS << "\nstatic const MVT::SimpleValueType VTLists[] = {\n";
-  VTSeqs.emit(OS, printSimpleValueType, "MVT::Other");
+  VTSeqs.emit(OS, printSimpleValueType);
   OS << "};\n";
 
   // Emit SubRegIndex names, skipping 0.
@@ -1307,7 +1307,8 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
     // Compress the sub-reg index lists.
     typedef std::vector<const CodeGenSubRegIndex *> IdxList;
     SmallVector<IdxList, 8> SuperRegIdxLists(RegisterClasses.size());
-    SequenceToOffsetTable<IdxList, deref<std::less<>>> SuperRegIdxSeqs;
+    SequenceToOffsetTable<IdxList, deref<std::less<>>> SuperRegIdxSeqs(
+        /*Terminator=*/nullptr);
     BitVector MaskBV(RegisterClasses.size());
 
     for (const auto &RC : RegisterClasses) {

>From a3676eda47a65ca50380c717df149bb178779e85 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Fri, 13 Dec 2024 00:06:46 +0300
Subject: [PATCH 2/3] Simplify the loop in `layout`

---
 llvm/utils/TableGen/Basic/SequenceToOffsetTable.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
index 4e1388d3e45285..67cf6677c70b6f 100644
--- a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
+++ b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
@@ -116,9 +116,12 @@ class SequenceToOffsetTable {
     for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E;
          ++I) {
       I->second = Entries;
-      // Include space for a terminator.
-      Entries += I->first.size() + Terminator.has_value();
+      Entries += I->first.size();
     }
+
+    // Include space for terminators.
+    if (Terminator)
+      Entries += Seqs.size();
   }
 
   /// get - Returns the offset of Seq in the final table.

>From b8294395dee679c03b25d43babded68084db701f Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Fri, 13 Dec 2024 00:49:57 +0300
Subject: [PATCH 3/3] Print a dummy element if the array would be empty
 otherwise

---
 llvm/utils/TableGen/Basic/SequenceToOffsetTable.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
index 67cf6677c70b6f..ccfe746a333772 100644
--- a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
+++ b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
@@ -180,6 +180,13 @@ class SequenceToOffsetTable {
       }
       OS << '\n';
     }
+
+    // Print a dummy element if the array would be empty otherwise.
+    if (!Entries) {
+      OS << "  /* dummy */ ";
+      Print(OS, ElemT());
+      OS << '\n';
+    }
   }
 };
 



More information about the llvm-commits mailing list