[llvm] [SelectionDAGISel][WIP] Separate the operand numbers in OPC_EmitNode/MorphNodeTo into their own table. (PR #178722)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 29 22:53:01 PST 2026


https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/178722

>From 8379a9ee60775f785447c7678a16d9e149541476 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 28 Jan 2026 23:11:18 -0800
Subject: [PATCH 1/2] [SelectionDAGISel][WIP] Separate the operand numbers in
 OPC_EmitNode/MorphNodeTo into their own table.

The operand lists for these opcodes require 1 byte per operand and
are usually small values that fit in 3-4 bits. This makes their storage
inefficient. In addition, many EmitNode/MorphNodeTo in the isel table
will use the same list of operand numbers.

This patch proposes to separate the operand lists into their own
table where they can be de-duplicated. The OPC_EmitNode/MorphNodeTo
in the main table will only store an index into this smaller table.

This is a reduced version of a suggestion from this very old FIXME.
https://github.com/llvm/llvm-project/blob/d8d4096c0be0a6a3248c8deae96608913a85debf/llvm/utils/TableGen/DAGISelMatcherGen.cpp#L1070

For RISC-V this reduces the main table from 1437353 bytes to 1276015
bytes plus a 929 byte operand list table. A savings of about 11%.

For X86 this reduces the main table from 719237 bytes to 623612 bytes
with a 1042 byte operand list table. A savings of about 11%.

I expect further savings could be had by moving more bytes over.

Initial compile time results https://llvm-compile-time-tracker.com/compare.php?from=3404537393f5896650ddc71f0e1285977c8f8aa4&to=71463ae2580ddbb35ec17deea1dfb6d2e88db017&stat=instructions:u
---
 llvm/include/llvm/CodeGen/SelectionDAGISel.h  |  2 +-
 .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 13 ++++--
 llvm/utils/TableGen/DAGISelMatcherEmitter.cpp | 45 ++++++++++++++++---
 3 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index c94dc4241368e..38a7d70ef6db0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -470,7 +470,7 @@ class SelectionDAGISel {
   }
 
   void SelectCodeCommon(SDNode *NodeToMatch, const uint8_t *MatcherTable,
-                        unsigned TableSize);
+                        unsigned TableSize, const uint8_t *OperandLists);
 
   /// Return true if complex patterns for this target can mutate the
   /// DAG.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index de9daca767388..ed2aa06a9824c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -3350,7 +3350,8 @@ class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
 
 void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
                                         const uint8_t *MatcherTable,
-                                        unsigned TableSize) {
+                                        unsigned TableSize,
+                                        const uint8_t *OperandLists) {
   // FIXME: Should these even be selected?  Handle these cases in the caller?
   switch (NodeToMatch->getOpcode()) {
   default:
@@ -4308,11 +4309,17 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
 
       // Get the operand list.
       unsigned NumOps = MatcherTable[MatcherIndex++];
+
+      // Get the index into the OperandLists.
+      unsigned OperandIndex = MatcherTable[MatcherIndex++];
+      if (OperandIndex & 128)
+        OperandIndex = GetVBR(OperandIndex, MatcherTable, MatcherIndex);
+
       SmallVector<SDValue, 8> Ops;
       for (unsigned i = 0; i != NumOps; ++i) {
-        unsigned RecNo = MatcherTable[MatcherIndex++];
+        unsigned RecNo = OperandLists[OperandIndex++];
         if (RecNo & 128)
-          RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+          RecNo = GetVBR(RecNo, OperandLists, OperandIndex);
 
         assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
         Ops.push_back(RecordedNodes[RecNo].first);
diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index 082d6fce3abe3..d93d98a6fe614 100644
--- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Basic/SDNodeProperties.h"
+#include "Basic/SequenceToOffsetTable.h"
 #include "Common/CodeGenDAGPatterns.h"
 #include "Common/CodeGenInstruction.h"
 #include "Common/CodeGenRegisters.h"
@@ -73,6 +74,8 @@ class MatcherTableEmitter {
 
   std::map<ValueTypeByHwMode, unsigned> ValueTypeMap;
 
+  SequenceToOffsetTable<std::vector<uint8_t>> OperandTable;
+
   unsigned getPatternIdxFromTable(std::string &&P, std::string &&include_loc) {
     const auto [It, Inserted] =
         VecPatterns.try_emplace(std::move(P), VecPatterns.size());
@@ -85,7 +88,8 @@ class MatcherTableEmitter {
 
 public:
   MatcherTableEmitter(const Matcher *TheMatcher, const CodeGenDAGPatterns &cgp)
-      : CGP(cgp), OpcodeCounts(Matcher::HighestKind + 1, 0) {
+      : CGP(cgp), OpcodeCounts(Matcher::HighestKind + 1, 0),
+        OperandTable(std::nullopt) {
     // Record the usage of ComplexPattern.
     MapVector<const ComplexPattern *, unsigned> ComplexPatternUsage;
     // Record the usage of PatternPredicate.
@@ -111,11 +115,26 @@ class MatcherTableEmitter {
           ++PatternPredicateUsage[CPPM->getPredicate()];
         else if (auto *PM = dyn_cast<CheckPredicateMatcher>(N))
           ++PredicateUsage[PM->getPredicate().getOrigPatFragRecord()];
+
+        if (const auto *EN = dyn_cast<EmitNodeMatcherCommon>(N)) {
+          ArrayRef<unsigned> Ops = EN->getOperandList();
+          std::vector<uint8_t> OpBytes;
+          for (unsigned Op : Ops) {
+            uint8_t Buffer[5];
+            unsigned Len = encodeULEB128(Op, Buffer);
+            for (unsigned i = 0; i < Len; ++i)
+              OpBytes.push_back(Buffer[i]);
+          }
+          OperandTable.add(OpBytes);
+        }
+
         N = N->getNext();
       }
     };
     Statistic(TheMatcher);
 
+    OperandTable.layout();
+
     // Sort ComplexPatterns by usage.
     std::vector<std::pair<const ComplexPattern *, unsigned>> ComplexPatternList(
         ComplexPatternUsage.begin(), ComplexPatternUsage.end());
@@ -172,6 +191,8 @@ class MatcherTableEmitter {
   unsigned EmitMatcherList(const Matcher *N, const unsigned Indent,
                            unsigned StartIdx, raw_ostream &OS);
 
+  void EmitOperandLists(raw_ostream &OS);
+
   unsigned SizeMatcherList(Matcher *N, raw_ostream &OS);
 
   void EmitPredicateFunctions(raw_ostream &OS);
@@ -1116,11 +1137,16 @@ unsigned MatcherTableEmitter::EmitMatcher(const Matcher *N,
     if (!OmitComments)
       OS << "/*#Ops*/";
     OS << ',';
-    unsigned NumOperandBytes = 0;
+
+    std::vector<uint8_t> OpBytes;
     for (unsigned i = 0, e = EN->getNumOperands(); i != e; ++i) {
-      OS << ' ';
-      NumOperandBytes += EmitVBRValue(EN->getOperand(i), OS);
+      uint8_t Buffer[5];
+      unsigned Len = encodeULEB128(EN->getOperand(i), Buffer);
+      for (unsigned i = 0; i < Len; ++i)
+        OpBytes.push_back(Buffer[i]);
     }
+    unsigned Index = OperandTable.get(OpBytes);
+    unsigned NumOperandBytes = EmitVBRValue(Index, OS);
 
     if (!OmitComments) {
       // Print the result #'s for EmitNode.
@@ -1209,6 +1235,10 @@ unsigned MatcherTableEmitter::EmitMatcherList(const Matcher *N,
   return Size;
 }
 
+void MatcherTableEmitter::EmitOperandLists(raw_ostream &OS) {
+  OperandTable.emit(OS, [](raw_ostream &OS, uint8_t O) { OS << (unsigned)O; });
+}
+
 void MatcherTableEmitter::EmitNodePredicatesFunction(
     const std::vector<TreePattern *> &Preds, StringRef Decl, raw_ostream &OS) {
   if (Preds.empty())
@@ -1545,9 +1575,14 @@ void llvm::EmitMatcherTable(Matcher *TheMatcher, const CodeGenDAGPatterns &CGP,
 
   MatcherEmitter.EmitHistogram(OS);
 
+  OS << "  static const uint8_t OperandLists[] = {\n";
+  MatcherEmitter.EmitOperandLists(OS);
+  OS << "  };\n\n";
+
   OS << "  #undef COVERAGE_IDX_VAL\n";
   OS << "  #undef TARGET_VAL\n";
-  OS << "  SelectCodeCommon(N, MatcherTable, sizeof(MatcherTable));\n";
+  OS << "  SelectCodeCommon(N, MatcherTable, sizeof(MatcherTable),\n";
+  OS << "                   OperandLists);\n";
   OS << "}\n";
   EndEmitFunction(OS);
 

>From e54c0fd3dd62099e76c18b3032089d352f051da7 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 29 Jan 2026 22:52:19 -0800
Subject: [PATCH 2/2] fixup! Add comments for !OmitComments

---
 llvm/utils/TableGen/DAGISelMatcherEmitter.cpp | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index 6c970652b5b54..67211cdbc8c9c 100644
--- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -1136,7 +1136,7 @@ unsigned MatcherTableEmitter::EmitMatcher(const Matcher *N,
     OS << ' ' << EN->getNumOperands();
     if (!OmitComments)
       OS << "/*#Ops*/";
-    OS << ',';
+    OS << ", ";
 
     std::vector<uint8_t> OpBytes;
     for (unsigned i = 0, e = EN->getNumOperands(); i != e; ++i) {
@@ -1146,13 +1146,24 @@ unsigned MatcherTableEmitter::EmitMatcher(const Matcher *N,
         OpBytes.push_back(Buffer[i]);
     }
     unsigned Index = OperandTable.get(OpBytes);
+    if (!OmitComments)
+      OS << "/*OperandList*/";
     unsigned NumOperandBytes = EmitVBRValue(Index, OS);
 
     if (!OmitComments) {
+      // Print the operand #'s.
+      ArrayRef<unsigned> Ops = EN->getOperandList();
+      OS << " // Ops =";
+      if (Ops.empty())
+        OS << " None";
+      else
+        for (unsigned OpNo : Ops)
+          OS << " #" << OpNo;
+
       // Print the result #'s for EmitNode.
       if (const EmitNodeMatcher *E = dyn_cast<EmitNodeMatcher>(EN)) {
         if (unsigned NumResults = EN->getNumVTs()) {
-          OS << " // Results =";
+          OS << " Results =";
           unsigned First = E->getFirstResultSlot();
           for (unsigned i = 0; i != NumResults; ++i)
             OS << " #" << First + i;



More information about the llvm-commits mailing list