[llvm] [SelectionDAGISel][WIP] Separate the operand numbers in OPC_EmitNode/MorphNodeTo into their own table. (PR #178722)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 29 22:53:01 PST 2026
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/178722
>From 8379a9ee60775f785447c7678a16d9e149541476 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 28 Jan 2026 23:11:18 -0800
Subject: [PATCH 1/2] [SelectionDAGISel][WIP] Separate the operand numbers in
OPC_EmitNode/MorphNodeTo into their own table.
The operand lists for these opcodes require 1 byte per operand, but the
operands are usually small values that fit in 3-4 bits. This makes their
storage inefficient. In addition, many EmitNode/MorphNodeTo entries in the
isel table will use the same list of operand numbers.
This patch proposes to separate the operand lists into their own
table where they can be de-duplicated. The OPC_EmitNode/MorphNodeTo
in the main table will only store an index into this smaller table.
This is a reduced version of a suggestion from this very old FIXME.
https://github.com/llvm/llvm-project/blob/d8d4096c0be0a6a3248c8deae96608913a85debf/llvm/utils/TableGen/DAGISelMatcherGen.cpp#L1070
For RISC-V this reduces the main table from 1437353 bytes to 1276015
bytes plus a 929 byte operand list table. A savings of about 11%.
For X86 this reduces the main table from 719237 bytes to 623612 bytes
with a 1042 byte operand list table. A savings of about 13%.
I expect further savings could be had by moving more bytes over.
Initial compile-time results: https://llvm-compile-time-tracker.com/compare.php?from=3404537393f5896650ddc71f0e1285977c8f8aa4&to=71463ae2580ddbb35ec17deea1dfb6d2e88db017&stat=instructions:u
---
llvm/include/llvm/CodeGen/SelectionDAGISel.h | 2 +-
.../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 13 ++++--
llvm/utils/TableGen/DAGISelMatcherEmitter.cpp | 45 ++++++++++++++++---
3 files changed, 51 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index c94dc4241368e..38a7d70ef6db0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -470,7 +470,7 @@ class SelectionDAGISel {
}
void SelectCodeCommon(SDNode *NodeToMatch, const uint8_t *MatcherTable,
- unsigned TableSize);
+ unsigned TableSize, const uint8_t *OperandLists);
/// Return true if complex patterns for this target can mutate the
/// DAG.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index de9daca767388..ed2aa06a9824c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -3350,7 +3350,8 @@ class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
const uint8_t *MatcherTable,
- unsigned TableSize) {
+ unsigned TableSize,
+ const uint8_t *OperandLists) {
// FIXME: Should these even be selected? Handle these cases in the caller?
switch (NodeToMatch->getOpcode()) {
default:
@@ -4308,11 +4309,17 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// Get the operand list.
unsigned NumOps = MatcherTable[MatcherIndex++];
+
+ // Get the index into the OperandLists.
+ unsigned OperandIndex = MatcherTable[MatcherIndex++];
+ if (OperandIndex & 128)
+ OperandIndex = GetVBR(OperandIndex, MatcherTable, MatcherIndex);
+
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumOps; ++i) {
- unsigned RecNo = MatcherTable[MatcherIndex++];
+ unsigned RecNo = OperandLists[OperandIndex++];
if (RecNo & 128)
- RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+ RecNo = GetVBR(RecNo, OperandLists, OperandIndex);
assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
Ops.push_back(RecordedNodes[RecNo].first);
diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index 082d6fce3abe3..d93d98a6fe614 100644
--- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "Basic/SDNodeProperties.h"
+#include "Basic/SequenceToOffsetTable.h"
#include "Common/CodeGenDAGPatterns.h"
#include "Common/CodeGenInstruction.h"
#include "Common/CodeGenRegisters.h"
@@ -73,6 +74,8 @@ class MatcherTableEmitter {
std::map<ValueTypeByHwMode, unsigned> ValueTypeMap;
+ SequenceToOffsetTable<std::vector<uint8_t>> OperandTable;
+
unsigned getPatternIdxFromTable(std::string &&P, std::string &&include_loc) {
const auto [It, Inserted] =
VecPatterns.try_emplace(std::move(P), VecPatterns.size());
@@ -85,7 +88,8 @@ class MatcherTableEmitter {
public:
MatcherTableEmitter(const Matcher *TheMatcher, const CodeGenDAGPatterns &cgp)
- : CGP(cgp), OpcodeCounts(Matcher::HighestKind + 1, 0) {
+ : CGP(cgp), OpcodeCounts(Matcher::HighestKind + 1, 0),
+ OperandTable(std::nullopt) {
// Record the usage of ComplexPattern.
MapVector<const ComplexPattern *, unsigned> ComplexPatternUsage;
// Record the usage of PatternPredicate.
@@ -111,11 +115,26 @@ class MatcherTableEmitter {
++PatternPredicateUsage[CPPM->getPredicate()];
else if (auto *PM = dyn_cast<CheckPredicateMatcher>(N))
++PredicateUsage[PM->getPredicate().getOrigPatFragRecord()];
+
+ if (const auto *EN = dyn_cast<EmitNodeMatcherCommon>(N)) {
+ ArrayRef<unsigned> Ops = EN->getOperandList();
+ std::vector<uint8_t> OpBytes;
+ for (unsigned Op : Ops) {
+ uint8_t Buffer[5];
+ unsigned Len = encodeULEB128(Op, Buffer);
+ for (unsigned i = 0; i < Len; ++i)
+ OpBytes.push_back(Buffer[i]);
+ }
+ OperandTable.add(OpBytes);
+ }
+
N = N->getNext();
}
};
Statistic(TheMatcher);
+ OperandTable.layout();
+
// Sort ComplexPatterns by usage.
std::vector<std::pair<const ComplexPattern *, unsigned>> ComplexPatternList(
ComplexPatternUsage.begin(), ComplexPatternUsage.end());
@@ -172,6 +191,8 @@ class MatcherTableEmitter {
unsigned EmitMatcherList(const Matcher *N, const unsigned Indent,
unsigned StartIdx, raw_ostream &OS);
+ void EmitOperandLists(raw_ostream &OS);
+
unsigned SizeMatcherList(Matcher *N, raw_ostream &OS);
void EmitPredicateFunctions(raw_ostream &OS);
@@ -1116,11 +1137,16 @@ unsigned MatcherTableEmitter::EmitMatcher(const Matcher *N,
if (!OmitComments)
OS << "/*#Ops*/";
OS << ',';
- unsigned NumOperandBytes = 0;
+
+ std::vector<uint8_t> OpBytes;
for (unsigned i = 0, e = EN->getNumOperands(); i != e; ++i) {
- OS << ' ';
- NumOperandBytes += EmitVBRValue(EN->getOperand(i), OS);
+ uint8_t Buffer[5];
+ unsigned Len = encodeULEB128(EN->getOperand(i), Buffer);
+ for (unsigned i = 0; i < Len; ++i)
+ OpBytes.push_back(Buffer[i]);
}
+ unsigned Index = OperandTable.get(OpBytes);
+ unsigned NumOperandBytes = EmitVBRValue(Index, OS);
if (!OmitComments) {
// Print the result #'s for EmitNode.
@@ -1209,6 +1235,10 @@ unsigned MatcherTableEmitter::EmitMatcherList(const Matcher *N,
return Size;
}
+void MatcherTableEmitter::EmitOperandLists(raw_ostream &OS) {
+ OperandTable.emit(OS, [](raw_ostream &OS, uint8_t O) { OS << (unsigned)O; });
+}
+
void MatcherTableEmitter::EmitNodePredicatesFunction(
const std::vector<TreePattern *> &Preds, StringRef Decl, raw_ostream &OS) {
if (Preds.empty())
@@ -1545,9 +1575,14 @@ void llvm::EmitMatcherTable(Matcher *TheMatcher, const CodeGenDAGPatterns &CGP,
MatcherEmitter.EmitHistogram(OS);
+ OS << " static const uint8_t OperandLists[] = {\n";
+ MatcherEmitter.EmitOperandLists(OS);
+ OS << " };\n\n";
+
OS << " #undef COVERAGE_IDX_VAL\n";
OS << " #undef TARGET_VAL\n";
- OS << " SelectCodeCommon(N, MatcherTable, sizeof(MatcherTable));\n";
+ OS << " SelectCodeCommon(N, MatcherTable, sizeof(MatcherTable),\n";
+ OS << " OperandLists);\n";
OS << "}\n";
EndEmitFunction(OS);
>From e54c0fd3dd62099e76c18b3032089d352f051da7 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 29 Jan 2026 22:52:19 -0800
Subject: [PATCH 2/2] fixup! Add comments for !OmitComments
---
llvm/utils/TableGen/DAGISelMatcherEmitter.cpp | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index 6c970652b5b54..67211cdbc8c9c 100644
--- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -1136,7 +1136,7 @@ unsigned MatcherTableEmitter::EmitMatcher(const Matcher *N,
OS << ' ' << EN->getNumOperands();
if (!OmitComments)
OS << "/*#Ops*/";
- OS << ',';
+ OS << ", ";
std::vector<uint8_t> OpBytes;
for (unsigned i = 0, e = EN->getNumOperands(); i != e; ++i) {
@@ -1146,13 +1146,24 @@ unsigned MatcherTableEmitter::EmitMatcher(const Matcher *N,
OpBytes.push_back(Buffer[i]);
}
unsigned Index = OperandTable.get(OpBytes);
+ if (!OmitComments)
+ OS << "/*OperandList*/";
unsigned NumOperandBytes = EmitVBRValue(Index, OS);
if (!OmitComments) {
+ // Print the operand #'s.
+ ArrayRef<unsigned> Ops = EN->getOperandList();
+ OS << " // Ops =";
+ if (Ops.empty())
+ OS << " None";
+ else
+ for (unsigned OpNo : Ops)
+ OS << " #" << OpNo;
+
// Print the result #'s for EmitNode.
if (const EmitNodeMatcher *E = dyn_cast<EmitNodeMatcher>(EN)) {
if (unsigned NumResults = EN->getNumVTs()) {
- OS << " // Results =";
+ OS << " Results =";
unsigned First = E->getFirstResultSlot();
for (unsigned i = 0; i != NumResults; ++i)
OS << " #" << First + i;
More information about the llvm-commits
mailing list