[llvm] TableGen: Generate reverseComposeSubRegIndices (PR #127050)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 13 04:28:26 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-regalloc
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
This is necessary to enable composing subregisters in peephole-opt.
For now, use a brute-force table to look up the result. The worst-case
target is AMDGPU, which needs a 399 x 399 entry table.
---
Full diff: https://github.com/llvm/llvm-project/pull/127050.diff
3 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetRegisterInfo.h (+21)
- (modified) llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp (+80)
- (modified) llvm/utils/TableGen/RegisterInfoEmitter.cpp (+51-3)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 114149ff53d85..66fd3fb9b0526 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -732,6 +732,22 @@ class TargetRegisterInfo : public MCRegisterInfo {
return composeSubRegIndicesImpl(a, b);
}
+  /// Invert composeSubRegIndices() with respect to its second operand.
+  ///
+  /// Looks for a subregister index x satisfying
+  /// composeSubRegIndices(a, x) == b. If the null subregister is returned,
+  /// that relationship does not hold.
+  ///
+  /// The special null sub-register index composes as the identity: if either
+  /// \p a or \p b is null, the other index is returned unchanged.
+  unsigned reverseComposeSubRegIndices(unsigned a, unsigned b) const {
+    // Null on either side short-circuits to the other index; only a pair of
+    // non-null indices is forwarded to the Impl hook.
+    if (a == 0 || b == 0)
+      return a == 0 ? b : a;
+    return reverseComposeSubRegIndicesImpl(a, b);
+  }
+
/// Transforms a LaneMask computed for one subregister to the lanemask that
/// would have been computed when composing the subsubregisters with IdxA
/// first. @sa composeSubRegIndices()
@@ -774,6 +790,11 @@ class TargetRegisterInfo : public MCRegisterInfo {
llvm_unreachable("Target has no sub-registers");
}
+ /// Overridden by TableGen in targets that have sub-registers. Called by
+ /// reverseComposeSubRegIndices() only when both indices are non-null.
+ virtual unsigned reverseComposeSubRegIndicesImpl(unsigned, unsigned) const {
+ llvm_unreachable("Target has no sub-registers");
+ }
+
/// Overridden by TableGen in targets that have sub-registers.
virtual LaneBitmask
composeSubRegIndexLaneMaskImpl(unsigned, LaneBitmask) const {
diff --git a/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp b/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp
index d0a3cfa84ee01..8fbd470815b79 100644
--- a/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp
+++ b/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp
@@ -164,3 +164,83 @@ TEST(AMDGPU, TestVGPRLimitsPerOccupancy) {
testGPRLimits("VGPR", true, test);
}
+
+// Printable name for SubReg; the null index is rendered as "<none>".
+static const char *printSubReg(const TargetRegisterInfo &TRI, unsigned SubReg) {
+  if (SubReg == 0)
+    return "<none>";
+  return TRI.getSubRegIndexName(SubReg);
+}
+
+// Checks reverseComposeSubRegIndices() on AMDGPU: first against hand-written
+// expectations, then against composeSubRegIndices() over every pair of
+// indices in [1, getNumSubRegIndices()).
+TEST(AMDGPU, TestReverseComposeSubRegIndices) {
+  auto TM = createAMDGPUTargetMachine("amdgcn-amd-", "gfx900", "");
+  // Bail out quietly if no target machine could be created.
+  if (!TM)
+    return;
+  GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
+                  std::string(TM->getTargetFeatureString()), *TM);
+
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+// Expands to exactly one statement: there is deliberately no semicolon after
+// `while (0)`, so each use below supplies its own terminator. On failure the
+// streamed message reads "A, B => actual, *expected".
+#define EXPECT_SUBREG_EQ(A, B, Expect) \
+  do { \
+    unsigned Reversed = TRI->reverseComposeSubRegIndices(A, B); \
+    EXPECT_EQ(Reversed, Expect) \
+        << printSubReg(*TRI, A) << ", " << printSubReg(*TRI, B) << " => " \
+        << printSubReg(*TRI, Reversed) << ", *" << printSubReg(*TRI, Expect); \
+  } while (0)
+
+  // A null index on either side yields the other index.
+  EXPECT_SUBREG_EQ(AMDGPU::NoSubRegister, AMDGPU::sub0, AMDGPU::sub0);
+  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::NoSubRegister, AMDGPU::sub0);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub0, AMDGPU::sub0);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub1);
+  EXPECT_SUBREG_EQ(AMDGPU::sub1, AMDGPU::sub0, AMDGPU::NoSubRegister);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub0, AMDGPU::sub0);
+  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub0_sub1,
+                   AMDGPU::sub0_sub1);
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2_sub3,
+                   AMDGPU::sub0_sub1_sub2_sub3);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2,
+                   AMDGPU::sub1_sub2);
+  EXPECT_SUBREG_EQ(AMDGPU::sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3,
+                   AMDGPU::NoSubRegister);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub1_sub2_sub3, AMDGPU::sub0_sub1_sub2_sub3,
+                   AMDGPU::NoSubRegister);
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3,
+                   AMDGPU::sub1_sub2_sub3);
+
+  // Pairs for which the null subregister is the expected answer in both
+  // argument orders.
+  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub30, AMDGPU::NoSubRegister);
+  EXPECT_SUBREG_EQ(AMDGPU::sub30, AMDGPU::sub0, AMDGPU::NoSubRegister);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub31, AMDGPU::NoSubRegister);
+  EXPECT_SUBREG_EQ(AMDGPU::sub31, AMDGPU::sub0, AMDGPU::NoSubRegister);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub30, AMDGPU::NoSubRegister);
+  EXPECT_SUBREG_EQ(AMDGPU::sub30, AMDGPU::sub0_sub1, AMDGPU::NoSubRegister);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub30_sub31,
+                   AMDGPU::NoSubRegister);
+  EXPECT_SUBREG_EQ(AMDGPU::sub30_sub31, AMDGPU::sub0_sub1,
+                   AMDGPU::NoSubRegister);
+
+  // Round-trip check over every ordered pair of indices starting at 1.
+  for (unsigned SubIdx0 = 1, LastSubReg = TRI->getNumSubRegIndices();
+       SubIdx0 != LastSubReg; ++SubIdx0) {
+    for (unsigned SubIdx1 = 1; SubIdx1 != LastSubReg; ++SubIdx1) {
+      // Whenever the forward composition is non-null, reversing it must give
+      // back SubIdx1.
+      if (unsigned ForwardCompose =
+              TRI->composeSubRegIndices(SubIdx0, SubIdx1)) {
+        unsigned ReverseComposed =
+            TRI->reverseComposeSubRegIndices(SubIdx0, ForwardCompose);
+        EXPECT_EQ(ReverseComposed, SubIdx1);
+      }
+
+      // Whenever the reverse composition is non-null, composing forward again
+      // must reproduce SubIdx1.
+      if (unsigned ReverseCompose =
+              TRI->reverseComposeSubRegIndices(SubIdx0, SubIdx1)) {
+        unsigned Recompose = TRI->composeSubRegIndices(SubIdx0, ReverseCompose);
+        EXPECT_EQ(Recompose, SubIdx1);
+      }
+    }
+  }
+}
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 2f9ec2e6e7a22..752ebdf01b948 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -680,8 +680,6 @@ static bool combine(const CodeGenSubRegIndex *Idx,
void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
StringRef ClassName) {
const auto &SubRegIndices = RegBank.getSubRegIndices();
- OS << "unsigned " << ClassName
- << "::composeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const {\n";
// Many sub-register indexes are composition-compatible, meaning that
//
@@ -713,7 +711,10 @@ void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
RowMap.push_back(Found);
}
- // Output the row map if there is multiple rows.
+ OS << "unsigned " << ClassName
+ << "::composeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const {\n";
+
+ // Output the row map if there are multiple rows.
if (Rows.size() > 1) {
OS << " static const " << getMinimalTypeForRange(Rows.size(), 32)
<< " RowMap[" << SubRegIndicesSize << "] = {\n ";
@@ -743,6 +744,51 @@ void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
else
OS << " return Rows[0][IdxB];\n";
OS << "}\n\n";
+
+ // Generate the reverse case.
+ //
+ // FIXME: This is the brute force approach. Compress the table similar to the
+ // forward case.
+ OS << "unsigned " << ClassName
+ << "::reverseComposeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const "
+ "{\n";
+ OS << " static const " << getMinimalTypeForRange(SubRegIndicesSize + 1, 32)
+ << " Table[" << SubRegIndicesSize << "][" << SubRegIndicesSize
+ << "] = {\n";
+
+ // Find values where composeSubReg(A, X) == B;
+ for (const auto &IdxA : SubRegIndices) {
+ OS << " { ";
+
+ SmallVectorImpl<const CodeGenSubRegIndex *> &Row =
+ Rows[RowMap[IdxA.EnumValue - 1]];
+ for (const auto &IdxB : SubRegIndices) {
+ const CodeGenSubRegIndex *FoundReverse = nullptr;
+
+ for (unsigned i = 0, e = SubRegIndicesSize; i != e; ++i) {
+ const CodeGenSubRegIndex *This = &SubRegIndices[i];
+ const CodeGenSubRegIndex *Composed = Row[i];
+ if (Composed == &IdxB) {
+ if (FoundReverse && FoundReverse != This) // Not unique
+ break;
+ FoundReverse = This;
+ }
+ }
+
+ if (FoundReverse) {
+ OS << FoundReverse->getQualifiedName() << ", ";
+ } else {
+ OS << "0, ";
+ }
+ }
+ OS << "},\n";
+ }
+
+ OS << " };\n\n";
+ OS << " --IdxA; assert(IdxA < " << SubRegIndicesSize << ");\n"
+ << " --IdxB; assert(IdxB < " << SubRegIndicesSize << ");\n";
+ OS << " return Table[IdxA][IdxB];\n";
+ OS << " }\n\n";
}
void RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
@@ -1113,6 +1159,8 @@ void RegisterInfoEmitter::runTargetHeader(raw_ostream &OS) {
<< " unsigned PC = 0, unsigned HwMode = 0);\n";
if (!RegBank.getSubRegIndices().empty()) {
OS << " unsigned composeSubRegIndicesImpl"
+ << "(unsigned, unsigned) const override;\n"
+ << " unsigned reverseComposeSubRegIndicesImpl"
<< "(unsigned, unsigned) const override;\n"
<< " LaneBitmask composeSubRegIndexLaneMaskImpl"
<< "(unsigned, LaneBitmask) const override;\n"
``````````
</details>
https://github.com/llvm/llvm-project/pull/127050
More information about the llvm-commits
mailing list