[llvm] TableGen: Generate reverseComposeSubRegIndices (PR #127050)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 13 04:28:26 PST 2025


llvmbot wrote:



@llvm/pr-subscribers-llvm-regalloc

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

This is necessary to enable composing subregisters in peephole-opt.
For now, use a brute-force table to find the return value. The worst-case
target is AMDGPU, with a 399 x 399 entry table.
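
In other words, the new hook answers the query "which index x satisfies composeSubRegIndices(a, x) == b?". As a rough reference, the same query could be answered by a linear search; the helper below is hypothetical and shown only for illustration, while the patch instead emits a precomputed per-target table:

```cpp
#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;

// Hypothetical reference helper, for illustration only: linear search over
// all sub-register indices instead of the generated table lookup.
static unsigned reverseComposeSubRegIndicesRef(const TargetRegisterInfo &TRI,
                                               unsigned A, unsigned B) {
  if (!A)
    return B; // The null index composes as the identity.
  if (!B)
    return A;
  for (unsigned X = 1, E = TRI.getNumSubRegIndices(); X != E; ++X)
    if (TRI.composeSubRegIndices(A, X) == B)
      return X;
  return 0; // NoSubRegister: no such index exists.
}
```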

---
Full diff: https://github.com/llvm/llvm-project/pull/127050.diff


3 Files Affected:

- (modified) llvm/include/llvm/CodeGen/TargetRegisterInfo.h (+21) 
- (modified) llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp (+80) 
- (modified) llvm/utils/TableGen/RegisterInfoEmitter.cpp (+51-3) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 114149ff53d85..66fd3fb9b0526 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -732,6 +732,22 @@ class TargetRegisterInfo : public MCRegisterInfo {
     return composeSubRegIndicesImpl(a, b);
   }
 
+  /// Return a subregister index that, when composed with \p a, produces the
+  /// subregister index \p b.
+  ///
+  /// Finds a subregister index x such that composeSubRegIndices(a, x) ==
+  /// b. Note that this relationship does not hold if
+  /// reverseComposeSubRegIndices returns the null subregister.
+  ///
+  /// The special null sub-register index composes as the identity.
+  unsigned reverseComposeSubRegIndices(unsigned a, unsigned b) const {
+    if (!a)
+      return b;
+    if (!b)
+      return a;
+    return reverseComposeSubRegIndicesImpl(a, b);
+  }
+
   /// Transforms a LaneMask computed for one subregister to the lanemask that
   /// would have been computed when composing the subsubregisters with IdxA
   /// first. @sa composeSubRegIndices()
@@ -774,6 +790,11 @@ class TargetRegisterInfo : public MCRegisterInfo {
     llvm_unreachable("Target has no sub-registers");
   }
 
+  /// Overridden by TableGen in targets that have sub-registers.
+  virtual unsigned reverseComposeSubRegIndicesImpl(unsigned, unsigned) const {
+    llvm_unreachable("Target has no sub-registers");
+  }
+
   /// Overridden by TableGen in targets that have sub-registers.
   virtual LaneBitmask
   composeSubRegIndexLaneMaskImpl(unsigned, LaneBitmask) const {
diff --git a/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp b/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp
index d0a3cfa84ee01..8fbd470815b79 100644
--- a/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp
+++ b/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp
@@ -164,3 +164,83 @@ TEST(AMDGPU, TestVGPRLimitsPerOccupancy) {
 
   testGPRLimits("VGPR", true, test);
 }
+
+static const char *printSubReg(const TargetRegisterInfo &TRI, unsigned SubReg) {
+  return SubReg ? TRI.getSubRegIndexName(SubReg) : "<none>";
+}
+
+TEST(AMDGPU, TestReverseComposeSubRegIndices) {
+  auto TM = createAMDGPUTargetMachine("amdgcn-amd-", "gfx900", "");
+  if (!TM)
+    return;
+  GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
+                  std::string(TM->getTargetFeatureString()), *TM);
+
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+#define EXPECT_SUBREG_EQ(A, B, Expect)                                         \
+  do {                                                                         \
+    unsigned Reversed = TRI->reverseComposeSubRegIndices(A, B);                \
+    EXPECT_EQ(Reversed, Expect)                                                \
+        << printSubReg(*TRI, A) << ", " << printSubReg(*TRI, B) << " => "      \
+        << printSubReg(*TRI, Reversed) << ", *" << printSubReg(*TRI, Expect);  \
+  } while (0)
+
+  EXPECT_SUBREG_EQ(AMDGPU::NoSubRegister, AMDGPU::sub0, AMDGPU::sub0);
+  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::NoSubRegister, AMDGPU::sub0);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub0, AMDGPU::sub0);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub1);
+  EXPECT_SUBREG_EQ(AMDGPU::sub1, AMDGPU::sub0, AMDGPU::NoSubRegister);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub0, AMDGPU::sub0);
+  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub0_sub1,
+                   AMDGPU::sub0_sub1);
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2_sub3,
+                   AMDGPU::sub0_sub1_sub2_sub3);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2,
+                   AMDGPU::sub1_sub2);
+  EXPECT_SUBREG_EQ(AMDGPU::sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3,
+                   AMDGPU::NoSubRegister);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub1_sub2_sub3, AMDGPU::sub0_sub1_sub2_sub3,
+                   AMDGPU::NoSubRegister);
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3,
+                   AMDGPU::sub1_sub2_sub3);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub30, AMDGPU::NoSubRegister);
+  EXPECT_SUBREG_EQ(AMDGPU::sub30, AMDGPU::sub0, AMDGPU::NoSubRegister);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub31, AMDGPU::NoSubRegister);
+  EXPECT_SUBREG_EQ(AMDGPU::sub31, AMDGPU::sub0, AMDGPU::NoSubRegister);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub30, AMDGPU::NoSubRegister);
+  EXPECT_SUBREG_EQ(AMDGPU::sub30, AMDGPU::sub0_sub1, AMDGPU::NoSubRegister);
+
+  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub30_sub31,
+                   AMDGPU::NoSubRegister);
+  EXPECT_SUBREG_EQ(AMDGPU::sub30_sub31, AMDGPU::sub0_sub1,
+                   AMDGPU::NoSubRegister);
+
+  for (unsigned SubIdx0 = 1, LastSubReg = TRI->getNumSubRegIndices();
+       SubIdx0 != LastSubReg; ++SubIdx0) {
+    for (unsigned SubIdx1 = 1; SubIdx1 != LastSubReg; ++SubIdx1) {
+      if (unsigned ForwardCompose =
+              TRI->composeSubRegIndices(SubIdx0, SubIdx1)) {
+        unsigned ReverseComposed =
+            TRI->reverseComposeSubRegIndices(SubIdx0, ForwardCompose);
+        EXPECT_EQ(ReverseComposed, SubIdx1);
+      }
+
+      if (unsigned ReverseCompose =
+              TRI->reverseComposeSubRegIndices(SubIdx0, SubIdx1)) {
+        unsigned Recompose = TRI->composeSubRegIndices(SubIdx0, ReverseCompose);
+        EXPECT_EQ(Recompose, SubIdx1);
+      }
+    }
+  }
+}
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 2f9ec2e6e7a22..752ebdf01b948 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -680,8 +680,6 @@ static bool combine(const CodeGenSubRegIndex *Idx,
 void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
                                                    StringRef ClassName) {
   const auto &SubRegIndices = RegBank.getSubRegIndices();
-  OS << "unsigned " << ClassName
-     << "::composeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const {\n";
 
   // Many sub-register indexes are composition-compatible, meaning that
   //
@@ -713,7 +711,10 @@ void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
     RowMap.push_back(Found);
   }
 
-  // Output the row map if there is multiple rows.
+  OS << "unsigned " << ClassName
+     << "::composeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const {\n";
+
+  // Output the row map if there are multiple rows.
   if (Rows.size() > 1) {
     OS << "  static const " << getMinimalTypeForRange(Rows.size(), 32)
        << " RowMap[" << SubRegIndicesSize << "] = {\n    ";
@@ -743,6 +744,51 @@ void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
   else
     OS << "  return Rows[0][IdxB];\n";
   OS << "}\n\n";
+
+  // Generate the reverse case.
+  //
+  // FIXME: This is the brute-force approach. Compress the table as is done
+  // for the forward case.
+  OS << "unsigned " << ClassName
+     << "::reverseComposeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const "
+        "{\n";
+  OS << "  static const " << getMinimalTypeForRange(SubRegIndicesSize + 1, 32)
+     << " Table[" << SubRegIndicesSize << "][" << SubRegIndicesSize
+     << "] = {\n";
+
+  // For each (A, B), find the value X where composeSubRegIndices(A, X) == B.
+  for (const auto &IdxA : SubRegIndices) {
+    OS << "    { ";
+
+    SmallVectorImpl<const CodeGenSubRegIndex *> &Row =
+        Rows[RowMap[IdxA.EnumValue - 1]];
+    for (const auto &IdxB : SubRegIndices) {
+      const CodeGenSubRegIndex *FoundReverse = nullptr;
+
+      for (unsigned i = 0, e = SubRegIndicesSize; i != e; ++i) {
+        const CodeGenSubRegIndex *This = &SubRegIndices[i];
+        const CodeGenSubRegIndex *Composed = Row[i];
+        if (Composed == &IdxB) {
+          if (FoundReverse && FoundReverse != This) // Not unique
+            break;
+          FoundReverse = This;
+        }
+      }
+
+      if (FoundReverse) {
+        OS << FoundReverse->getQualifiedName() << ", ";
+      } else {
+        OS << "0, ";
+      }
+    }
+    OS << "},\n";
+  }
+
+  OS << "  };\n\n";
+  OS << "  --IdxA; assert(IdxA < " << SubRegIndicesSize << ");\n"
+     << "  --IdxB; assert(IdxB < " << SubRegIndicesSize << ");\n";
+  OS << "  return Table[IdxA][IdxB];\n";
+  OS << "  }\n\n";
 }
 
 void RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
@@ -1113,6 +1159,8 @@ void RegisterInfoEmitter::runTargetHeader(raw_ostream &OS) {
      << "      unsigned PC = 0, unsigned HwMode = 0);\n";
   if (!RegBank.getSubRegIndices().empty()) {
     OS << "  unsigned composeSubRegIndicesImpl"
+       << "(unsigned, unsigned) const override;\n"
+       << "  unsigned reverseComposeSubRegIndicesImpl"
        << "(unsigned, unsigned) const override;\n"
        << "  LaneBitmask composeSubRegIndexLaneMaskImpl"
        << "(unsigned, LaneBitmask) const override;\n"

``````````
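
For context, the emitted hook follows the same mechanics as the existing composeSubRegIndicesImpl: a dense table indexed by the two sub-register indices (shifted down by one, since 0 is NoSubRegister), with a 0 entry meaning no reverse composition exists. Below is a minimal, self-contained sketch of that lookup scheme, using an invented three-index target and placeholder table values rather than real TableGen output:

```cpp
#include <cassert>
#include <cstdint>

// Placeholder values for an invented target with three sub-register indices
// (1, 2, 3); a 0 entry means "no reverse composition exists" (NoSubRegister).
static const uint8_t ReverseTable[3][3] = {
    {1, 2, 3},
    {0, 1, 0},
    {1, 0, 2},
};

// Mirrors the shape of the generated reverseComposeSubRegIndicesImpl: valid
// sub-register indices start at 1 (0 is NoSubRegister and is handled by the
// non-Impl wrapper), so both operands are shifted down before the lookup.
unsigned reverseComposeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) {
  --IdxA;
  assert(IdxA < 3 && "invalid subregister index");
  --IdxB;
  assert(IdxB < 3 && "invalid subregister index");
  return ReverseTable[IdxA][IdxB];
}
```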

</details>


https://github.com/llvm/llvm-project/pull/127050


More information about the llvm-commits mailing list