[llvm] [TableGen] Fix computeRegUnitLaneMasks for ad hoc aliasing (PR #79835)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 30 02:23:48 PST 2024


https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/79835

>From 5cdd4ba70d9fca571372c341d8f091e378c08781 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 25 Jan 2024 15:33:06 +0000
Subject: [PATCH 1/2] [TableGen] Fix computeRegUnitLaneMasks for ad hoc
 aliasing

With ad hoc aliasing (using the Aliases field in register definitions)
we can have subregisters with disjoint lane masks that nevertheless
share a regunit. In this case we need to take the union of their
lane masks, not the intersection.

This was inadvertently broken by https://reviews.llvm.org/D157864
---
 llvm/utils/TableGen/CodeGenRegisters.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp
index d1abdb74ea4a..b9d404aaee5f 100644
--- a/llvm/utils/TableGen/CodeGenRegisters.cpp
+++ b/llvm/utils/TableGen/CodeGenRegisters.cpp
@@ -2144,7 +2144,7 @@ void CodeGenRegBank::computeRegUnitLaneMasks() {
     // Create an initial lane mask for all register units.
     const auto &RegUnits = Register.getRegUnits();
     CodeGenRegister::RegUnitLaneMaskList RegUnitLaneMasks(
-        RegUnits.count(), LaneBitmask::getAll());
+        RegUnits.count(), LaneBitmask::getNone());
     // Iterate through SubRegisters.
     typedef CodeGenRegister::SubRegMap SubRegMap;
     const SubRegMap &SubRegs = Register.getSubRegs();
@@ -2163,7 +2163,7 @@ void CodeGenRegBank::computeRegUnitLaneMasks() {
         unsigned u = 0;
         for (unsigned RU : RegUnits) {
           if (SUI == RU) {
-            RegUnitLaneMasks[u] &= LaneMask;
+            RegUnitLaneMasks[u] |= LaneMask;
             assert(!Found);
             Found = true;
           }
@@ -2173,6 +2173,10 @@ void CodeGenRegBank::computeRegUnitLaneMasks() {
         assert(Found);
       }
     }
+    for (auto &Mask : RegUnitLaneMasks) {
+      if (Mask.none())
+        Mask = LaneBitmask::getAll();
+    }
     Register.setRegUnitLaneMasks(RegUnitLaneMasks);
   }
 }

>From ae64aeddd82cccc8c3a3eab55f14ea5477a97829 Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa" <marukawa at nec.com>
Date: Sat, 27 Jan 2024 21:51:40 +0900
Subject: [PATCH 2/2] Add test case for computeRegUnitLaneMasks fix

This test is based on VE implementation.  VE has two subregisters.  Both are
disjointed.  However, many instructions contaminate other register very often.
Therefore, VE defines them as aliased to avoid register allocator to allocate
both registers simultaneously.  This trick conflicts wht recent
computeRegUnitLaneMasks optimization.  The optimization is reverted now.  And
I'm adding test case for that.
---
 llvm/test/TableGen/DisjointAliasSubregs.td | 48 ++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 llvm/test/TableGen/DisjointAliasSubregs.td

diff --git a/llvm/test/TableGen/DisjointAliasSubregs.td b/llvm/test/TableGen/DisjointAliasSubregs.td
new file mode 100644
index 000000000000..813e27feb1ef
--- /dev/null
+++ b/llvm/test/TableGen/DisjointAliasSubregs.td
@@ -0,0 +1,48 @@
+// RUN: llvm-tblgen -gen-register-info -I %p/../../include %s | FileCheck %s
+// Checks that tablegen calculation of register unit's lanemask.
+// Detail is described just after tests.
+
+include "llvm/Target/Target.td"
+
+class MyReg<string n, list<Register> subregs = [], list<Register> aliases = []>
+  : Register<n> {
+  let Namespace = "Test";
+  let SubRegs = subregs;
+  let Aliases = aliases;
+  let CoveredBySubRegs = 1;
+}
+class MyClass<int size, list<ValueType> types, dag registers>
+  : RegisterClass<"Test", types, size, registers> {
+  let Size = size;
+}
+
+// Register Example (VE):
+// SX0 -- SW0 (sub_i32)
+//     \- SF0 (sub_f32)
+
+def sub_i32     : SubRegIndex<32, 32>;        // Low 32 bit (32..63)
+def sub_f32     : SubRegIndex<32>;            // High 32 bit (0..31)
+
+def SW0 : MyReg<"sw0", []>;
+def SF0 : MyReg<"sf0", [], [!cast<MyReg>("SW0")]>;
+
+let SubRegIndices = [sub_i32, sub_f32] in {
+def SX0 : MyReg<"s0", [SW0, SF0]>;
+}
+
+def I64 : MyClass<1, [i64], (sequence "SX%u", 0, 0)>;
+
+def TestTarget : Target;
+
+// CHECK: extern const LaneBitmask TestTargetLaneMaskLists[] = {
+// CHECK-NEXT: /* 0 */ LaneBitmask(0x0000000000000003), LaneBitmask::getAll(),
+// CHECK-NEXT: /* 2 */ LaneBitmask(0xFFFFFFFFFFFFFFFF), LaneBitmask::getAll(),
+
+// A sub_f32 has 0x0001 as a lane bitmask.  A sub_i32 has 0x0002.
+// Both are disjointed, but VE defines those are aliased registers since
+// many instructions contaminate other part of register values.  So, VE
+// doesn't want to allocate SF0 and SW0 simultaneously.  TableGen's
+// computeRegUnitLaneMasks is optimizable using intersection algorithm,
+// but it calculates 0x0000 as a intersection of 0x0001 and 0x0002.
+// This causes wrong live-in calculation later.  Union mechanism can
+// calculate correct lanemaks as 0x0003.



More information about the llvm-commits mailing list