[llvm] 7a25870 - [CodeGen] Fix incorrect usage of MCPhysReg for diff list elements

Sun Jun 4 04:01:18 PDT 2023

Author: Sergei Barannikov
Date: 2023-06-04T14:01:04+03:00
New Revision: 7a258706e3d194ea9cec364c8e535446646fbcf1

URL: https://github.com/llvm/llvm-project/commit/7a258706e3d194ea9cec364c8e535446646fbcf1
DIFF: https://github.com/llvm/llvm-project/commit/7a258706e3d194ea9cec364c8e535446646fbcf1.diff

LOG: [CodeGen] Fix incorrect usage of MCPhysReg for diff list elements

The lists contain differences between register numbers, not the register
numbers themselves. Since a difference can also be negative, this also
changes its type to signed.

Changing the type to signed exposed a "bug". For AMDGPU, which has many
registers, the first element of a sequence could be as big as ~45k.
The value does not fit into int16_t, but fits into uint16_t. The bug
didn't show up because of unsigned wrapping and truncation of the Val
field in the advance() method.

To fix the issue, I changed the way regunit difflists are encoded. The
4-bit 'scale' field of MCRegisterDesc::RegUnit was replaced by 12-bit
number of the first regunit, and the first element of each of the lists
was removed. The higher 20 bits of RegUnit field contain the initial
offset into DiffLists array.
AMDGPU has 1'409 regunits (2^12 = 4'096), and the biggest offset is
80'041 (2^20 = 1'048'576). That is, there is enough room.

Changing the encoding method also resulted in a smaller array size, the
numbers are below (I omitted targets with less than 100 elements).

```
AMDGPU   | 80052 | 78741 |  -1,6%
RISCV    |  6498 |  6297 |  -3,1%
ARM      |  4181 |  3966 |  -5,1%
AArch64  |  2770 |  2592 |  -6,4%
PPC      |  1578 |  1441 |  -8,7%
Hexagon  |   994 |   740 | -25,6%
R600     |   508 |   398 | -21,7%
VE       |   471 |   459 |  -2,5%
Sparc    |   381 |   363 |  -4,7%
X86      |   326 |   208 | -36,2%
Mips     |   253 |   200 | -20,9%
SystemZ  |   186 |   162 | -12,9%
```

Reviewed By: foad, arsenm

Differential Revision: https://reviews.llvm.org/D151036

Added: 
    

Modified: 
    llvm/include/llvm/MC/MCRegisterInfo.h
    llvm/lib/CodeGen/StackMaps.cpp
    llvm/utils/TableGen/RegisterInfoEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h
index cafa0ba4913fc..8e4890316d8c8 100644

--- a/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -111,8 +111,8 @@ struct MCRegisterDesc {
   // sub-register in SubRegs.
   uint32_t SubRegIndices;
 
-  // RegUnits - Points to the list of register units. The low 4 bits holds the
-  // Scale, the high bits hold an offset into DiffLists. See MCRegUnitIterator.
+  // Points to the list of register units. The low bits hold the first regunit
+  // number, the high bits hold an offset into DiffLists. See MCRegUnitIterator.
   uint32_t RegUnits;
 
   /// Index into list with lane mask sequences. The sequence contains a lanemask
@@ -161,7 +161,7 @@ class MCRegisterInfo {
   unsigned NumClasses;                        // Number of entries in the array
   unsigned NumRegUnits;                       // Number of regunits.
   const MCPhysReg (*RegUnitRoots)[2];         // Pointer to regunit root table.
-  const MCPhysReg *DiffLists;                 // Pointer to the 
diff lists array
+  const int16_t *DiffLists;                   // Pointer to the 
diff lists array
   const LaneBitmask *RegUnitMaskSequences;    // Pointer to lane mask sequences
                                               // for register units.
   const char *RegStrings;                     // Pointer to the string table.
@@ -194,31 +194,19 @@ class MCRegisterInfo {
   /// Don't use this class directly, use one of the specialized sub-classes
   /// defined below.
   class DiffListIterator {
-    uint16_t Val = 0;
-    const MCPhysReg *List = nullptr;
+    unsigned Val = 0;
+    const int16_t *List = nullptr;
 
   protected:
     /// Create an invalid iterator. Call init() to point to something useful.
     DiffListIterator() = default;
 
-    /// init - Point the iterator to InitVal, decoding subsequent values from
-    /// DiffList. The iterator will initially point to InitVal, sub-classes are
-    /// responsible for skipping the seed value if it is not part of the list.
-    void init(MCPhysReg InitVal, const MCPhysReg *DiffList) {
+    /// Point the iterator to InitVal, decoding subsequent values from DiffList.
+    void init(unsigned InitVal, const int16_t *DiffList) {
       Val = InitVal;
       List = DiffList;
     }
 
-    /// advance - Move to the next list position, return the applied
-    /// 
diff erential. This function does not detect the end of the list, that
-    /// is the caller's responsibility (by checking for a 0 return value).
-    MCRegister advance() {
-      assert(isValid() && "Cannot move off the end of the list.");
-      MCPhysReg D = *List++;
-      Val += D;
-      return D;
-    }
-
   public:
     /// isValid - returns true if this iterator is not yet at the end.
     bool isValid() const { return List; }
@@ -228,8 +216,11 @@ class MCRegisterInfo {
 
     /// Pre-increment to move to the next position.
     void operator++() {
+      assert(isValid() && "Cannot move off the end of the list.");
+      int16_t D = *List++;
+      Val += D;
       // The end of the list is encoded as a 0 
diff erential.
-      if (!advance())
+      if (!D)
         List = nullptr;
     }
 
@@ -248,9 +239,9 @@ class MCRegisterInfo {
   protected:
     mc_
diff list_iterator(MCRegisterInfo::DiffListIterator Iter) : Iter(Iter) {}
 
-    // Allow conversion between instantiations where valid.
-    mc_
diff list_iterator(MCRegister Reg, const MCPhysReg *DiffList) {
-      Iter.init(Reg, DiffList);
+    /// Point the iterator to InitVal, decoding subsequent values from DiffList.
+    void init(unsigned InitVal, const int16_t *DiffList) {
+      Iter.init(InitVal, DiffList);
       Val = *Iter;
     }
 
@@ -287,8 +278,11 @@ class MCRegisterInfo {
     mc_subreg_iterator(MCRegisterInfo::DiffListIterator Iter)
         : mc_
diff list_iterator(Iter) {}
     mc_subreg_iterator() = default;
-    mc_subreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI)
-        : mc_
diff list_iterator(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs) {}
+
+    mc_subreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI) {
+      assert(MCRegister::isPhysicalRegister(Reg.id()));
+      init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SubRegs);
+    }
   };
 
   /// Forward iterator over all super-registers.
@@ -299,9 +293,11 @@ class MCRegisterInfo {
     mc_superreg_iterator(MCRegisterInfo::DiffListIterator Iter)
         : mc_
diff list_iterator(Iter) {}
     mc_superreg_iterator() = default;
-    mc_superreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI)
-        : mc_
diff list_iterator(Reg,
-                               MCRI->DiffLists + MCRI->get(Reg).SuperRegs) {}
+
+    mc_superreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI) {
+      assert(MCRegister::isPhysicalRegister(Reg.id()));
+      init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
+    }
   };
 
   /// Return an iterator range over all sub-registers of \p Reg, excluding \p
@@ -351,16 +347,11 @@ class MCRegisterInfo {
   /// Initialize MCRegisterInfo, called by TableGen
   /// auto-generated routines. *DO NOT USE*.
   void InitMCRegisterInfo(const MCRegisterDesc *D, unsigned NR, unsigned RA,
-                          unsigned PC,
-                          const MCRegisterClass *C, unsigned NC,
-                          const MCPhysReg (*RURoots)[2],
-                          unsigned NRU,
-                          const MCPhysReg *DL,
-                          const LaneBitmask *RUMS,
-                          const char *Strings,
-                          const char *ClassStrings,
-                          const uint16_t *SubIndices,
-                          unsigned NumIndices,
+                          unsigned PC, const MCRegisterClass *C, unsigned NC,
+                          const MCPhysReg (*RURoots)[2], unsigned NRU,
+                          const int16_t *DL, const LaneBitmask *RUMS,
+                          const char *Strings, const char *ClassStrings,
+                          const uint16_t *SubIndices, unsigned NumIndices,
                           const SubRegCoveredBits *SubIdxRanges,
                           const uint16_t *RET) {
     Desc = D;
@@ -598,7 +589,8 @@ class MCSubRegIterator : public MCRegisterInfo::DiffListIterator {
 public:
   MCSubRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
                    bool IncludeSelf = false) {
-    init(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs);
+    assert(MCRegister::isPhysicalRegister(Reg.id()));
+    init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SubRegs);
     // Initially, the iterator points to Reg itself.
     if (!IncludeSelf)
       ++*this;
@@ -647,7 +639,8 @@ class MCSuperRegIterator : public MCRegisterInfo::DiffListIterator {
 
   MCSuperRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
                      bool IncludeSelf = false) {
-    init(Reg, MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
+    assert(MCRegister::isPhysicalRegister(Reg.id()));
+    init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
     // Initially, the iterator points to Reg itself.
     if (!IncludeSelf)
       ++*this;
@@ -675,6 +668,9 @@ inline bool MCRegisterInfo::isSuperRegister(MCRegister RegA, MCRegister RegB) co
 // MCRegUnitIterator enumerates a list of register units for Reg. The list is
 // in ascending numerical order.
 class MCRegUnitIterator : public MCRegisterInfo::DiffListIterator {
+  // The value must be kept in sync with RegisterInfoEmitter.cpp.
+  static constexpr unsigned RegUnitBits = 12;
+
 public:
   /// MCRegUnitIterator - Create an iterator that traverses the register units
   /// in Reg.
@@ -685,18 +681,9 @@ class MCRegUnitIterator : public MCRegisterInfo::DiffListIterator {
     assert(MCRegister::isPhysicalRegister(Reg.id()));
     // Decode the RegUnits MCRegisterDesc field.
     unsigned RU = MCRI->get(Reg).RegUnits;
-    unsigned Scale = RU & 15;
-    unsigned Offset = RU >> 4;
-
-    // Initialize the iterator to Reg * Scale, and the List pointer to
-    // DiffLists + Offset.
-    init(Reg * Scale, MCRI->DiffLists + Offset);
-
-    // That may not be a valid unit, we need to advance by one to get the real
-    // unit number. The first 
diff erential can be 0 which would normally
-    // terminate the list, but since we know every register has at least one
-    // unit, we can allow a 0 
diff erential here.
-    advance();
+    unsigned FirstRU = RU & ((1u << RegUnitBits) - 1);
+    unsigned Offset = RU >> RegUnitBits;
+    init(FirstRU, MCRI->DiffLists + Offset);
   }
 
   MCRegUnitIterator &operator++() {

diff  --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp
index 1058f3b03cc01..f9115e4348784 100644
--- a/llvm/lib/CodeGen/StackMaps.cpp
+++ b/llvm/lib/CodeGen/StackMaps.cpp
@@ -392,7 +392,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
         break;
       }
       I->Size = std::max(I->Size, II->Size);
-      if (TRI->isSuperRegister(I->Reg, II->Reg))
+      if (I->Reg && TRI->isSuperRegister(I->Reg, II->Reg))
         I->Reg = II->Reg;
       II->Reg = 0; // mark for deletion.
     }

diff  --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index f35dd368cf667..1f433c01dca5f 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -635,17 +635,16 @@ static void printSubRegIndex(raw_ostream &OS, const CodeGenSubRegIndex *Idx) {
 // The initial value depends on the specific list. The list is terminated by a
 // 0 
diff erential which means we can't encode repeated elements.
 
-typedef SmallVector<uint16_t, 4> DiffVec;
+typedef SmallVector<int16_t, 4> DiffVec;
 typedef SmallVector<LaneBitmask, 4> MaskVec;
 
-// Differentially encode a sequence of numbers into V. The starting value and
-// terminating 0 are not added to V, so it will have the same size as List.
-static
-DiffVec &
diff Encode(DiffVec &V, unsigned InitVal, SparseBitVector<> List) {
+// Fills V with 
diff erentials between every two consecutive elements of List.
+static DiffVec &
diff Encode(DiffVec &V, SparseBitVector<> List) {
   assert(V.empty() && "Clear DiffVec before 
diff Encode.");
-  uint16_t Val = uint16_t(InitVal);
-
-  for (uint16_t Cur : List) {
+  SparseBitVector<>::iterator I = List.begin(), E = List.end();
+  unsigned Val = *I;
+  while (++I != E) {
+    unsigned Cur = *I;
     V.push_back(Cur - Val);
     Val = Cur;
   }
@@ -656,18 +655,16 @@ template<typename Iter>
 static
 DiffVec &
diff Encode(DiffVec &V, unsigned InitVal, Iter Begin, Iter End) {
   assert(V.empty() && "Clear DiffVec before 
diff Encode.");
-  uint16_t Val = uint16_t(InitVal);
+  unsigned Val = InitVal;
   for (Iter I = Begin; I != End; ++I) {
-    uint16_t Cur = (*I)->EnumValue;
+    unsigned Cur = (*I)->EnumValue;
     V.push_back(Cur - Val);
     Val = Cur;
   }
   return V;
 }
 
-static void printDiff16(raw_ostream &OS, uint16_t Val) {
-  OS << Val;
-}
+static void printDiff16(raw_ostream &OS, int16_t Val) { OS << Val; }
 
 static void printMask(raw_ostream &OS, LaneBitmask Val) {
   OS << "LaneBitmask(0x" << PrintLaneMask(Val) << ')';
@@ -891,7 +888,6 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
   SmallVector<DiffVec, 4> SubRegLists(Regs.size());
   SmallVector<DiffVec, 4> SuperRegLists(Regs.size());
   SmallVector<DiffVec, 4> RegUnitLists(Regs.size());
-  SmallVector<unsigned, 4> RegUnitInitScale(Regs.size());
 
   // List of lane masks accompanying register unit sequences.
   SequenceToOffsetTable<MaskVec> LaneMaskSeqs;
@@ -929,31 +925,8 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
                SuperRegList.end());
     DiffSeqs.add(SuperRegLists[i]);
 
-    // Differentially encode the register unit list, seeded by register number.
-    // First compute a scale factor that allows more 
diff -lists to be reused:
-    //
-    //   D0 -> (S0, S1)
-    //   D1 -> (S2, S3)
-    //
-    // A scale factor of 2 allows D0 and D1 to share a 
diff -list. The initial
-    // value for the 
diff erential decoder is the register number multiplied by
-    // the scale.
-    //
-    // Check the neighboring registers for arithmetic progressions.
-    unsigned ScaleA = ~0u, ScaleB = ~0u;
-    SparseBitVector<> RUs = Reg.getNativeRegUnits();
-    if (I != Regs.begin() &&
-        std::prev(I)->getNativeRegUnits().count() == RUs.count())
-      ScaleB = *RUs.begin() - *std::prev(I)->getNativeRegUnits().begin();
-    if (std::next(I) != Regs.end() &&
-        std::next(I)->getNativeRegUnits().count() == RUs.count())
-      ScaleA = *std::next(I)->getNativeRegUnits().begin() - *RUs.begin();
-    unsigned Scale = std::min(ScaleB, ScaleA);
-    // Default the scale to 0 if it can't be encoded in 4 bits.
-    if (Scale >= 16)
-      Scale = 0;
-    RegUnitInitScale[i] = Scale;
-    DiffSeqs.add(
diff Encode(RegUnitLists[i], Scale * Reg.EnumValue, RUs));
+    const SparseBitVector<> &RUs = Reg.getNativeRegUnits();
+    DiffSeqs.add(
diff Encode(RegUnitLists[i], RUs));
 
     const auto &RUMasks = Reg.getRegUnitLaneMasks();
     MaskVec &LaneMaskVec = RegUnitLaneMasks[i];
@@ -978,7 +951,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
   const std::string &TargetName = std::string(Target.getName());
 
   // Emit the shared table of 
diff erential lists.
-  OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[] = {\n";
+  OS << "extern const int16_t " << TargetName << "RegDiffLists[] = {\n";
   DiffSeqs.emit(OS, printDiff16);
   OS << "};\n\n";
 
@@ -1014,10 +987,16 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
   // Emit the register descriptors now.
   i = 0;
   for (const auto &Reg : Regs) {
+    unsigned FirstRU = Reg.getNativeRegUnits().find_first();
+    unsigned Offset = DiffSeqs.get(RegUnitLists[i]);
+    // The value must be kept in sync with MCRegisterInfo.h.
+    constexpr unsigned RegUnitBits = 12;
+    assert(isUInt<RegUnitBits>(FirstRU) && "Too many regunits");
+    assert(isUInt<32 - RegUnitBits>(Offset) && "Offset is too big");
     OS << "  { " << RegStrings.get(std::string(Reg.getName())) << ", "
        << DiffSeqs.get(SubRegLists[i]) << ", " << DiffSeqs.get(SuperRegLists[i])
        << ", " << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", "
-       << (DiffSeqs.get(RegUnitLists[i]) * 16 + RegUnitInitScale[i]) << ", "
+       << (Offset << RegUnitBits | FirstRU) << ", "
        << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << " },\n";
     ++i;
   }
@@ -1651,7 +1630,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
 
   // Emit the constructor of the class...
   OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[];\n";
-  OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[];\n";
+  OS << "extern const int16_t " << TargetName << "RegDiffLists[];\n";
   OS << "extern const LaneBitmask " << TargetName << "LaneMaskLists[];\n";
   OS << "extern const char " << TargetName << "RegStrings[];\n";
   OS << "extern const char " << TargetName << "RegClassStrings[];\n";