[llvm] 98aa843 - [AMDGPU] Fix register class for a subreg in GCNRewritePartialRegUses.

Valery Pykhtin via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 5 23:48:57 PDT 2023


Author: Valery Pykhtin
Date: 2023-07-06T08:48:45+02:00
New Revision: 98aa8439f5e05cecdf232f303842be3c07d72547

URL: https://github.com/llvm/llvm-project/commit/98aa8439f5e05cecdf232f303842be3c07d72547
DIFF: https://github.com/llvm/llvm-project/commit/98aa8439f5e05cecdf232f303842be3c07d72547.diff

LOG: [AMDGPU] Fix register class for a subreg in GCNRewritePartialRegUses.

1. Improved the code that deduces a register class from instruction definitions. Previously, if some instruction didn't specify a register class for an operand, this was treated as having no register class information at all, even if other instructions specified the class.

2. Added a check on the required size of the resulting register, because in some cases classes with smaller registers were selected (for example, VReg_1).

Reviewed By: arsenm, #amdgpu

Differential Revision: https://reviews.llvm.org/D152832

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
    llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
    llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
index 102ee15306ea69..99db7e4af9fd1c 100644
--- a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
@@ -97,14 +97,25 @@ class GCNRewritePartialRegUses : public MachineFunctionPass {
   const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
                                            SubRegMap &SubRegs) const;
 
-  /// Try to find register class containing registers of minimal size for a
-  /// given register class RC and used subregs as keys in SubRegs by shifting
-  /// offsets of the subregs by RShift value to the right. If found return the
-  /// resulting regclass and new shifted subregs as values in SubRegs map.
-  /// If CoverSubregIdx isn't null it specifies covering subreg.
+  /// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to
+  /// find new regclass such that:
+  ///   1. It has subregs obtained by shifting each OldSubReg by RShift number
+  ///      of bits to the right. Every "shifted" subreg should have the same
+  ///      SubRegRC. SubRegRC can be null, in this case it initialized using
+  ///      getSubRegisterClass. If CoverSubregIdx is not zero it's a subreg that
+  ///      "covers" all other subregs in pairs. Basically such subreg becomes a
+  ///      whole register.
+  ///   2. Resulting register class contains registers of minimal size but not
+  ///      less than RegNumBits.
+  ///
+  /// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
+  /// parameter:
+  ///   OldSubReg - input parameter,
+  ///   SubRegRC  - in/out, should be changed for unknown regclass,
+  ///   NewSubReg - output, contains shifted subregs on return.
   const TargetRegisterClass *
   getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
-                                unsigned CoverSubregIdx,
+                                unsigned RegNumBits, unsigned CoverSubregIdx,
                                 SubRegMap &SubRegs) const;
 
   /// Update live intervals after rewriting OldReg to NewReg with SubRegs map
@@ -207,8 +218,8 @@ const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
 
 const TargetRegisterClass *
 GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
-    const TargetRegisterClass *RC, unsigned RShift, unsigned CoverSubregIdx,
-    SubRegMap &SubRegs) const {
+    const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
+    unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
 
   unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
   LLVM_DEBUG(dbgs() << "  Shift " << RShift << ", reg align " << RCAlign
@@ -218,7 +229,13 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
   for (auto &[OldSubReg, SRI] : SubRegs) {
     auto &[SubRegRC, NewSubReg] = SRI;
 
-    // Instruction operand may not specify required register class (ex. COPY).
+    // Register class may be unknown, for example:
+    //   undef %0.sub4:sgpr_1024 = S_MOV_B32 01
+    //   %0.sub5:sgpr_1024 = S_MOV_B32 02
+    //   %1:vreg_64 = COPY %0.sub4_sub5
+    // Register classes for subregs 'sub4' and 'sub5' are known from the
+    // description of destination operand of S_MOV_B32 instruction but the
+    // class for the subreg 'sub4_sub5' isn't specified by the COPY instruction.
     if (!SubRegRC)
       SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
 
@@ -256,21 +273,26 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
   // ClassMask is the set of all register classes such that each class is
   // allocatable, aligned, has all shifted subregs and each subreg has required
   // register class (see SubRegRC above). Now select first (that is largest)
-  // register class with registers of minimal size.
+  // register class with registers of minimal but not less than RegNumBits size.
+  // We have to check register size because we may encounter classes of smaller
+  // registers like VReg_1 in some situations.
   const TargetRegisterClass *MinRC = nullptr;
   unsigned MinNumBits = std::numeric_limits<unsigned>::max();
   for (unsigned ClassID : ClassMask.set_bits()) {
     auto *RC = TRI->getRegClass(ClassID);
     unsigned NumBits = TRI->getRegSizeInBits(*RC);
-    if (NumBits < MinNumBits) {
+    if (NumBits < MinNumBits && NumBits >= RegNumBits) {
       MinNumBits = NumBits;
       MinRC = RC;
     }
+    if (MinNumBits == RegNumBits)
+      break;
   }
 #ifndef NDEBUG
   if (MinRC) {
     assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
     for (auto [SubReg, SRI] : SubRegs)
+      // Check that all registers in MinRC support SRI.SubReg subregister.
       assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
   }
 #endif
@@ -302,7 +324,8 @@ GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
   // If covering subreg is found shift everything so the covering subreg would
   // be in the rightmost position.
   if (CoverSubreg != AMDGPU::NoSubRegister)
-    return getRegClassWithShiftedSubregs(RC, Offset, CoverSubreg, SubRegs);
+    return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg,
+                                         SubRegs);
 
   // Otherwise find subreg with maximum required alignment and shift it and all
   // other subregs to the rightmost possible position with respect to the
@@ -328,7 +351,7 @@ GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
     llvm_unreachable("misaligned subreg");
 
   unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
-  return getRegClassWithShiftedSubregs(RC, RShift, 0, SubRegs);
+  return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs);
 }
 
 // Only the subrange's lanemasks of the original interval need to be modified.
@@ -406,6 +429,10 @@ bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
       return false;
   }
 
+  auto *RC = MRI->getRegClass(Reg);
+  LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
+                    << ':' << TRI->getRegClassName(RC) << '\n');
+
   // Collect used subregs and constrained reg classes infered from instruction
   // operands.
   SubRegMap SubRegs;
@@ -413,14 +440,15 @@ bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
     assert(MO.getSubReg() != AMDGPU::NoSubRegister);
     auto *OpDescRC = getOperandRegClass(MO);
     const auto [I, Inserted] = SubRegs.try_emplace(MO.getSubReg(), OpDescRC);
-    if (!Inserted) {
+    if (!Inserted && OpDescRC) {
       SubRegInfo &SRI = I->second;
-      SRI.RC = TRI->getCommonSubClass(SRI.RC, OpDescRC);
+      SRI.RC = SRI.RC ? TRI->getCommonSubClass(SRI.RC, OpDescRC) : OpDescRC;
+      if (!SRI.RC) {
+        LLVM_DEBUG(dbgs() << "  Couldn't find common target regclass\n");
+        return false;
+      }
     }
   }
-  auto *RC = MRI->getRegClass(Reg);
-  LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
-                    << ':' << TRI->getRegClassName(RC) << '\n');
 
   auto *NewRC = getMinSizeReg(RC, SubRegs);
   if (!NewRC) {

diff  --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
index 9c042351bbcbea..d51e63f92e6914 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
@@ -4341,9 +4341,9 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_64_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
     undef %0.sub0:sgpr_64 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4358,11 +4358,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_96_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 22
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_96 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4381,11 +4381,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_128_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_128 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4425,11 +4425,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_160_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 24
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_160 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4450,11 +4450,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_192_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 25
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_192 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4503,11 +4503,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_224_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 26
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_224 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4530,11 +4530,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_256_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 27
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_256 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4612,11 +4612,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_288_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 28
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_288 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4672,11 +4672,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_320_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 29
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_320 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4763,11 +4763,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_352_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 210
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 210
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_352 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4791,11 +4791,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_384_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 211
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 211
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_384 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4929,11 +4929,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_512_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 215
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 215
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_512 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -5086,11 +5086,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_1024_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 231
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 231
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_1024 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0

diff  --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
index 32e313c25fd237..135d8b8e8ae979 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
@@ -52,3 +52,39 @@ body:             |
     S_NOP 0, implicit %4.sub1_sub2_sub3_sub4_sub5_sub6
     S_NOP 0, implicit %4.sub3_sub4_sub5_sub6_sub7_sub8
 ...
+---
+name: test_subregs_unknown_regclass_from_instructions
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_subregs_unknown_regclass_from_instructions
+    ; CHECK: undef %2.sub0:sgpr_64 = S_MOV_B32 1
+    ; CHECK-NEXT: %2.sub1:sgpr_64 = S_MOV_B32 2
+    ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_64 = COPY %2
+    undef %0.sub4:sgpr_1024 = S_MOV_B32 01
+    %0.sub5:sgpr_1024 = S_MOV_B32 02
+    %1:vreg_64 = COPY %0.sub4_sub5
+...
+---
+name: test_subregs_unknown_regclass_from_instructions_sgpr_1024_to_sgpr_64
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_1024 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_subregs_unknown_regclass_from_instructions_sgpr_1024_to_sgpr_64
+    ; CHECK: dead [[COPY:%[0-9]+]]:vreg_64 = COPY undef %2:sgpr_64
+    %1:vreg_64 = COPY undef %0.sub4_sub5
+...
+---
+name: test_subregs_regclass_defined_by_dst_operand_sreg_64_xexec
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_subregs_regclass_defined_by_dst_operand_sreg_64_xexec
+    ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
+    ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
+    undef %0.sub2_sub3:sgpr_128 = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
+    %2:vreg_64 = COPY %0.sub2_sub3:sgpr_128
+...
+


        


More information about the llvm-commits mailing list