[llvm] 667ba7f - [AMDGPU] Fix GCNRewritePartialRegUses pass: vector regclass is selected instead of scalar. (#69957)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 16 07:56:50 PST 2023


Author: Valery Pykhtin
Date: 2023-11-16T16:56:46+01:00
New Revision: 667ba7f8f31f7439e204c4efbd2aa576cd17273f

URL: https://github.com/llvm/llvm-project/commit/667ba7f8f31f7439e204c4efbd2aa576cd17273f
DIFF: https://github.com/llvm/llvm-project/commit/667ba7f8f31f7439e204c4efbd2aa576cd17273f.diff

LOG: [AMDGPU] Fix GCNRewritePartialRegUses pass: vector regclass is selected instead of scalar. (#69957)

For the following testcase:

undef %1.sub1:sgpr_96 = COPY undef %0:sgpr_32
%3:vgpr_32 = V_LSHL_ADD_U32_e64 %1.sub1:sgpr_96, ...

GCNRewritePartialRegUses produced:

%4:vgpr_32 = COPY undef %1:sgpr_32
dead %2:vgpr_32 = V_LSHL_ADD_U32_e64 %4, ...

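The register class of %4 is incorrect: it should be sgpr_32 rather than
vgpr_32, because the original %1 had a scalar register class (sgpr_96).
This patch fixes that by initializing each subregister's class from the
original register class (via getSubRegisterClass) and only then
narrowing it with the classes required by the instruction operands.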

Note that the GCNRewritePartialRegUses pass isn't enabled by default yet.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
    llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
    llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
index 99db7e4af9fd1c9..019b64dd871e2a7 100644
--- a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
@@ -101,17 +101,16 @@ class GCNRewritePartialRegUses : public MachineFunctionPass {
   /// find new regclass such that:
   ///   1. It has subregs obtained by shifting each OldSubReg by RShift number
   ///      of bits to the right. Every "shifted" subreg should have the same
-  ///      SubRegRC. SubRegRC can be null, in this case it initialized using
-  ///      getSubRegisterClass. If CoverSubregIdx is not zero it's a subreg that
-  ///      "covers" all other subregs in pairs. Basically such subreg becomes a
-  ///      whole register.
+  ///      SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers"
+  ///      all other subregs in pairs. Basically such subreg becomes a whole
+  ///      register.
   ///   2. Resulting register class contains registers of minimal size but not
   ///      less than RegNumBits.
   ///
   /// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
   /// parameter:
   ///   OldSubReg - input parameter,
-  ///   SubRegRC  - in/out, should be changed for unknown regclass,
+  ///   SubRegRC  - input parameter (cannot be null),
   ///   NewSubReg - output, contains shifted subregs on return.
   const TargetRegisterClass *
   getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
@@ -228,19 +227,7 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
   BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
   for (auto &[OldSubReg, SRI] : SubRegs) {
     auto &[SubRegRC, NewSubReg] = SRI;
-
-    // Register class may be unknown, for example:
-    //   undef %0.sub4:sgpr_1024 = S_MOV_B32 01
-    //   %0.sub5:sgpr_1024 = S_MOV_B32 02
-    //   %1:vreg_64 = COPY %0.sub4_sub5
-    // Register classes for subregs 'sub4' and 'sub5' are known from the
-    // description of destination operand of S_MOV_B32 instruction but the
-    // class for the subreg 'sub4_sub5' isn't specified by the COPY instruction.
-    if (!SubRegRC)
-      SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
-
-    if (!SubRegRC)
-      return nullptr;
+    assert(SubRegRC);
 
     LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(OldSubReg) << ':'
                       << TRI->getRegClassName(SubRegRC)
@@ -248,6 +235,8 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
                       << " -> ");
 
     if (OldSubReg == CoverSubregIdx) {
+      // Covering subreg will become a full register, RC should be allocatable.
+      assert(SubRegRC->isAllocatable());
       NewSubReg = AMDGPU::NoSubRegister;
       LLVM_DEBUG(dbgs() << "whole reg");
     } else {
@@ -421,33 +410,42 @@ GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
 
 bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
   auto Range = MRI->reg_nodbg_operands(Reg);
-  if (Range.begin() == Range.end())
+  if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
+        return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
+      }))
     return false;
 
-  for (MachineOperand &MO : Range) {
-    if (MO.getSubReg() == AMDGPU::NoSubRegister) // Whole reg used, quit.
-      return false;
-  }
-
   auto *RC = MRI->getRegClass(Reg);
   LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
                     << ':' << TRI->getRegClassName(RC) << '\n');
 
-  // Collect used subregs and constrained reg classes infered from instruction
+  // Collect used subregs and their reg classes infered from instruction
   // operands.
   SubRegMap SubRegs;
-  for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
-    assert(MO.getSubReg() != AMDGPU::NoSubRegister);
-    auto *OpDescRC = getOperandRegClass(MO);
-    const auto [I, Inserted] = SubRegs.try_emplace(MO.getSubReg(), OpDescRC);
-    if (!Inserted && OpDescRC) {
-      SubRegInfo &SRI = I->second;
-      SRI.RC = SRI.RC ? TRI->getCommonSubClass(SRI.RC, OpDescRC) : OpDescRC;
-      if (!SRI.RC) {
-        LLVM_DEBUG(dbgs() << "  Couldn't find common target regclass\n");
-        return false;
+  for (MachineOperand &MO : Range) {
+    const unsigned SubReg = MO.getSubReg();
+    assert(SubReg != AMDGPU::NoSubRegister); // Due to [1].
+    LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(SubReg) << ':');
+
+    const auto [I, Inserted] = SubRegs.try_emplace(SubReg);
+    const TargetRegisterClass *&SubRegRC = I->second.RC;
+
+    if (Inserted)
+      SubRegRC = TRI->getSubRegisterClass(RC, SubReg);
+
+    if (SubRegRC) {
+      if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO)) {
+        LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << " & "
+                          << TRI->getRegClassName(OpDescRC) << " = ");
+        SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC);
       }
     }
+
+    if (!SubRegRC) {
+      LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
+      return false;
+    }
+    LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << '\n');
   }
 
   auto *NewRC = getMinSizeReg(RC, SubRegs);

diff  --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
index 86434d1f71a5a88..037f39df8c3e06e 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
@@ -4341,9 +4341,9 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_64_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
     undef %0.sub0:sgpr_64 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4358,11 +4358,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_96_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 22
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_96 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4381,11 +4381,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_128_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_128 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4425,11 +4425,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_160_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 24
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_160 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4450,11 +4450,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_192_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 25
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_192 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4503,11 +4503,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_224_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 26
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_224 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4530,11 +4530,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_256_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 27
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_256 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4612,11 +4612,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_288_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 28
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_288 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4672,11 +4672,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_320_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 29
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_320 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4763,11 +4763,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_352_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 210
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 210
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_352 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4791,11 +4791,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_384_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 211
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 211
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_384 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -4929,11 +4929,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_512_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 215
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 215
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_512 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0
@@ -5086,11 +5086,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_sgpr_1024_w32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
-    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 231
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 231
     ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
     undef %0.sub0:sgpr_1024 = S_MOV_B32 00
     S_NOP 0, implicit %0.sub0

diff  --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
index 58753081198cf58..07e49dcdafd8cc3 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs %s -o /dev/null 2>&1
 ---
 name: test_subregs_composition_vreg_1024
 tracksRegLiveness: true
@@ -82,9 +81,20 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: test_subregs_regclass_defined_by_dst_operand_sreg_64_xexec
-    ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
+    ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
     ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
     undef %0.sub2_sub3:sgpr_128 = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
     %2:vreg_64 = COPY %0.sub2_sub3:sgpr_128
 ...
+---
+name: test_vgpr_selected_instead_of_sgpr_because_use_allows_both
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_vgpr_selected_instead_of_sgpr_because_use_allows_both
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr_32 = COPY undef %1:sgpr_32
+    ; CHECK-NEXT: dead [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 [[COPY]], 2, undef %3:vgpr_32, implicit $exec
+    undef %1.sub1:sgpr_96 = COPY undef %0:sgpr_32
+    %3:vgpr_32 = V_LSHL_ADD_U32_e64 %1.sub1:sgpr_96, 2, undef %2:vgpr_32, implicit $exec
+...
 


        


More information about the llvm-commits mailing list