[llvm] 667ba7f - [AMDGPU] Fix GCNRewritePartialRegUses pass: vector regclass is selected instead of scalar. (#69957)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 16 07:56:50 PST 2023
Author: Valery Pykhtin
Date: 2023-11-16T16:56:46+01:00
New Revision: 667ba7f8f31f7439e204c4efbd2aa576cd17273f
URL: https://github.com/llvm/llvm-project/commit/667ba7f8f31f7439e204c4efbd2aa576cd17273f
DIFF: https://github.com/llvm/llvm-project/commit/667ba7f8f31f7439e204c4efbd2aa576cd17273f.diff
LOG: [AMDGPU] Fix GCNRewritePartialRegUses pass: vector regclass is selected instead of scalar. (#69957)
For the following testcase:
undef %1.sub1:sgpr_96 = COPY undef %0:sgpr_32
%3:vgpr_32 = V_LSHL_ADD_U32_e64 %1.sub1:sgpr_96, ...
GCNRewritePartialRegUses produced:
%4:vgpr_32 = COPY undef %1:sgpr_32
dead %2:vgpr_32 = V_LSHL_ADD_U32_e64 %4, ...
The register class for %4 is incorrect: it should be sgpr_32 rather than
vgpr_32, because the original %1 had a scalar register class. This patch
fixes that.
Note that the GCNRewritePartialRegUses pass isn't enabled by default yet.
Added:
Modified:
llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
index 99db7e4af9fd1c9..019b64dd871e2a7 100644
--- a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
@@ -101,17 +101,16 @@ class GCNRewritePartialRegUses : public MachineFunctionPass {
/// find new regclass such that:
/// 1. It has subregs obtained by shifting each OldSubReg by RShift number
/// of bits to the right. Every "shifted" subreg should have the same
- /// SubRegRC. SubRegRC can be null, in this case it initialized using
- /// getSubRegisterClass. If CoverSubregIdx is not zero it's a subreg that
- /// "covers" all other subregs in pairs. Basically such subreg becomes a
- /// whole register.
+ /// SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers"
+ /// all other subregs in pairs. Basically such subreg becomes a whole
+ /// register.
/// 2. Resulting register class contains registers of minimal size but not
/// less than RegNumBits.
///
/// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
/// parameter:
/// OldSubReg - input parameter,
- /// SubRegRC - in/out, should be changed for unknown regclass,
+ /// SubRegRC - input parameter (cannot be null),
/// NewSubReg - output, contains shifted subregs on return.
const TargetRegisterClass *
getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
@@ -228,19 +227,7 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
for (auto &[OldSubReg, SRI] : SubRegs) {
auto &[SubRegRC, NewSubReg] = SRI;
-
- // Register class may be unknown, for example:
- // undef %0.sub4:sgpr_1024 = S_MOV_B32 01
- // %0.sub5:sgpr_1024 = S_MOV_B32 02
- // %1:vreg_64 = COPY %0.sub4_sub5
- // Register classes for subregs 'sub4' and 'sub5' are known from the
- // description of destination operand of S_MOV_B32 instruction but the
- // class for the subreg 'sub4_sub5' isn't specified by the COPY instruction.
- if (!SubRegRC)
- SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
-
- if (!SubRegRC)
- return nullptr;
+ assert(SubRegRC);
LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(OldSubReg) << ':'
<< TRI->getRegClassName(SubRegRC)
@@ -248,6 +235,8 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
<< " -> ");
if (OldSubReg == CoverSubregIdx) {
+ // Covering subreg will become a full register, RC should be allocatable.
+ assert(SubRegRC->isAllocatable());
NewSubReg = AMDGPU::NoSubRegister;
LLVM_DEBUG(dbgs() << "whole reg");
} else {
@@ -421,33 +410,42 @@ GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
auto Range = MRI->reg_nodbg_operands(Reg);
- if (Range.begin() == Range.end())
+ if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
+ return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
+ }))
return false;
- for (MachineOperand &MO : Range) {
- if (MO.getSubReg() == AMDGPU::NoSubRegister) // Whole reg used, quit.
- return false;
- }
-
auto *RC = MRI->getRegClass(Reg);
LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
<< ':' << TRI->getRegClassName(RC) << '\n');
- // Collect used subregs and constrained reg classes infered from instruction
+ // Collect used subregs and their reg classes infered from instruction
// operands.
SubRegMap SubRegs;
- for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
- assert(MO.getSubReg() != AMDGPU::NoSubRegister);
- auto *OpDescRC = getOperandRegClass(MO);
- const auto [I, Inserted] = SubRegs.try_emplace(MO.getSubReg(), OpDescRC);
- if (!Inserted && OpDescRC) {
- SubRegInfo &SRI = I->second;
- SRI.RC = SRI.RC ? TRI->getCommonSubClass(SRI.RC, OpDescRC) : OpDescRC;
- if (!SRI.RC) {
- LLVM_DEBUG(dbgs() << " Couldn't find common target regclass\n");
- return false;
+ for (MachineOperand &MO : Range) {
+ const unsigned SubReg = MO.getSubReg();
+ assert(SubReg != AMDGPU::NoSubRegister); // Due to [1].
+ LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(SubReg) << ':');
+
+ const auto [I, Inserted] = SubRegs.try_emplace(SubReg);
+ const TargetRegisterClass *&SubRegRC = I->second.RC;
+
+ if (Inserted)
+ SubRegRC = TRI->getSubRegisterClass(RC, SubReg);
+
+ if (SubRegRC) {
+ if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO)) {
+ LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << " & "
+ << TRI->getRegClassName(OpDescRC) << " = ");
+ SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC);
}
}
+
+ if (!SubRegRC) {
+ LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << '\n');
}
auto *NewRC = getMinSizeReg(RC, SubRegs);
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
index 86434d1f71a5a88..037f39df8c3e06e 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
@@ -4341,9 +4341,9 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_64_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
undef %0.sub0:sgpr_64 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
@@ -4358,11 +4358,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_96_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
- ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 22
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_96 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
@@ -4381,11 +4381,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_128_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
- ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_128 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
@@ -4425,11 +4425,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_160_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
- ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 24
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_160 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
@@ -4450,11 +4450,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_192_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
- ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 25
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_192 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
@@ -4503,11 +4503,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_224_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
- ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 26
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_224 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
@@ -4530,11 +4530,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_256_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
- ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 27
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_256 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
@@ -4612,11 +4612,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_288_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
- ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 28
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_288 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
@@ -4672,11 +4672,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_320_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
- ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 29
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_320 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
@@ -4763,11 +4763,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_352_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
- ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 210
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 210
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_352 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
@@ -4791,11 +4791,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_384_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
- ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 211
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 211
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_384 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
@@ -4929,11 +4929,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_512_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
- ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 215
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 215
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_512 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
@@ -5086,11 +5086,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_1024_w32
- ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
- ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 231
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 231
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_1024 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
index 58753081198cf58..07e49dcdafd8cc3 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs %s -o /dev/null 2>&1
---
name: test_subregs_composition_vreg_1024
tracksRegLiveness: true
@@ -82,9 +81,20 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_subregs_regclass_defined_by_dst_operand_sreg_64_xexec
- ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
+ ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
undef %0.sub2_sub3:sgpr_128 = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
%2:vreg_64 = COPY %0.sub2_sub3:sgpr_128
...
+---
+name: test_vgpr_selected_instead_of_sgpr_because_use_allows_both
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: test_vgpr_selected_instead_of_sgpr_because_use_allows_both
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_32 = COPY undef %1:sgpr_32
+ ; CHECK-NEXT: dead [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 [[COPY]], 2, undef %3:vgpr_32, implicit $exec
+ undef %1.sub1:sgpr_96 = COPY undef %0:sgpr_32
+ %3:vgpr_32 = V_LSHL_ADD_U32_e64 %1.sub1:sgpr_96, 2, undef %2:vgpr_32, implicit $exec
+...
More information about the llvm-commits
mailing list