[llvm] [AMDGPU] Assert that we can find subregs in copyPhysReg. NFC. (PR #70332)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 26 06:48:55 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
This helped to catch a codegen failure caused by #<!-- -->69703. MachineVerifier
did not complain about this malformed COPY either before regalloc:
%9:vreg_64 = COPY %17:vgpr_32
Or after regalloc:
renamable $vgpr0_vgpr1 = COPY renamable $vgpr2, implicit $exec
But we can at least catch the problem when copyPhysReg tries to expand
it into 32-bit register moves and fails to find suitable source
registers:
$vgpr0 = V_MOV_B32_e32 $noreg, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2
$vgpr1 = V_MOV_B32_e32 $noreg, implicit $exec, implicit $vgpr2, implicit $exec
---
Full diff: https://github.com/llvm/llvm-project/pull/70332.diff
1 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+27-24)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ffcd415a66648df..62f5a17635cee1a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -742,23 +742,27 @@ static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,
for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
int16_t SubIdx = BaseIndices[Idx];
- Register Reg = RI.getSubReg(DestReg, SubIdx);
+ Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
+ Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
+ assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
unsigned Opcode = AMDGPU::S_MOV_B32;
// Is SGPR aligned? If so try to combine with next.
- Register Src = RI.getSubReg(SrcReg, SubIdx);
- bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
- bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
+ bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
+ bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
// Can use SGPR64 copy
unsigned Channel = RI.getChannelFromSubReg(SubIdx);
SubIdx = RI.getSubRegFromChannel(Channel, 2);
+ DestSubReg = RI.getSubReg(DestReg, SubIdx);
+ SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
+ assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
Opcode = AMDGPU::S_MOV_B64;
Idx++;
}
- LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
- .addReg(RI.getSubReg(SrcReg, SubIdx))
+ LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), DestSubReg)
+ .addReg(SrcSubReg)
.addReg(SrcReg, RegState::Implicit);
if (!FirstMI)
@@ -1098,6 +1102,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SubIdx = SubIndices[Idx];
else
SubIdx = SubIndices[SubIndices.size() - Idx - 1];
+ Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
+ Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
+ assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
bool IsFirstSubreg = Idx == 0;
bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;
@@ -1105,30 +1112,26 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
Register ImpDefSuper = IsFirstSubreg ? Register(DestReg) : Register();
Register ImpUseSuper = SrcReg;
- indirectCopyToAGPR(*this, MBB, MI, DL, RI.getSubReg(DestReg, SubIdx),
- RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, Overlap,
- ImpDefSuper, ImpUseSuper);
+ indirectCopyToAGPR(*this, MBB, MI, DL, DestSubReg, SrcSubReg, UseKill,
+ *RS, Overlap, ImpDefSuper, ImpUseSuper);
} else if (Opcode == AMDGPU::V_PK_MOV_B32) {
- Register DstSubReg = RI.getSubReg(DestReg, SubIdx);
- Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DstSubReg)
- .addImm(SISrcMods::OP_SEL_1)
- .addReg(SrcSubReg)
- .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
- .addReg(SrcSubReg)
- .addImm(0) // op_sel_lo
- .addImm(0) // op_sel_hi
- .addImm(0) // neg_lo
- .addImm(0) // neg_hi
- .addImm(0) // clamp
- .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestSubReg)
+ .addImm(SISrcMods::OP_SEL_1)
+ .addReg(SrcSubReg)
+ .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
+ .addReg(SrcSubReg)
+ .addImm(0) // op_sel_lo
+ .addImm(0) // op_sel_hi
+ .addImm(0) // neg_lo
+ .addImm(0) // neg_hi
+ .addImm(0) // clamp
+ .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
if (IsFirstSubreg)
MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
} else {
MachineInstrBuilder Builder =
- BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx))
- .addReg(RI.getSubReg(SrcReg, SubIdx));
+ BuildMI(MBB, MI, DL, get(Opcode), DestSubReg).addReg(SrcSubReg);
if (IsFirstSubreg)
Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
``````````
</details>
https://github.com/llvm/llvm-project/pull/70332
More information about the llvm-commits
mailing list