[llvm] [AMDGPU] Generate more efficient code to avoid shift64 bug (PR #171871)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 12 06:54:44 PST 2025
================
@@ -2217,16 +2217,34 @@ bool GCNHazardRecognizer::fixShift64HighRegBug(MachineInstr *MI) {
if (AmtReg != AMDGPU::VGPR255 && MRI.isPhysRegUsed(AmtReg + 1))
return false;
- MachineOperand *Src1 = TII.getNamedOperand(*MI, AMDGPU::OpName::src1);
- bool OverlappedSrc = Src1->isReg() && TRI.regsOverlap(Src1->getReg(), AmtReg);
- bool OverlappedDst = MI->modifiesRegister(AmtReg, &TRI);
- bool Overlapped = OverlappedSrc || OverlappedDst;
-
- assert(!OverlappedDst || !OverlappedSrc ||
- Src1->getReg() == MI->getOperand(0).getReg());
assert(ST.needsAlignedVGPRs());
static_assert(AMDGPU::VGPR0 + 1 == AMDGPU::VGPR1);
+ const DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineOperand *Src1 = TII.getNamedOperand(*MI, AMDGPU::OpName::src1);
+
+ // In:
+ //
+ // Dst = shiftrev64 Amt, Src1
+ //
+ // if Dst != Src1 then avoid the bug with:
+ //
+ // Dst.sub0 = Amt
+ // Dst = shiftrev64 Dst.sub0, Src1
+
+ Register DstReg = MI->getOperand(0).getReg();
+ if (!Src1->isReg() || Src1->getReg() != DstReg) {
+ Register DstLo = TRI.getSubReg(DstReg, AMDGPU::sub0);
+ runOnInstruction(
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), DstLo)
+ .addReg(AmtReg, Amt->isKill() ? RegState::Kill : 0));
----------------
LU-JOHN wrote:
Changed.
https://github.com/llvm/llvm-project/pull/171871
More information about the llvm-commits mailing list