[llvm] 69879ff - AMDGPU: Fix using illegal VOP3 literal in frame index elimination (#115747)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 13 08:01:08 PST 2024
Author: Matt Arsenault
Date: 2024-11-13T08:01:01-08:00
New Revision: 69879ffaec8789dd4ce5f6fa26f1b5e8140190ff
URL: https://github.com/llvm/llvm-project/commit/69879ffaec8789dd4ce5f6fa26f1b5e8140190ff
DIFF: https://github.com/llvm/llvm-project/commit/69879ffaec8789dd4ce5f6fa26f1b5e8140190ff.diff
LOG: AMDGPU: Fix using illegal VOP3 literal in frame index elimination (#115747)
Added:
Modified:
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index f76d1266f495cf..246ef7ad481ab7 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2268,7 +2268,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
+ const DebugLoc &DL = MI->getDebugLoc();
assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
@@ -2496,6 +2496,25 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Register MaterializedReg = FrameReg;
Register ScavengedVGPR;
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ // For the non-immediate case, we could fall through to the default
+ // handling, but we do an in-place update of the result register here to
+ // avoid scavenging another register.
+ if (OtherOp->isImm()) {
+ int64_t TotalOffset = OtherOp->getImm() + Offset;
+
+ if (!ST.hasVOP3Literal() && SIInstrInfo::isVOP3(*MI) &&
+ !AMDGPU::isInlinableIntLiteral(TotalOffset)) {
+ // If we can't support a VOP3 literal in the VALU instruction, we
+ // can't specially fold into the add.
+ // TODO: Handle VOP3->VOP2 shrink to support the fold.
+ break;
+ }
+
+ OtherOp->setImm(TotalOffset);
+ Offset = 0;
+ }
+
if (FrameReg && !ST.enableFlatScratch()) {
// We should just do an in-place update of the result register. However,
// the value there may also be used by the add, in which case we need a
@@ -2516,15 +2535,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MaterializedReg = ScavengedVGPR;
}
- int64_t Offset = FrameInfo.getObjectOffset(Index);
- // For the non-immediate case, we could fall through to the default
- // handling, but we do an in-place update of the result register here to
- // avoid scavenging another register.
- if (OtherOp->isImm()) {
- OtherOp->setImm(OtherOp->getImm() + Offset);
- Offset = 0;
- }
-
if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {
if (ST.enableFlatScratch() &&
!TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {
@@ -2761,411 +2771,408 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
return true;
}
default: {
- // Other access to frame index
- const DebugLoc &DL = MI->getDebugLoc();
+ break;
+ }
+ }
- int64_t Offset = FrameInfo.getObjectOffset(Index);
- if (ST.enableFlatScratch()) {
- if (TII->isFLATScratch(*MI)) {
- assert((int16_t)FIOperandNum ==
- AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::saddr));
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ if (ST.enableFlatScratch()) {
+ if (TII->isFLATScratch(*MI)) {
+ assert(
+ (int16_t)FIOperandNum ==
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::saddr));
- // The offset is always swizzled, just replace it
- if (FrameReg)
- FIOp->ChangeToRegister(FrameReg, false);
+ // The offset is always swizzled, just replace it
+ if (FrameReg)
+ FIOp->ChangeToRegister(FrameReg, false);
- MachineOperand *OffsetOp =
+ MachineOperand *OffsetOp =
TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
- int64_t NewOffset = Offset + OffsetOp->getImm();
- if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
- SIInstrFlags::FlatScratch)) {
- OffsetOp->setImm(NewOffset);
- if (FrameReg)
- return false;
- Offset = 0;
- }
+ int64_t NewOffset = Offset + OffsetOp->getImm();
+ if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
+ SIInstrFlags::FlatScratch)) {
+ OffsetOp->setImm(NewOffset);
+ if (FrameReg)
+ return false;
+ Offset = 0;
+ }
- if (!Offset) {
- unsigned Opc = MI->getOpcode();
- int NewOpc = -1;
- if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
- NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
- } else if (ST.hasFlatScratchSTMode()) {
- // On GFX10 we have ST mode to use no registers for an address.
- // Otherwise we need to materialize 0 into an SGPR.
- NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
- }
+ if (!Offset) {
+ unsigned Opc = MI->getOpcode();
+ int NewOpc = -1;
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
+ NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
+ } else if (ST.hasFlatScratchSTMode()) {
+ // On GFX10 we have ST mode to use no registers for an address.
+ // Otherwise we need to materialize 0 into an SGPR.
+ NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
+ }
- if (NewOpc != -1) {
- // removeOperand doesn't fixup tied operand indexes as it goes, so
- // it asserts. Untie vdst_in for now and retie them afterwards.
- int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
- AMDGPU::OpName::vdst_in);
- bool TiedVDst = VDstIn != -1 &&
- MI->getOperand(VDstIn).isReg() &&
- MI->getOperand(VDstIn).isTied();
- if (TiedVDst)
- MI->untieRegOperand(VDstIn);
-
- MI->removeOperand(
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
-
- if (TiedVDst) {
- int NewVDst =
- AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
- int NewVDstIn =
- AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
- assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
- MI->tieOperands(NewVDst, NewVDstIn);
- }
- MI->setDesc(TII->get(NewOpc));
- return false;
+ if (NewOpc != -1) {
+ // removeOperand doesn't fixup tied operand indexes as it goes, so
+ // it asserts. Untie vdst_in for now and retie them afterwards.
+ int VDstIn =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+ bool TiedVDst = VDstIn != -1 && MI->getOperand(VDstIn).isReg() &&
+ MI->getOperand(VDstIn).isTied();
+ if (TiedVDst)
+ MI->untieRegOperand(VDstIn);
+
+ MI->removeOperand(
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
+
+ if (TiedVDst) {
+ int NewVDst =
+ AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
+ int NewVDstIn =
+ AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
+ assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
+ MI->tieOperands(NewVDst, NewVDstIn);
}
+ MI->setDesc(TII->get(NewOpc));
+ return false;
}
}
+ }
- if (!FrameReg) {
- FIOp->ChangeToImmediate(Offset);
- if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp))
- return false;
- }
+ if (!FrameReg) {
+ FIOp->ChangeToImmediate(Offset);
+ if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp))
+ return false;
+ }
- // We need to use register here. Check if we can use an SGPR or need
- // a VGPR.
- FIOp->ChangeToRegister(AMDGPU::M0, false);
- bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp);
+ // We need to use register here. Check if we can use an SGPR or need
+ // a VGPR.
+ FIOp->ChangeToRegister(AMDGPU::M0, false);
+ bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp);
- if (!Offset && FrameReg && UseSGPR) {
- FIOp->setReg(FrameReg);
- return false;
- }
+ if (!Offset && FrameReg && UseSGPR) {
+ FIOp->setReg(FrameReg);
+ return false;
+ }
- const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
- : &AMDGPU::VGPR_32RegClass;
+ const TargetRegisterClass *RC =
+ UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;
- Register TmpReg =
- RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
- FIOp->setReg(TmpReg);
- FIOp->setIsKill();
+ Register TmpReg =
+ RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
+ FIOp->setReg(TmpReg);
+ FIOp->setIsKill();
- if ((!FrameReg || !Offset) && TmpReg) {
- unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
- auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
- if (FrameReg)
- MIB.addReg(FrameReg);
- else
- MIB.addImm(Offset);
+ if ((!FrameReg || !Offset) && TmpReg) {
+ unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+ auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
+ if (FrameReg)
+ MIB.addReg(FrameReg);
+ else
+ MIB.addImm(Offset);
- return false;
- }
+ return false;
+ }
- bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
- !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
+ bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
+ !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
- Register TmpSReg =
- UseSGPR ? TmpReg
- : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
- MI, false, 0, !UseSGPR);
+ Register TmpSReg =
+ UseSGPR ? TmpReg
+ : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
+ MI, false, 0, !UseSGPR);
- // TODO: for flat scratch another attempt can be made with a VGPR index
- // if no SGPRs can be scavenged.
- if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
- report_fatal_error("Cannot scavenge register in FI elimination!");
+ // TODO: for flat scratch another attempt can be made with a VGPR index
+ // if no SGPRs can be scavenged.
+ if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
+ report_fatal_error("Cannot scavenge register in FI elimination!");
- if (!TmpSReg) {
- // Use frame register and restore it after.
- TmpSReg = FrameReg;
- FIOp->setReg(FrameReg);
- FIOp->setIsKill(false);
- }
+ if (!TmpSReg) {
+ // Use frame register and restore it after.
+ TmpSReg = FrameReg;
+ FIOp->setReg(FrameReg);
+ FIOp->setIsKill(false);
+ }
- if (NeedSaveSCC) {
- assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
- .addReg(FrameReg)
- .addImm(Offset);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
- .addReg(TmpSReg)
- .addImm(0);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
+ if (NeedSaveSCC) {
+ assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
+ .addReg(TmpSReg)
+ .addImm(0);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
+ .addImm(0)
+ .addReg(TmpSReg);
+ } else {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ }
+
+ if (!UseSGPR)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ .addReg(TmpSReg, RegState::Kill);
+
+ if (TmpSReg == FrameReg) {
+ // Undo frame register modification.
+ if (NeedSaveSCC &&
+ !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
+ MachineBasicBlock::iterator I =
+ BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
+ TmpSReg)
+ .addReg(FrameReg)
+ .addImm(-Offset);
+ I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
+ .addReg(TmpSReg)
+ .addImm(0);
+ BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
+ TmpSReg)
.addImm(0)
.addReg(TmpSReg);
} else {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
+ BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
+ FrameReg)
.addReg(FrameReg)
- .addImm(Offset);
+ .addImm(-Offset);
}
+ }
- if (!UseSGPR)
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
- .addReg(TmpSReg, RegState::Kill);
-
- if (TmpSReg == FrameReg) {
- // Undo frame register modification.
- if (NeedSaveSCC &&
- !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
- MachineBasicBlock::iterator I =
- BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
- TmpSReg)
- .addReg(FrameReg)
- .addImm(-Offset);
- I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
- .addReg(TmpSReg)
- .addImm(0);
- BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
- TmpSReg)
- .addImm(0)
- .addReg(TmpSReg);
- } else {
- BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
- FrameReg)
- .addReg(FrameReg)
- .addImm(-Offset);
- }
- }
+ return false;
+ }
- return false;
- }
+ bool IsMUBUF = TII->isMUBUF(*MI);
+
+ if (!IsMUBUF && !MFI->isBottomOfStack()) {
+ // Convert to a swizzled stack address by scaling by the wave size.
+ // In an entry function/kernel the offset is already swizzled.
+ bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
+ bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
+ !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
+ const TargetRegisterClass *RC = IsSALU && !LiveSCC
+ ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::VGPR_32RegClass;
+ bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
+ MI->getOpcode() == AMDGPU::S_MOV_B32;
+ Register ResultReg =
+ IsCopy ? MI->getOperand(0).getReg()
+ : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
- bool IsMUBUF = TII->isMUBUF(*MI);
-
- if (!IsMUBUF && !MFI->isBottomOfStack()) {
- // Convert to a swizzled stack address by scaling by the wave size.
- // In an entry function/kernel the offset is already swizzled.
- bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
- bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
- !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
- const TargetRegisterClass *RC = IsSALU && !LiveSCC
- ? &AMDGPU::SReg_32RegClass
- : &AMDGPU::VGPR_32RegClass;
- bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
- MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
- MI->getOpcode() == AMDGPU::S_MOV_B32;
- Register ResultReg =
- IsCopy ? MI->getOperand(0).getReg()
- : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
-
- int64_t Offset = FrameInfo.getObjectOffset(Index);
- if (Offset == 0) {
- unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
- : AMDGPU::V_LSHRREV_B32_e64;
- Register TmpResultReg = ResultReg;
- if (IsSALU && LiveSCC) {
- TmpResultReg = RS->scavengeRegisterBackwards(
- AMDGPU::VGPR_32RegClass, MI, false, 0);
- }
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ if (Offset == 0) {
+ unsigned OpCode =
+ IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64;
+ Register TmpResultReg = ResultReg;
+ if (IsSALU && LiveSCC) {
+ TmpResultReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
+ MI, false, 0);
+ }
- auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg);
- if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
- // For V_LSHRREV, the operands are reversed (the shift count goes
- // first).
- Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
- else
- Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
- if (IsSALU && !LiveSCC)
- Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
- if (IsSALU && LiveSCC) {
- Register NewDest =
- IsCopy ? ResultReg
- : RS->scavengeRegisterBackwards(AMDGPU::SReg_32RegClass,
- Shift, false, 0);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- NewDest)
- .addReg(TmpResultReg);
- ResultReg = NewDest;
- }
- } else {
- MachineInstrBuilder MIB;
- if (!IsSALU) {
- if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
- nullptr) {
- // Reuse ResultReg in intermediate step.
- Register ScaledReg = ResultReg;
-
- BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
- ScaledReg)
+ auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg);
+ if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
+ // For V_LSHRREV, the operands are reversed (the shift count goes
+ // first).
+ Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
+ else
+ Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
+ if (IsSALU && !LiveSCC)
+ Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
+ if (IsSALU && LiveSCC) {
+ Register NewDest =
+ IsCopy ? ResultReg
+ : RS->scavengeRegisterBackwards(AMDGPU::SReg_32RegClass,
+ Shift, false, 0);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), NewDest)
+ .addReg(TmpResultReg);
+ ResultReg = NewDest;
+ }
+ } else {
+ MachineInstrBuilder MIB;
+ if (!IsSALU) {
+ if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
+ nullptr) {
+ // Reuse ResultReg in intermediate step.
+ Register ScaledReg = ResultReg;
+
+ BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+ ScaledReg)
.addImm(ST.getWavefrontSizeLog2())
.addReg(FrameReg);
- const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
+ const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
- // TODO: Fold if use instruction is another add of a constant.
- if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
- // FIXME: This can fail
- MIB.addImm(Offset);
- MIB.addReg(ScaledReg, RegState::Kill);
- if (!IsVOP2)
- MIB.addImm(0); // clamp bit
- } else {
- assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
- "Need to reuse carry out register");
-
- // Use scavenged unused carry out as offset register.
- Register ConstOffsetReg;
- if (!isWave32)
- ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
- else
- ConstOffsetReg = MIB.getReg(1);
-
- BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
- .addImm(Offset);
- MIB.addReg(ConstOffsetReg, RegState::Kill);
- MIB.addReg(ScaledReg, RegState::Kill);
+ // TODO: Fold if use instruction is another add of a constant.
+ if (IsVOP2 ||
+ AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
+ // FIXME: This can fail
+ MIB.addImm(Offset);
+ MIB.addReg(ScaledReg, RegState::Kill);
+ if (!IsVOP2)
MIB.addImm(0); // clamp bit
- }
+ } else {
+ assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
+ "Need to reuse carry out register");
+
+ // Use scavenged unused carry out as offset register.
+ Register ConstOffsetReg;
+ if (!isWave32)
+ ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
+ else
+ ConstOffsetReg = MIB.getReg(1);
+
+ BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32),
+ ConstOffsetReg)
+ .addImm(Offset);
+ MIB.addReg(ConstOffsetReg, RegState::Kill);
+ MIB.addReg(ScaledReg, RegState::Kill);
+ MIB.addImm(0); // clamp bit
}
}
- if (!MIB || IsSALU) {
- // We have to produce a carry out, and there isn't a free SGPR pair
- // for it. We can keep the whole computation on the SALU to avoid
- // clobbering an additional register at the cost of an extra mov.
-
- // We may have 1 free scratch SGPR even though a carry out is
- // unavailable. Only one additional mov is needed.
- Register TmpScaledReg = IsCopy && IsSALU
- ? ResultReg
- : RS->scavengeRegisterBackwards(
- AMDGPU::SReg_32_XM0RegClass, MI,
- false, 0, /*AllowSpill=*/false);
- Register ScaledReg =
- TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
- Register TmpResultReg = ScaledReg;
-
- if (!LiveSCC) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg)
- .addReg(FrameReg)
- .addImm(ST.getWavefrontSizeLog2());
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg)
- .addReg(TmpResultReg, RegState::Kill)
- .addImm(Offset);
+ }
+ if (!MIB || IsSALU) {
+ // We have to produce a carry out, and there isn't a free SGPR pair
+ // for it. We can keep the whole computation on the SALU to avoid
+ // clobbering an additional register at the cost of an extra mov.
+
+ // We may have 1 free scratch SGPR even though a carry out is
+ // unavailable. Only one additional mov is needed.
+ Register TmpScaledReg = IsCopy && IsSALU
+ ? ResultReg
+ : RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_32_XM0RegClass, MI,
+ false, 0, /*AllowSpill=*/false);
+ Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
+ Register TmpResultReg = ScaledReg;
+
+ if (!LiveSCC) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg)
+ .addReg(FrameReg)
+ .addImm(ST.getWavefrontSizeLog2());
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg)
+ .addReg(TmpResultReg, RegState::Kill)
+ .addImm(Offset);
+ } else {
+ TmpResultReg = RS->scavengeRegisterBackwards(
+ AMDGPU::VGPR_32RegClass, MI, false, 0, /*AllowSpill=*/true);
+
+ MachineInstrBuilder Add;
+ if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS))) {
+ BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+ TmpResultReg)
+ .addImm(ST.getWavefrontSizeLog2())
+ .addReg(FrameReg);
+ if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
+ BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32), ResultReg)
+ .addImm(Offset);
+ Add.addReg(ResultReg, RegState::Kill)
+ .addReg(TmpResultReg, RegState::Kill)
+ .addImm(0);
+ } else
+ Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
} else {
- TmpResultReg = RS->scavengeRegisterBackwards(
- AMDGPU::VGPR_32RegClass, MI, false, 0, /*AllowSpill=*/true);
-
- MachineInstrBuilder Add;
- if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS))) {
- BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+ assert(Offset > 0 && isUInt<24>(2 * ST.getMaxWaveScratchSize()) &&
+ "offset is unsafe for v_mad_u32_u24");
+
+ // We start with a frame pointer with a wave space value, and
+ // an offset in lane-space. We are materializing a lane space
+ // value. We can either do a right shift of the frame pointer
+ // to get to lane space, or a left shift of the offset to get
+ // to wavespace. We can right shift after the computation to
+ // get back to the desired per-lane value. We are using the
+ // mad_u32_u24 primarily as an add with no carry out clobber.
+ bool IsInlinableLiteral =
+ AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm());
+ if (!IsInlinableLiteral) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32),
TmpResultReg)
- .addImm(ST.getWavefrontSizeLog2())
- .addReg(FrameReg);
- if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
- BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32),
- ResultReg)
- .addImm(Offset);
- Add.addReg(ResultReg, RegState::Kill)
- .addReg(TmpResultReg, RegState::Kill)
- .addImm(0);
- } else
- Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
- } else {
- assert(Offset > 0 &&
- isUInt<24>(2 * ST.getMaxWaveScratchSize()) &&
- "offset is unsafe for v_mad_u32_u24");
-
- // We start with a frame pointer with a wave space value, and
- // an offset in lane-space. We are materializing a lane space
- // value. We can either do a right shift of the frame pointer
- // to get to lane space, or a left shift of the offset to get
- // to wavespace. We can right shift after the computation to
- // get back to the desired per-lane value. We are using the
- // mad_u32_u24 primarily as an add with no carry out clobber.
- bool IsInlinableLiteral = AMDGPU::isInlinableLiteral32(
- Offset, ST.hasInv2PiInlineImm());
- if (!IsInlinableLiteral) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32),
- TmpResultReg)
- .addImm(Offset);
- }
-
- Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_U32_U24_e64),
- TmpResultReg);
-
- if (!IsInlinableLiteral) {
- Add.addReg(TmpResultReg, RegState::Kill);
- } else {
- // We fold the offset into mad itself if its inlinable.
- Add.addImm(Offset);
- }
- Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
- TmpResultReg)
- .addImm(ST.getWavefrontSizeLog2())
- .addReg(TmpResultReg);
+ .addImm(Offset);
}
- Register NewDest = IsCopy ? ResultReg
- : RS->scavengeRegisterBackwards(
- AMDGPU::SReg_32RegClass, *Add,
- false, 0, /*AllowSpill=*/true);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- NewDest)
+ Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_U32_U24_e64),
+ TmpResultReg);
+
+ if (!IsInlinableLiteral) {
+ Add.addReg(TmpResultReg, RegState::Kill);
+ } else {
+ // We fold the offset into mad itself if its inlinable.
+ Add.addImm(Offset);
+ }
+ Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+ TmpResultReg)
+ .addImm(ST.getWavefrontSizeLog2())
.addReg(TmpResultReg);
- ResultReg = NewDest;
}
- if (!IsSALU)
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
- .addReg(TmpResultReg, RegState::Kill);
- else
- ResultReg = TmpResultReg;
- // If there were truly no free SGPRs, we need to undo everything.
- if (!TmpScaledReg.isValid()) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
+
+ Register NewDest = IsCopy ? ResultReg
+ : RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_32RegClass, *Add,
+ false, 0, /*AllowSpill=*/true);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ NewDest)
+ .addReg(TmpResultReg);
+ ResultReg = NewDest;
+ }
+ if (!IsSALU)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
+ .addReg(TmpResultReg, RegState::Kill);
+ else
+ ResultReg = TmpResultReg;
+ // If there were truly no free SGPRs, we need to undo everything.
+ if (!TmpScaledReg.isValid()) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
.addReg(ScaledReg, RegState::Kill)
.addImm(-Offset);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
.addReg(FrameReg)
.addImm(ST.getWavefrontSizeLog2());
- }
}
}
+ }
- // Don't introduce an extra copy if we're just materializing in a mov.
- if (IsCopy) {
- MI->eraseFromParent();
- return true;
- }
- FIOp->ChangeToRegister(ResultReg, false, false, true);
- return false;
+ // Don't introduce an extra copy if we're just materializing in a mov.
+ if (IsCopy) {
+ MI->eraseFromParent();
+ return true;
}
+ FIOp->ChangeToRegister(ResultReg, false, false, true);
+ return false;
+ }
- if (IsMUBUF) {
- // Disable offen so we don't need a 0 vgpr base.
- assert(static_cast<int>(FIOperandNum) ==
- AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::vaddr));
+ if (IsMUBUF) {
+ // Disable offen so we don't need a 0 vgpr base.
+ assert(
+ static_cast<int>(FIOperandNum) ==
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr));
- auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
- assert((SOffset.isImm() && SOffset.getImm() == 0));
+ auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
+ assert((SOffset.isImm() && SOffset.getImm() == 0));
- if (FrameReg != AMDGPU::NoRegister)
- SOffset.ChangeToRegister(FrameReg, false);
+ if (FrameReg != AMDGPU::NoRegister)
+ SOffset.ChangeToRegister(FrameReg, false);
- int64_t Offset = FrameInfo.getObjectOffset(Index);
- int64_t OldImm
- = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
- int64_t NewOffset = OldImm + Offset;
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ int64_t OldImm =
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
+ int64_t NewOffset = OldImm + Offset;
- if (TII->isLegalMUBUFImmOffset(NewOffset) &&
- buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
- MI->eraseFromParent();
- return true;
- }
+ if (TII->isLegalMUBUFImmOffset(NewOffset) &&
+ buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
+ MI->eraseFromParent();
+ return true;
}
+ }
- // If the offset is simply too big, don't convert to a scratch wave offset
- // relative index.
+ // If the offset is simply too big, don't convert to a scratch wave offset
+ // relative index.
- FIOp->ChangeToImmediate(Offset);
- if (!TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) {
- Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
- MI, false, 0);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ FIOp->ChangeToImmediate(Offset);
+ if (!TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) {
+ Register TmpReg =
+ RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
.addImm(Offset);
- FIOp->ChangeToRegister(TmpReg, false, false, true);
- }
+ FIOp->ChangeToRegister(TmpReg, false, false, true);
}
- }
+
return false;
}
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
index fc6cd74bf052ca..831e246426ba70 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
@@ -980,8 +980,8 @@ body: |
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX7-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
- ; GFX7-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+ ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
; GFX7-NEXT: SI_RETURN implicit $vgpr0
;
; GFX8-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -989,8 +989,8 @@ body: |
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX8-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
- ; GFX8-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+ ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
; GFX8-NEXT: SI_RETURN implicit $vgpr0
;
; GFX900-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -998,8 +998,8 @@ body: |
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX900-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
- ; GFX900-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
; GFX900-NEXT: SI_RETURN implicit $vgpr0
;
; GFX90A-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -1007,8 +1007,8 @@ body: |
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
; GFX90A-NEXT: SI_RETURN implicit $vgpr0
;
; GFX10-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -1020,8 +1020,8 @@ body: |
; GFX10-NEXT: SI_RETURN implicit $vgpr0
;
; GFX940-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
- ; GFX940: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
- ; GFX940-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+ ; GFX940: $sgpr4 = S_MOV_B32 72
+ ; GFX940-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $sgpr4, 1, implicit $exec
; GFX940-NEXT: SI_RETURN implicit $vgpr0
;
; GFX11-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -2036,3 +2036,521 @@ body: |
S_ENDPGM 0
...
+
+---
+name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+tracksRegLiveness: true
+frameInfo:
+ localFrameSize: 12576
+stack:
+ - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+ - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+ ; GFX7: liveins: $sgpr4
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288
+ ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+ ; GFX8: liveins: $sgpr4
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288
+ ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+ ; GFX900: liveins: $sgpr4
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+ ; GFX90A: liveins: $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+ ; GFX10: liveins: $sgpr4
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+ ; GFX940: liveins: $sgpr4
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+ ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+ ; GFX940-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+ ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+ ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+ ; GFX11: liveins: $sgpr4
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+ ; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+ ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX11-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+ ; GFX12: liveins: $sgpr4
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+ ; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+ ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+ ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0, renamable dead $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+tracksRegLiveness: true
+frameInfo:
+ localFrameSize: 12576
+stack:
+ - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+ - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+ ; GFX7: liveins: $sgpr4
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288
+ ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+ ; GFX8: liveins: $sgpr4
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288
+ ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+ ; GFX900: liveins: $sgpr4
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+ ; GFX90A: liveins: $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+ ; GFX10: liveins: $sgpr4
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+ ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+ ;
+ ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+ ; GFX940: liveins: $sgpr4
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+ ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+ ; GFX940-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+ ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+ ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+ ;
+ ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+ ; GFX11: liveins: $sgpr4
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+ ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+ ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+ ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+ ;
+ ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+ ; GFX12: liveins: $sgpr4
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+ ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+ ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+ ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+ ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+ renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+
+...
+
+---
+name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+tracksRegLiveness: true
+frameInfo:
+ localFrameSize: 12576
+stack:
+ - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+ - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+ ; GFX7: liveins: $sgpr4
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288
+ ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+ ; GFX8: liveins: $sgpr4
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288
+ ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+ ; GFX900: liveins: $sgpr4
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+ ; GFX90A: liveins: $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+ ; GFX10: liveins: $sgpr4
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+ ; GFX940: liveins: $sgpr4
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+ ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+ ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+ ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+ ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+ ; GFX11: liveins: $sgpr4
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+ ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+ ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX11-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+ ; GFX12: liveins: $sgpr4
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+ ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+ ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+ ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+tracksRegLiveness: true
+frameInfo:
+ localFrameSize: 12576
+stack:
+ - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+ - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+ ; GFX7: liveins: $sgpr4
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX7-NEXT: $sgpr6 = S_MOV_B32 12288
+ ; GFX7-NEXT: $vgpr1, dead $sgpr6_sgpr7 = V_ADD_CO_U32_e64 killed $sgpr6, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+ ; GFX8: liveins: $sgpr4
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX8-NEXT: $sgpr6 = S_MOV_B32 12288
+ ; GFX8-NEXT: $vgpr1, dead $sgpr6_sgpr7 = V_ADD_CO_U32_e64 killed $sgpr6, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+ ; GFX900: liveins: $sgpr4
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+ ; GFX90A: liveins: $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+ ; GFX10: liveins: $sgpr4
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+ ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+ ; GFX940: liveins: $sgpr4
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+ ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+ ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+ ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+ ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+ ; GFX11: liveins: $sgpr4
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+ ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+ ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+ ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+ ; GFX12: liveins: $sgpr4
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+ ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+ ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+ ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+ ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $vcc
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
index 9c2fef05124d7f..de198941b565e6 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
@@ -1692,3 +1692,61 @@ body: |
SI_RETURN implicit $vgpr0
...
+
+---
+name: v_add_u32_e64_imm_fi_vop3_literal_error
+tracksRegLiveness: true
+frameInfo:
+ localFrameSize: 12576
+stack:
+ - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+ - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; MUBUF-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error
+ ; MUBUF: liveins: $sgpr4
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+ ; MUBUF-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; MUBUF-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 64, killed $vgpr1, 0, implicit $exec
+ ; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+ ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error
+ ; MUBUFW32: liveins: $sgpr4
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; MUBUFW32-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262112, implicit-def $scc
+ ; MUBUFW32-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc
+ ; MUBUFW32-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1048576, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr33, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; MUBUFW32-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1048576, implicit-def dead $scc
+ ; MUBUFW32-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error
+ ; FLATSCRW64: liveins: $sgpr4
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+ ; FLATSCRW64-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; FLATSCRW64-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 64, killed $sgpr5, 0, implicit $exec
+ ; FLATSCRW64-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e64 64, %stack.1, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index c11b7d67a8a214..b1ea275a97a394 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -327,4 +327,23 @@ entry:
ret void
}
+; GCN-LABEL: {{^}}fi_vop3_literal_error:
+; CI: v_lshr_b32_e64 [[SCALED_FP:v[0-9]+]], s33, 6
+; CI: s_movk_i32 vcc_lo, 0x3000
+; CI-NEXT: v_add_i32_e32 [[SCALED_FP]], vcc, vcc_lo, [[SCALED_FP]]
+; CI-NEXT: v_add_i32_e32 v0, vcc, 64, [[SCALED_FP]]
+
+; GFX9-MUBUF: v_lshrrev_b32_e64 [[SCALED_FP:v[0-9]+]], 6, s33
+; GFX9-MUBUF-NEXT: v_add_u32_e32 [[SCALED_FP]], 0x3000, [[SCALED_FP]]
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 64, [[SCALED_FP]]
+define void @fi_vop3_literal_error() {
+entry:
+ %pin.low = alloca i32, align 8192, addrspace(5)
+ %local.area = alloca [1060 x i64], align 4096, addrspace(5)
+ store i32 0, ptr addrspace(5) %pin.low, align 4
+ %gep.small.offset = getelementptr i8, ptr addrspace(5) %local.area, i64 64
+ %load1 = load volatile i64, ptr addrspace(5) %gep.small.offset, align 4
+ ret void
+}
+
attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 12afc267354220..29fbb0bb1c6c97 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -20,8 +20,9 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(ptr addrspace(1) %out) {
; MUBUF-LABEL: local_stack_offset_uses_sp:
; MUBUF: ; %bb.0: ; %entry
; MUBUF-NEXT: s_add_u32 s0, s0, s17
+; MUBUF-NEXT: v_mov_b32_e32 v1, 0x3000
; MUBUF-NEXT: s_addc_u32 s1, s1, 0
-; MUBUF-NEXT: v_mov_b32_e32 v0, 0x3040
+; MUBUF-NEXT: v_add_u32_e32 v0, 64, v1
; MUBUF-NEXT: v_mov_b32_e32 v1, 0
; MUBUF-NEXT: v_mov_b32_e32 v2, 0x2000
; MUBUF-NEXT: s_mov_b32 s4, 0
@@ -110,7 +111,8 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) {
; MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000
; MUBUF-NEXT: v_lshrrev_b32_e64 v3, 6, s33
-; MUBUF-NEXT: v_add_u32_e32 v2, 0x3040, v3
+; MUBUF-NEXT: v_add_u32_e32 v3, 0x3000, v3
+; MUBUF-NEXT: v_add_u32_e32 v2, 64, v3
; MUBUF-NEXT: v_mov_b32_e32 v3, 0
; MUBUF-NEXT: v_mov_b32_e32 v4, 0x2000
; MUBUF-NEXT: s_mov_b32 s4, 0
More information about the llvm-commits mailing list