[llvm] 8ea1cf3 - Revert "[AMDGPU] SIFixSGPRCopies refactoring"
Evgenii Stepanov via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 10 11:32:51 PDT 2022
Author: Evgenii Stepanov
Date: 2022-08-10T11:32:46-07:00
New Revision: 8ea1cf31118a54295b5d671edc61fca683d6d70f
URL: https://github.com/llvm/llvm-project/commit/8ea1cf31118a54295b5d671edc61fca683d6d70f
DIFF: https://github.com/llvm/llvm-project/commit/8ea1cf31118a54295b5d671edc61fca683d6d70f.diff
LOG: Revert "[AMDGPU] SIFixSGPRCopies refactoring"
Breaks ASan tests.
This reverts commit 3f8ae7efa866e581a16e9ccc8e29744722f13fff.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 8a9d07461758..b77499e0fee9 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -120,10 +120,6 @@ class V2SCopyInfo {
class SIFixSGPRCopies : public MachineFunctionPass {
MachineDominatorTree *MDT;
- SmallVector<MachineInstr*, 4> SCCCopies;
- SmallVector<MachineInstr*, 4> RegSequences;
- SmallVector<MachineInstr*, 4> PHINodes;
- SmallVector<MachineInstr*, 4> S2VCopies;
unsigned NextVGPRToSGPRCopyID;
DenseMap<unsigned, V2SCopyInfo> V2SCopies;
DenseMap<MachineInstr *, SetVector<unsigned>> SiblingPenalty;
@@ -138,11 +134,8 @@ class SIFixSGPRCopies : public MachineFunctionPass {
SIFixSGPRCopies() : MachineFunctionPass(ID), NextVGPRToSGPRCopyID(0) {}
bool runOnMachineFunction(MachineFunction &MF) override;
- void fixSCCCopies(bool IsWave32);
- void prepareRegSequenceAndPHIs(MachineFunction &MF);
unsigned getNextVGPRToSGPRCopyId() { return ++NextVGPRToSGPRCopyID; }
- bool needToBeConvertedToVALU(V2SCopyInfo *I);
- void analyzeVGPRToSGPRCopy(MachineInstr *MI);
+ void analyzeVGPRToSGPRCopy(V2SCopyInfo& Info);
void lowerVGPR2SGPRCopies(MachineFunction &MF);
// Handles copies whose source register is:
// 1. Physical register
@@ -178,6 +171,19 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass() {
return new SIFixSGPRCopies();
}
+static bool hasVectorOperands(const MachineInstr &MI,
+ const SIRegisterInfo *TRI) {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+
+ if (TRI->hasVectorRegisters(MRI.getRegClass(MO.getReg())))
+ return true;
+ }
+ return false;
+}
+
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getCopyRegClasses(const MachineInstr &Copy,
const SIRegisterInfo &TRI,
@@ -610,6 +616,14 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
MDT = &getAnalysis<MachineDominatorTree>();
+ // We have to lower VGPR to SGPR copies before the main loop
+ // because the REG_SEQUENCE and PHI lowering in the main loop
+ // converts the def-use chains to VALU and closes the opportunities
+ // for keeping them scalar.
+ // TODO: REG_SEQUENCE and PHIs are semantically copies. The next patch
+ // addresses their lowering and unifies the processing in one main loop.
+ lowerVGPR2SGPRCopies(MF);
+
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock *MBB = &*BI;
@@ -625,66 +639,100 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::STRICT_WQM:
case AMDGPU::SOFT_WQM:
case AMDGPU::STRICT_WWM: {
- Register SrcReg = MI.getOperand(1).getReg();
Register DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);
- if (MI.isCopy() && (SrcReg == AMDGPU::SCC || DstReg == AMDGPU::SCC))
- SCCCopies.push_back(&MI);
-
- if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
- // Since VGPR to SGPR copies affect VGPR to SGPR copy
- // score and, hence the lowering decision, let's try to get rid of
- // them as early as possible
- if (tryChangeVGPRtoSGPRinCopy(MI, TRI, TII))
+ if (MI.isCopy()) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (SrcReg == AMDGPU::SCC) {
+ Register SCCCopy = MRI->createVirtualRegister(
+ TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
+ I = BuildMI(*MI.getParent(),
+ std::next(MachineBasicBlock::iterator(MI)),
+ MI.getDebugLoc(),
+ TII->get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
+ : AMDGPU::S_CSELECT_B64),
+ SCCCopy)
+ .addImm(-1)
+ .addImm(0);
+ I = BuildMI(*MI.getParent(), std::next(I), I->getDebugLoc(),
+ TII->get(AMDGPU::COPY), DstReg)
+ .addReg(SCCCopy);
+ MI.eraseFromParent();
+ continue;
+ } else if (DstReg == AMDGPU::SCC) {
+ unsigned Opcode =
+ ST.isWave64() ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
+ Register Exec = ST.isWave64() ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
+ Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
+ I = BuildMI(*MI.getParent(),
+ std::next(MachineBasicBlock::iterator(MI)),
+ MI.getDebugLoc(), TII->get(Opcode))
+ .addReg(Tmp, getDefRegState(true))
+ .addReg(SrcReg)
+ .addReg(Exec);
+ MI.eraseFromParent();
continue;
- // Collect those not changed to try them after VGPR to SGPR copies
- // lowering as there will be more opportunities.
- S2VCopies.push_back(&MI);
+ }
}
- if (!isVGPRToSGPRCopy(SrcRC, DstRC, *TRI))
- continue;
- if (lowerSpecialCase(MI))
+
+ if (!DstReg.isVirtual()) {
+ // If the destination register is a physical register there isn't
+ // really much we can do to fix this.
+ // Some special instructions use M0 as an input. Some even only use
+ // the first lane. Insert a readfirstlane and hope for the best.
+ if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
+ Register TmpReg
+ = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ BuildMI(*MBB, MI, MI.getDebugLoc(),
+ TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
+ .add(MI.getOperand(1));
+ MI.getOperand(1).setReg(TmpReg);
+ }
+
continue;
+ }
- analyzeVGPRToSGPRCopy(&MI);
+ if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
+ tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
+ }
break;
}
- case AMDGPU::INSERT_SUBREG:
- case AMDGPU::PHI:
+ case AMDGPU::PHI: {
+ processPHINode(MI);
+ break;
+ }
case AMDGPU::REG_SEQUENCE: {
- if (TRI->isSGPRClass(TII->getOpRegClass(MI, 0))) {
- for (MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.getReg().isVirtual())
- continue;
- const TargetRegisterClass *SrcRC = MRI->getRegClass(MO.getReg());
- if (TRI->hasVectorRegisters(SrcRC)) {
- const TargetRegisterClass *DestRC =
- TRI->getEquivalentSGPRClass(SrcRC);
- Register NewDst = MRI->createVirtualRegister(DestRC);
- MachineBasicBlock *BlockToInsertCopy =
- MI.isPHI() ? MI.getOperand(MI.getOperandNo(&MO) + 1).getMBB()
- : MBB;
- MachineBasicBlock::iterator PointToInsertCopy =
- MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I;
- MachineInstr *NewCopy =
- BuildMI(*BlockToInsertCopy, PointToInsertCopy,
- PointToInsertCopy->getDebugLoc(),
- TII->get(AMDGPU::COPY), NewDst)
- .addReg(MO.getReg());
- MO.setReg(NewDst);
- analyzeVGPRToSGPRCopy(NewCopy);
- }
- }
+ if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) ||
+ !hasVectorOperands(MI, TRI)) {
+ foldVGPRCopyIntoRegSequence(MI, TRI, TII, *MRI);
+ continue;
}
- if (MI.isPHI())
- PHINodes.push_back(&MI);
- else if (MI.isRegSequence())
- RegSequences.push_back(&MI);
-
+ break;
+ }
+ case AMDGPU::INSERT_SUBREG: {
+ const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
+ DstRC = MRI->getRegClass(MI.getOperand(0).getReg());
+ Src0RC = MRI->getRegClass(MI.getOperand(1).getReg());
+ Src1RC = MRI->getRegClass(MI.getOperand(2).getReg());
+ if (TRI->isSGPRClass(DstRC) &&
+ (TRI->hasVectorRegisters(Src0RC) ||
+ TRI->hasVectorRegisters(Src1RC))) {
+ LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
+ MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT);
+ if (NewBB && NewBB != MBB) {
+ MBB = NewBB;
+ E = MBB->end();
+ BI = MachineFunction::iterator(MBB);
+ BE = MF.end();
+ }
+ assert((!NewBB || NewBB == I->getParent()) &&
+ "moveToVALU did not return the right basic block");
+ }
break;
}
case AMDGPU::V_WRITELANE_B32: {
@@ -752,41 +800,11 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
}
- lowerVGPR2SGPRCopies(MF);
-
- // Postprocessing
- fixSCCCopies(ST.isWave32());
-
- for (auto MI : S2VCopies) {
- // Check if it is still valid
- if (MI->getParent() && MI->isCopy()) {
- const TargetRegisterClass *SrcRC, *DstRC;
- std::tie(SrcRC, DstRC) = getCopyRegClasses(*MI, *TRI, *MRI);
- if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
- tryChangeVGPRtoSGPRinCopy(*MI, TRI, TII);
- }
- }
-
-
- for (auto MI : RegSequences) {
- // Check if it is still valid
- if (MI->getParent() && MI->isRegSequence())
- foldVGPRCopyIntoRegSequence(*MI, TRI, TII, *MRI);
- }
-
- for (auto MI : PHINodes) {
- processPHINode(*MI);
- }
-
if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
hoistAndMergeSGPRInits(AMDGPU::M0, *MRI, TRI, *MDT, TII);
SiblingPenalty.clear();
V2SCopies.clear();
- SCCCopies.clear();
- RegSequences.clear();
- PHINodes.clear();
- S2VCopies.clear();
return true;
}
@@ -843,29 +861,7 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
}
bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI) {
-
- Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
-
- if (!DstReg.isVirtual()) {
- // If the destination register is a physical register there isn't
- // really much we can do to fix this.
- // Some special instructions use M0 as an input. Some even only use
- // the first lane. Insert a readfirstlane and hope for the best.
- if (DstReg == AMDGPU::M0 &&
- TRI->hasVectorRegisters(MRI->getRegClass(SrcReg))) {
- Register TmpReg =
- MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
-
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
- TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
- .add(MI.getOperand(1));
- MI.getOperand(1).setReg(TmpReg);
- }
-
- return true;
- }
-
if (!SrcReg.isVirtual() || TRI->isAGPR(*MRI, SrcReg)) {
TII->moveToVALU(MI, MDT);
return true;
@@ -884,13 +880,9 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI) {
return false;
}
-void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(MachineInstr* MI) {
- Register DstReg = MI->getOperand(0).getReg();
- const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
- V2SCopyInfo Info(getNextVGPRToSGPRCopyId(), MI,
- TRI->getRegSizeInBits(*DstRC));
+void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(V2SCopyInfo& Info) {
SmallVector<MachineInstr *, 8> AnalysisWorklist;
// Needed because the SSA is not a tree but a graph and may have
// forks and joins. We should not then go the same way twice.
@@ -938,51 +930,142 @@ void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(MachineInstr* MI) {
AnalysisWorklist.push_back(U);
}
}
- V2SCopies[Info.ID] = Info;
}
-// The main function that computes the VGPR to SGPR copy score and
-// determines how the copy is further lowered: v_readfirstlane_b32 or moveToVALU
-bool SIFixSGPRCopies::needToBeConvertedToVALU(V2SCopyInfo *Info) {
- if (Info->SChain.empty()) {
- Info->Score = 0;
- return true;
- }
- Info->Siblings = SiblingPenalty[*std::max_element(
- Info->SChain.begin(), Info->SChain.end(),
- [&](MachineInstr *A, MachineInstr *B) -> bool {
- return SiblingPenalty[A].size() < SiblingPenalty[B].size();
- })];
- Info->Siblings.remove_if([&](unsigned ID) { return ID == Info->ID; });
- // The loop below computes the number of another VGPR to SGPR V2SCopies
- // which contribute to the current copy SALU chain. We assume that all the
- // V2SCopies with the same source virtual register will be squashed to one
- // by regalloc. Also we take care of the V2SCopies of the different subregs
- // of the same register.
- SmallSet<std::pair<Register, unsigned>, 4> SrcRegs;
- for (auto J : Info->Siblings) {
- auto InfoIt = V2SCopies.find(J);
- if (InfoIt != V2SCopies.end()) {
- MachineInstr *SiblingCopy = InfoIt->getSecond().Copy;
- if (SiblingCopy->isImplicitDef())
- // the COPY has already been MoveToVALUed
+void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
+
+ // The main function that computes the VGPR to SGPR copy score and
+ // determines how the copy is further lowered: v_readfirstlane_b32 or moveToVALU
+ auto needToBeConvertedToVALU = [&](V2SCopyInfo *I) -> bool {
+ if (I->SChain.empty()) {
+ I->Score = 0;
+ return true;
+ }
+ I->Siblings = SiblingPenalty[*std::max_element(
+ I->SChain.begin(), I->SChain.end(),
+ [&](MachineInstr *A, MachineInstr *B) -> bool {
+ return SiblingPenalty[A].size() < SiblingPenalty[B].size();
+ })];
+ I->Siblings.remove_if([&](unsigned ID) { return ID == I->ID; });
+ // The loop below computes the number of another VGPR to SGPR V2SCopies
+ // which contribute to the current copy SALU chain. We assume that all the
+ // V2SCopies with the same source virtual register will be squashed to one
+ // by regalloc. Also we take care of the V2SCopies of the different subregs
+ // of the same register.
+ SmallSet<std::pair<Register, unsigned>, 4> SrcRegs;
+ for (auto J : I->Siblings) {
+ auto InfoIt = V2SCopies.find(J);
+ if (InfoIt != V2SCopies.end()) {
+ MachineInstr *SiblingCopy = InfoIt->getSecond().Copy;
+ if (SiblingCopy->isImplicitDef())
+ // the COPY has already been MoveToVALUed
+ continue;
+
+ SrcRegs.insert(std::make_pair(SiblingCopy->getOperand(1).getReg(),
+ SiblingCopy->getOperand(1).getSubReg()));
+ }
+ }
+ I->SiblingPenalty = SrcRegs.size();
+
+ unsigned Penalty =
+ I->NumSVCopies + I->SiblingPenalty + I->NumReadfirstlanes;
+ unsigned Profit = I->SChain.size();
+ I->Score = Penalty > Profit ? 0 : Profit - Penalty;
+ I->NeedToBeConvertedToVALU = I->Score < 3;
+ return I->NeedToBeConvertedToVALU;
+ };
+
+ auto needProcessing = [](MachineInstr &MI) -> bool {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::WQM:
+ case AMDGPU::STRICT_WQM:
+ case AMDGPU::SOFT_WQM:
+ case AMDGPU::STRICT_WWM:
+ case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::PHI:
+ return true;
+ default:
+ return false;
+ }
+ };
+
+ SmallSet<MachineInstr *, 4> OutOfOrderProcessedCopies;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+ ++BI) {
+ MachineBasicBlock *MBB = &*BI;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ MachineInstr *MI = &*I;
+ if (!needProcessing(*MI))
continue;
- SrcRegs.insert(std::make_pair(SiblingCopy->getOperand(1).getReg(),
- SiblingCopy->getOperand(1).getSubReg()));
+ if (MI->isRegSequence() || MI->isPHI()) {
+ MachineBasicBlock::iterator J = I;
+ if (TRI->isSGPRClass(TII->getOpRegClass(*MI, 0))) {
+ for (MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(MO.getReg());
+ if (TRI->hasVectorRegisters(SrcRC)) {
+ const TargetRegisterClass *DestRC =
+ TRI->getEquivalentSGPRClass(SrcRC);
+ Register NewDst = MRI->createVirtualRegister(DestRC);
+ MachineBasicBlock *BlockToInsertCopy = MBB;
+ MachineBasicBlock::iterator PointToInsertCopy = I;
+ if (MI->isPHI()) {
+ BlockToInsertCopy =
+ MI->getOperand(MI->getOperandNo(&MO) + 1).getMBB();
+ PointToInsertCopy =
+ BlockToInsertCopy->getFirstInstrTerminator();
+ }
+ MachineBasicBlock::iterator NewI =
+ BuildMI(*BlockToInsertCopy, PointToInsertCopy,
+ PointToInsertCopy->getDebugLoc(),
+ TII->get(AMDGPU::COPY), NewDst)
+ .addReg(MO.getReg());
+ MO.setReg(NewDst);
+ if (!MI->isPHI()) {
+ I = NewI;
+ MI = &*I;
+ } else {
+ // We insert the copy into the basic block that may have been
+ // already processed. Pass it to the analysis explicitly.
+ V2SCopyInfo In(getNextVGPRToSGPRCopyId(), MI,
+ TRI->getRegSizeInBits(*DestRC));
+ analyzeVGPRToSGPRCopy(In);
+ V2SCopies[In.ID] = In;
+ OutOfOrderProcessedCopies.insert(MI);
+ }
+ }
+ }
+ }
+
+ if (J == I)
+ continue;
+ }
+
+ const TargetRegisterClass *SrcRC, *DstRC;
+ std::tie(SrcRC, DstRC) = getCopyRegClasses(*MI, *TRI, *MRI);
+
+ if (!isVGPRToSGPRCopy(SrcRC, DstRC, *TRI))
+ continue;
+
+ if (lowerSpecialCase(*MI))
+ continue;
+
+ if (OutOfOrderProcessedCopies.contains(MI))
+ continue;
+
+ V2SCopyInfo In(getNextVGPRToSGPRCopyId(), MI,
+ TRI->getRegSizeInBits(*DstRC));
+
+ analyzeVGPRToSGPRCopy(In);
+
+ V2SCopies[In.ID] = In;
}
}
- Info->SiblingPenalty = SrcRegs.size();
-
- unsigned Penalty =
- Info->NumSVCopies + Info->SiblingPenalty + Info->NumReadfirstlanes;
- unsigned Profit = Info->SChain.size();
- Info->Score = Penalty > Profit ? 0 : Profit - Penalty;
- Info->NeedToBeConvertedToVALU = Info->Score < 3;
- return Info->NeedToBeConvertedToVALU;
-}
-
-void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
SmallVector<unsigned, 8> LoweringWorklist;
for (auto &C : V2SCopies) {
@@ -1059,46 +1142,3 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
MI->eraseFromParent();
}
}
-
-void SIFixSGPRCopies::fixSCCCopies(bool IsWave32) {
- for (auto MI : SCCCopies) {
- // May be lowered out
- if (!MI->getParent())
- continue;
- // May already have been lowered.
- if (!MI->isCopy())
- continue;
- Register SrcReg = MI->getOperand(1).getReg();
- Register DstReg = MI->getOperand(0).getReg();
- if (SrcReg == AMDGPU::SCC) {
- Register SCCCopy = MRI->createVirtualRegister(
- TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
- MachineBasicBlock::iterator I =
- BuildMI(*MI->getParent(),
- std::next(MachineBasicBlock::iterator(MI)),
- MI->getDebugLoc(),
- TII->get(IsWave32 ? AMDGPU::S_CSELECT_B32
- : AMDGPU::S_CSELECT_B64),
- SCCCopy)
- .addImm(-1)
- .addImm(0);
- BuildMI(*MI->getParent(), std::next(I), I->getDebugLoc(),
- TII->get(AMDGPU::COPY), DstReg)
- .addReg(SCCCopy);
- MI->eraseFromParent();
- continue;
- }
-
- if (DstReg == AMDGPU::SCC) {
- unsigned Opcode = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
- Register Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
- BuildMI(*MI->getParent(), std::next(MachineBasicBlock::iterator(MI)),
- MI->getDebugLoc(), TII->get(Opcode))
- .addReg(Tmp, getDefRegState(true))
- .addReg(SrcReg)
- .addReg(Exec);
- MI->eraseFromParent();
- }
- }
-}
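
For context on the SCC hunks above: the reverted fixSCCCopies() helper and the
restored inline code lower SCC copies the same way, differing only in the
wave-size-dependent opcodes. A copy *from* SCC is materialized with an
S_CSELECT of -1/0 into a boolean register; a copy *to* SCC becomes an S_AND of
the source with EXEC, whose side effect sets SCC. The sketch below is a
minimal standalone model of just that opcode selection; the struct and
function names are illustrative stand-ins, not the real LLVM API, which
builds MachineInstrs with BuildMI.

#include <cstdio>

struct SCCCopyOpcodes {
  const char *Cselect; // lowers "COPY %dst, $scc"
  const char *And;     // lowers "COPY $scc, %src"; S_AND_* implicitly defines SCC
  const char *Exec;    // the EXEC register to AND the source against
};

// Pick the scalar opcodes by wave size, mirroring the ST.isWave32() /
// ST.isWave64() checks in the hunks above.
constexpr SCCCopyOpcodes pickSCCOpcodes(bool IsWave32) {
  return IsWave32 ? SCCCopyOpcodes{"S_CSELECT_B32", "S_AND_B32", "$exec_lo"}
                  : SCCCopyOpcodes{"S_CSELECT_B64", "S_AND_B64", "$exec"};
}

int main() {
  const bool Waves[] = {true, false};
  for (bool IsWave32 : Waves) {
    SCCCopyOpcodes Ops = pickSCCOpcodes(IsWave32);
    std::printf("wave%d: from-SCC via %s -1, 0; to-SCC via %s %%src, %s\n",
                IsWave32 ? 32 : 64, Ops.Cselect, Ops.And, Ops.Exec);
  }
}
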
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
index 60327e5fad37..84b315b80031 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
@@ -41,9 +41,9 @@ body: |
; W64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
- ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
+ ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
- ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; W64-NEXT: {{ $}}
; W64-NEXT: .1:
@@ -88,9 +88,9 @@ body: |
; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
- ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
+ ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
- ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
; W32-NEXT: {{ $}}
; W32-NEXT: .1:
@@ -160,10 +160,10 @@ body: |
; W64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
- ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
- ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
- ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
+ ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
+ ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
+ ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, [[COPY2]], %subreg.sub3
; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; W64-NEXT: {{ $}}
; W64-NEXT: .1:
@@ -207,10 +207,10 @@ body: |
; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
- ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
- ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
- ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
+ ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
+ ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
+ ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, [[COPY2]], %subreg.sub3
; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
; W32-NEXT: {{ $}}
; W32-NEXT: .1:
@@ -280,10 +280,10 @@ body: |
; W64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
- ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
- ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
- ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, [[COPY2]], %subreg.sub3
+ ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
+ ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
+ ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
+ ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; W64-NEXT: {{ $}}
; W64-NEXT: .1:
@@ -327,10 +327,10 @@ body: |
; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
- ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
- ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
- ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, [[COPY2]], %subreg.sub3
+ ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
+ ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
+ ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
+ ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
; W32-NEXT: {{ $}}
; W32-NEXT: .1:
@@ -400,9 +400,9 @@ body: |
; ADDR64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; ADDR64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; ADDR64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
- ; ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
+ ; ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
; ADDR64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
- ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
; ADDR64-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; ADDR64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
@@ -429,9 +429,9 @@ body: |
; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
- ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
+ ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
- ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
; W32-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; W32-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
@@ -485,9 +485,9 @@ body: |
; ADDR64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; ADDR64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; ADDR64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
- ; ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
+ ; ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
; ADDR64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
- ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
; ADDR64-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; ADDR64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
@@ -513,9 +513,9 @@ body: |
; W64-NO-ADDR64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W64-NO-ADDR64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W64-NO-ADDR64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
- ; W64-NO-ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
+ ; W64-NO-ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
; W64-NO-ADDR64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
- ; W64-NO-ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; W64-NO-ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
; W64-NO-ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; W64-NO-ADDR64-NEXT: {{ $}}
; W64-NO-ADDR64-NEXT: .1:
@@ -560,9 +560,9 @@ body: |
; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
- ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
+ ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
- ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
; W32-NEXT: {{ $}}
; W32-NEXT: .1:
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index 8e648f8d3bd5..29f8c60ad281 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -433,7 +433,7 @@ define amdgpu_gfx i64 @strict_wwm_called_i64(i64 %a) noinline {
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
; GFX9-O0-NEXT: ; implicit-def: $sgpr34
; GFX9-O0-NEXT: ; implicit-def: $sgpr34
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $exec
+; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr34_sgpr35
@@ -585,7 +585,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
-; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec
+; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[40:41], v2, v4
@@ -722,7 +722,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $exec
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
; GFX9-O0-NEXT: s_mov_b32 s35, 0x7fffffff
@@ -741,7 +741,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $exec
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
@@ -770,7 +770,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $exec
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $exec
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
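
Finally, for readers tracing the needToBeConvertedToVALU hunks in
SIFixSGPRCopies.cpp: the refactoring moved the scoring logic between a
standalone method and a lambda inside lowerVGPR2SGPRCopies, but the heuristic
itself is unchanged on both sides of the revert. Below is a minimal
standalone sketch of that decision using simplified stand-in types (not the
real V2SCopyInfo or SiblingPenalty machinery); only the arithmetic and the
threshold of 3 are taken from the diff.

#include <cstdio>

// Simplified stand-in for the fields of V2SCopyInfo that feed the score.
struct V2SCopyModel {
  unsigned SChainLength;      // SALU instructions reachable from the copy
  unsigned NumSVCopies;       // SGPR-to-VGPR copies the scalar chain feeds
  unsigned SiblingPenalty;    // distinct (reg, subreg) sources among siblings
  unsigned NumReadfirstlanes; // v_readfirstlane_b32s needed to stay scalar
};

// Returns true when the copy and its chain should be moved to the VALU,
// false when keeping them scalar (via v_readfirstlane_b32) looks profitable.
bool needToBeConvertedToVALU(const V2SCopyModel &I) {
  if (I.SChainLength == 0)
    return true; // empty scalar chain: nothing to save, Score is 0
  unsigned Penalty = I.NumSVCopies + I.SiblingPenalty + I.NumReadfirstlanes;
  unsigned Profit = I.SChainLength;
  unsigned Score = Penalty > Profit ? 0 : Profit - Penalty;
  return Score < 3; // same threshold as the pass
}

int main() {
  V2SCopyModel LongChain{/*SChainLength=*/8, 1, 1, 1};  // Score 5: stays scalar
  V2SCopyModel ShortChain{/*SChainLength=*/2, 1, 0, 1}; // Score 0: goes to VALU
  std::printf("long chain  -> VALU? %d\n", needToBeConvertedToVALU(LongChain));
  std::printf("short chain -> VALU? %d\n", needToBeConvertedToVALU(ShortChain));
}

In words: a copy is kept scalar only when the SALU chain it preserves
outweighs the combined penalties by at least three instructions.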